13#ifndef LINALG_PATCHDOMAINDECOMPOSITIONPPRECONDITIONER_HH
14#define LINALG_PATCHDOMAINDECOMPOSITIONPPRECONDITIONER_HH
18#include <boost/timer/timer.hpp>
20#include <dune/common/dynvector.hh>
21#include "dune/istl/preconditioner.hh"
59 std::vector<size_t>
const&
indices()
const
89 template <
int m,
class LocalMatrix,
class Real>
95 for (
int i=0; i<aloc.N(); ++i)
97 for (
int i=0; i<aloc.N(); ++i)
98 aloc[i][i] += Amax/r * unitMatrix<Real,m>();
103 std::vector<size_t> dofs;
113 template <
class Scalar>
121 template <
int m,
class Scalar=
float>
130 template <
class T,
class Index>
140 Real anorm = two_norm(alocinv);
143 regularize<m>(alocinv,regularizeForCondition);
150 std::random_device rd;
151 std::mt19937 gen(rd());
152 std::uniform_real_distribution<> dis(-1.0,1.0);
153 for (
int i=0; i<alocinv.N(); ++i)
154 for (
int j=0; j<alocinv.M(); ++j)
155 for (
int k=0; k<m; ++k)
156 for (
int l=0; l<m; ++l)
157 delta[i][j][k][l] = dis(gen);
158 Real dnorm = two_norm(delta);
160 Real epsilon = ::epsilon;
161 delta *= epsilon / anorm / dnorm;
166 auto n = alocinv.N() * m;
171 template <
class Vector>
200 std::array<std::vector<size_t>,3>
nestedDissection(std::vector<std::pair<size_t,size_t>>
const& edges,
201 std::vector<size_t>
const& edgeStart);
208 template <
class StorageScalar>
217 template <
class SparseMatrix>
227 template <
class FactorizationScalar>
245 template <
class FactorizationScalar>
248 std::vector<size_t> v1, v2, s;
249 std::unique_ptr<NestedDissection<StorageScalar>> l1FactorND, l2FactorND;
251 std::vector<StorageScalar> sFactor;
272 template <
class StorageScalar,
class FactorizationScalar>
297 template <
int m,
class StorageScalar,
class FactorizationScalar>
303 template <
class T,
class Index>
306 FactorizationScalar regularizeForCondition,
310 using LocalMatrix = Dune::BCRSMatrix<Dune::FieldMatrix<T,m,m>>;
314 regularize<m>(aloc,regularizeForCondition);
327 template <
class Vector>
331 choleskyFactor.solve(y);
341 namespace DomainDecompositionPreconditionerDetail
343 template <
class Scalar>
346 template <
class Scalar>
347 void choleskySolve(
int n,
int nrhs, std::vector<Scalar>
const& L, Scalar* b);
356 template <
class Scalar>
364 template <
int m,
class Scalar=
float>
373 template <
class T,
class Index>
382 regularize<1>(aloc,regularizeForCondition);
389 std::random_device rd;
390 std::mt19937 gen(rd());
391 std::uniform_real_distribution<> dis(-1.0,0.0);
396 auto lpos = begin(choleskyFactor);
397 for (
int j=0; j<n; ++j)
398 for (
int i=j; i<n; ++i)
402 delta[i][j] = dis(gen) / (i-j+1);
407 Scalar epsilon = ::epsilon;
408 delta *= epsilon/two_norm(delta)/two_norm(L);
410 lpos = begin(choleskyFactor);
411 for (
int j=0; j<n; ++j)
412 for (
int i=j; i<n; ++i)
414 *lpos += delta[i][j];
422 template <
class Vector>
431 std::vector<Scalar> choleskyFactor;
449 template <
class StorageTag,
int components>
460 template <
class Scalar,
int components>
466 template <
class Scalar,
int components>
472 template <
class StorageScalar,
class FactorizationScalar,
int components>
493 template <
class Space,
int m,
class StorageTag,
494 class SparseMatrixIndex = std::size_t>
496 typename Space::template Element<m>::type::StorageType>
501 using domain_type =
typename Space::template Element<m>::type::StorageType;
519 template <
class Matrix>
521 typename Matrix::field_type regularizeForCondition = -1.0,
522 StorageTag storageTag = StorageTag(),
526 assert(A.N()==A.M());
528 timer.
start(
"make Schwarz preconditioner");
531 std::vector<std::vector<size_t>> patchDofs(space.gridView().size(space.gridView().dimension));
534 assert(patches.size()==patchDofs.size());
537 :
std::min(nTasks,(
int)patches.size());
538 inverseLocalMatrices.resize(nThreads);
543 parallelFor([&patches,&A,&space,regularizeForCondition,storageTag,&taskTiming,
this](
size_t j,
size_t s)
549 auto const& ap = algebraicPatch(space,patches[i]);
553 InverseLocalMatrix aloc = InverseLocalMatrix(ap,A,regularizeForCondition,storageTag);
554 this->inverseLocalMatrices[j].push_back(std::move(aloc));
558 std::cout <<
"exception in inverting block " << i <<
" in thread " << j <<
" of " << s <<
":\n";
559 std::cout << ex.what() <<
"\n";
565 timer.stop(
"make Schwarz preconditioner");
567std::cerr <<
"Schwarz preconditioner storage size: " <<
storage()/(1024*1024) <<
" MB\n"
568 <<
" flop count: " <<
flops()/(1024*1024) <<
" M\n";
570 std::ofstream f(
"out.gnu");
587 using StorageScalar =
typename InverseLocalMatrix::field_type;
597 std::mutex rhsAccess;
600 for (
auto const& alocinv: this->inverseLocalMatrices[j])
602 auto const& indices = alocinv.indices();
603 int const n = indices.size();
608 for (
int i=0; i<n; ++i)
609 yloc[i] = y[indices[i]];
611 alocinv.mv(yloc,xloc);
613 std::lock_guard<std::mutex> lock(rhsAccess);
614 for (
int i=0; i<n; ++i)
615 x[indices[i]] += xloc[i];
617 },inverseLocalMatrices.
size());
633 for (
auto const& as: inverseLocalMatrices)
634 for (
auto const& a: as)
645 for (
auto const& as: inverseLocalMatrices)
647 for (
auto const& a: as)
654 std::vector<std::vector<InverseLocalMatrix>> inverseLocalMatrices;
An inverse local matrix representation that simply stores a dense Cholesky factor.
void mv(Vector const &x, Vector &y) const
DenseCholeskyLocalMatrixStorage(std::vector< size_t > const &ap, NumaBCRSMatrix< Dune::FieldMatrix< T, m, m >, Index > const &A, field_type regularizeForCondition, DenseCholeskyStorageTag< Scalar >)
A parametrized tag type for selecting storage of inverses by dense Cholesky factors with floating poi...
An inverse local matrix representation that simply stores the dense inverse.
void mv(Vector const &x, Vector &y) const
DenseInverseLocalMatrixStorage(std::vector< size_t > const &ap, NumaBCRSMatrix< Dune::FieldMatrix< T, m, m >, Index > const &A, field_type regularizeForCondition, DenseInverseStorageTag< Scalar >)
A parametrized tag type for selecting dense storage of inverses with floating point representation of...
A wrapper class for conveniently providing exceptions with context information.
A LAPACK-compatible dense matrix class with shape specified at runtime.
A base class providing common functionality for storage of inverse local matrices.
void regularize(LocalMatrix &aloc, Real r) const
size_t flops() const
Reports the number of floating point operations per preconditioner application.
size_t storage() const
Reports the storage size (in bytes) of the inverse block matrix.
InverseLocalMatrixStorageBase(std::vector< size_t > const &ap)
std::vector< size_t > const & indices() const
Returns the global degrees of freedom defining the submatrix treated as a block here.
Providing a matrix or array interface to LAPACK-ordered entries.
Providing a vector interface for contiguously stored entries.
size_t size() const
The number of entries stored.
An inverse local matrix representation that stores a sparse Cholesky factor using nested dissection.
void mv(Vector const &x, Vector &y) const
NestedDissectionCholeskyLocalMatrixStorage(std::vector< size_t > const &ap, NumaBCRSMatrix< Dune::FieldMatrix< T, m, m >, Index > const &A, FactorizationScalar regularizeForCondition, NestedDissectionStorageTag< StorageScalar, FactorizationScalar > storageTag)
a nested dissection factorization for symmetric positive definite sparse matrices
size_t storageSize() const
size_t size()
reports the total number of separators in this nested dissection
NestedDissection()=default
NestedDissection< StorageScalar > & operator=(NestedDissection< FactorizationScalar > &&nd)
move assignment from different storage scalar type
NestedDissection(SparseMatrix const &A, size_t skipEntries, size_t skipDimension)
Constructor.
A parametrized tag type for selecting storage of inverses by nested dissection with floating point re...
NestedDissectionStorageTag(size_t skipEntries_=100, size_t skipDimension_=32)
size_t skipDimension
omit further dissection if the size of the system is below this number
size_t skipEntries
omit further dissection if the size of the zero-block (in terms of entries) is below this number
A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL interface (to complete....
static NumaThreadPool & instance(int maxThreads=std::numeric_limits< int >::max())
Returns a globally unique thread pool instance.
int cpus() const
Reports the total number of CPUs (usually a multiple of nodes).
An additive overlapping domain decomposition type preconditioner for higher order finite elements app...
virtual void apply(domain_type &x, range_type const &y)
Applys the preconditioner as .
virtual Space::Scalar applyDp(domain_type &x, range_type const &y)
size_t storage() const
Reports the storage size (in bytes) of the preconditioner.
typename Space::template Element< m >::type::StorageType domain_type
PatchDomainDecompositionPreconditioner(Space const &space, Matrix const &A, typename Matrix::field_type regularizeForCondition=-1.0, StorageTag storageTag=StorageTag(), int nTasks=0)
Constructor.
virtual bool requiresInitializedInput() const
Returns true if the target vector x has to be initialized to zero before calling apply or applyDp.
size_t flops() const
reports the number of floating point operations when applying the preconditioner
Scalar Real
The real type on which the scalar field is based.
A scope guard object that automatically closes a timing section on destruction.
Interface for symmetric preconditioners.
A class that gathers data on task timing and provides gnuplot visualization.
void start(int task)
defines the start of given task.
void stop(int task)
defines the start of given task.
Supports gathering and reporting execution times information for nested program parts.
static Timings & instance()
Returns a reference to a single default instance.
struct Times const * start(std::string const &name)
Starts or continues the timing of given section.
auto computePatches(GridView const &gridview)
Computes the patches around vertices of the grid view.
Dune::FieldVector< T, n > max(Dune::FieldVector< T, n > x, Dune::FieldVector< T, n > const &y)
Componentwise maximum.
Dune::FieldVector< T, n > min(Dune::FieldVector< T, n > x, Dune::FieldVector< T, n > const &y)
Componentwise minimum.
Index uniformWeightRangeStart(BlockIndex i, BlockIndex n, Index m)
Computes partitioning points of ranges for uniform weight distributions.
void parallelFor(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different CPUs.
void choleskySolve(int n, int nrhs, std::vector< Scalar > const &L, Scalar *b)
std::vector< Scalar > choleskyFactor(DynamicMatrix< Scalar > const &A)
DynamicMatrix< Scalar > flatMatrix(DynamicMatrix< Scalar > const &A)
Dune::FieldVector< Scalar, dim > Vector
std::array< std::vector< size_t >, 3 > nestedDissection(std::vector< std::pair< size_t, size_t > > const &edges, std::vector< size_t > const &edgeStart)
Computes a nested dissection of the given graph.
EntryTraits< Entry >::real_type frobenius_norm2(Entry const &x)
Provides the actual type of local matrix storage based on a tag and the number of components.
void type
The actual type used for storing local matrices.