KASKADE 7 development version
jacobiPreconditioner.hh
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the library KASKADE 7 */
4/* https://www.zib.de/research/projects/kaskade7-finite-element-toolbox */
5/* */
6/* Copyright (C) 2002-2020 Zuse Institute Berlin */
7/* */
8/* KASKADE 7 is distributed under the terms of the ZIB Academic License. */
9/* see $KASKADE/academic.txt */
10/* */
11/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13#ifndef JACOBI_PRECONDITIONER_HH
14#define JACOBI_PRECONDITIONER_HH
15
16#include "dune/istl/preconditioners.hh"
17#include "dune/istl/solvercategory.hh"
18
19#include "fem/fixdune.hh"
20#include "fem/istlinterface.hh"
21
24
27
28namespace Kaskade
29{
31 namespace JacobiPreconditionerDetail
32 {
33 template <class Entry, int row=0, int col=0>
34 struct DiagonalBlock // TODO: implement move constructor for efficient return by ExtractDiagonal functor
35 {
36 using field_type = typename Entry::field_type;
37
38 DiagonalBlock() = default;
39 DiagonalBlock(DiagonalBlock&& other) = default;
41
42 // mat is the matrix to extract the diagonal from and invert it
43 // weight is the Jacobi step size, i.e. the inverted diagonal is multiplied by w
44 template <class Matrix, class Weight>
45 DiagonalBlock(Matrix const& mat, Weight w)
46 : size(mat.N()), dps(NumaThreadPool::instance().nodes())
47 {
48 assert(mat.N() == mat.M()); // make sure it's quadratic
49 size_t const nodes = NumaThreadPool::instance().nodes();
50 for (size_t i=0; i<nodes; ++i)
51 diag.push_back(std::vector<Entry,NumaAllocator<Entry>>(uniformWeightRangeStart(i+1,nodes,size)-uniformWeightRangeStart(i,nodes,size),
52 Entry(),NumaAllocator<Entry>(i)));
53
54 // copy and invert diagonal
55 for (auto ri=mat.begin(); ri!=mat.end(); ++ri)
56 {
57 auto const i = ri.index();
58 size_t const node = uniformWeightRange(static_cast<size_t>(i),nodes,size); // which NUMA node
59 size_t const k = i - uniformWeightRangeStart(node,nodes,size); // which offset in node block
60
61 if (k<0 || k>=diag[node].size())
62 {
63 std::cerr << "i=" << i << " nodes=" << nodes << " node=" << node << " k=" << k << " => i=" << k + (node*size)/nodes << " diag[node].size=" << diag[node].size() << " n=" << size << "\n";
64 abort();
65 }
66
67 auto ci = ri->begin();
68 auto cend = ri->end();
69 for (; ci!=cend && ci.index()!=i; ++ci) // find diagonal entry TODO: more efficient search
70 ;
71 Entry& a = diag[node][k];
72 if (ci.index() == i)
73 {
74 a = *ci;
75 a.invert(); // TODO: this calls DynamicMatrix::invert and performs memory allocation even for fixed size 3x3 matrices via std::vector...
76 a *= w;
77 }
78 else
79 a = 0; // ignore zero diagonal elements
80 }
81 }
82
88 template <class X, class Y>
89 field_type apply(X& x, Y const& y) const
90 {
91 // Perform one Jacobi step for Ax=y, i.e. for A = L+D+R we compute x = D^{-1}y
92 // Note that the Dune::SeqJac preconditioner as of Dune 2.2.1 allocates and frees a new vector in every
93 // iteration, and iterates each time over the whole matrix, which is quite costly.
94 // This implementation is roughly five times faster.
95 std::fill(dps.begin(),dps.end(),0.0);
96 parallelForNodes([&x,&y,this](size_t node, size_t nodes)
97 {
98 assert(nodes==NumaThreadPool::instance().nodes());
99 size_t const start = uniformWeightRangeStart(node,nodes,this->size);
100 auto const& d = this->diag[node];
101 size_t const n = d.size();
102 field_type dp = 0;
103 for (size_t k=0; k<n; ++k)
104 {
105 size_t const i = k + start;
106 x[i] = d[k] * y[i];
107 dp += x[i] * y[i];
108 }
109 this->dps[node] = dp;
110 });
111 return std::accumulate(dps.begin(),dps.end(),0.0);
112 }
113
114
115 static int const rowId = row;
116 static int const colId = col;
117
118 size_t size;
119
120 // diag[i] contains entries [i*n/nodes, (i+1)*n/nodes[
121 std::vector<std::vector<Entry,NumaAllocator<Entry>>> diag;
122
123 private:
124 mutable std::vector<field_type> dps; // for storing duality pairings of parallel blocks (could be local to apply, but is here to prevent reallocation
125 };
126
127 // --------------------------------------------------------------------------------------------
128
129 // boost::fusion functor extracting, inverting, and scaling the diagonal of a matrix
131 {
132 // w is the Jacobi step size or damping factor
133 ExtractDiagonal(double w_): w(w_) {}
134
135 template <class T> struct result {};
136
137 template <class Block>
138 struct result<ExtractDiagonal(Block)> {
139 typedef typename std::remove_reference<Block>::type B;
141 };
142
143 template <class MB>
144 typename result<ExtractDiagonal(MB)>::type operator()(MB const& mb) const
145 {
146 return typename result<ExtractDiagonal(MB)>::type(*mb.matrix,w);
147 }
148
149 private:
150 double w;
151 };
152
153 // --------------------------------------------------------------------------------------------
154
155 template <class Matrix, class Domain=typename MatrixTraits<Matrix>::NaturalDomain, class Range=typename MatrixTraits<Matrix>::NaturalRange>
157 {
158 public:
159 static int const category = Dune::SolverCategory::sequential;
160
161 using matrix_type = Matrix;
162 using domain_type = Domain;
163 using range_type = Range;
164 using field_type = typename Matrix::field_type;
165
172
175
183 JacobiPreconditionerBase(Matrix const& mat, double w=1.0)
184 : diag(mat,w)
185 { }
186
187 virtual void apply(domain_type& x, range_type const& b) { applyDp(x,b); }
188
190 {
191 return diag.apply(x,y);
192 }
193
194 virtual bool requiresInitializedInput() const { return false; }
195
196 private:
198 };
199 }
201
202 // ----------------------------------------------------------------------------------------------
203 // ----------------------------------------------------------------------------------------------
204
211 template <class Operator>
213 typename Operator::domain_type,
214 typename Operator::range_type>
215 {
217 using field_type = typename Base::field_type;
218
219 public:
227 JacobiPreconditioner(Operator const& op, double w=1.0)
228 : Base(op.getmat(),w)
229 { }
230 };
231
237 template <class Operator>
238 auto makeJacobiPreconditioner(Operator const& op, double w=1.0)
239 {
240 static Deprecated dummy("makeJacobiPreconditioner() is deprecated, call JacobiPreconditioner(A) directly",2020);
242 }
243
244 // ----------------------------------------------------------------------------------------------
245
250 template <class Entry, class Index>
251 class JacobiPreconditioner<NumaBCRSMatrix<Entry,Index>>: public JacobiPreconditionerDetail::JacobiPreconditionerBase<NumaBCRSMatrix<Entry,Index>>
252 {
254 public:
255 using field_type = typename Base::field_type;
256
264
266
268
277 : Base(mat,w)
278 {}
279 };
280
281 // ----------------------------------------------------------------------------------------------
282
295 template <class Assembler,
296 int firstRow, int lastRow,
297 int firstCol, int lastCol,
298 class BlockFilter,
299 bool symmetric>
300 class JacobiPreconditioner<AssembledGalerkinOperator<Assembler,firstRow,lastRow,firstCol,lastCol,BlockFilter,symmetric>>
301 : public SymmetricPreconditioner<typename AssembledGalerkinOperator<Assembler,firstRow,lastRow,firstCol,lastCol,BlockFilter,symmetric>::Domain,
302 typename AssembledGalerkinOperator<Assembler,firstRow,lastRow,firstCol,lastCol,BlockFilter,symmetric>::Range>
303 {
305 typedef typename Operator::Domain Domain;
306 typedef typename Operator::Range Range;
307 typedef typename Operator::Scalar Scalar;
308
309 // A block filter that selects only the diagonal blocks of a heterogeneous block matrix.
310 struct DiagonalFilter {
311 template <class Block> struct apply { typedef boost::mpl::bool_<Block::rowId==Block::colId> type; };
312 };
313
314
315 public:
316 static int const category = Dune::SolverCategory::sequential;
317
324 explicit JacobiPreconditioner(Operator const& op, Scalar w=1.0):
325 blocks(boost::fusion::transform(
326 boost::fusion::filter_if<DiagonalFilter>(
327 boost::fusion::transform(boost::fusion::filter_if<BlockFilter>(IstlInterfaceDetail::AllBlocks<typename Operator::Assembler>::apply(op.getAssembler())),
328 IstlInterfaceDetail::Translate<firstRow,firstCol>())),
329 JacobiPreconditionerDetail::ExtractDiagonal(w)))
330 {}
331
337 virtual void apply (Domain& x, Range const& y)
338 {
339 applyDp(x,y);
340 }
341
347 virtual Scalar applyDp (Domain& x, Range const& y)
348 {
349 Scalar dp = 0;
350 boost::fusion::for_each(blocks,ApplyJacobi(x,y,dp));
351 return dp;
352 }
353
359 virtual bool requiresInitializedInput() const { return false; }
360
361 private:
362 // a boost::fusion list of all the matrix blocks to consider. Select all blocks in the subranges covered by the
363 // operator, shift the indices to start at 0, then select the ones on the diagonal and extract, invert, and store
364 // the diagonal.
365 typename boost::fusion::result_of::as_vector<
366 typename boost::fusion::result_of::transform<
367 typename boost::fusion::result_of::filter_if<
368 typename boost::fusion::result_of::transform<
369 typename boost::fusion::result_of::filter_if<typename IstlInterfaceDetail::AllBlocks<typename Operator::Assembler>::result,BlockFilter>::type,
370 IstlInterfaceDetail::Translate<firstRow,firstCol> >::type,
371 DiagonalFilter>::type,
373
374 struct ApplyJacobi
375 {
376 ApplyJacobi(Domain& x_, Range const& y_, Scalar& dp): x(x_), y(y_), dualPairing(dp) {}
377
378 template <class Block>
379 void operator()(Block const& mb) const
380 {
381 using namespace boost::fusion;
382 dualPairing += mb.apply(at_c<Block::colId>(x.data),at_c<Block::rowId>(y.data));
383 }
384
385 private:
386 Domain& x;
387 Range const& y;
388 Scalar& dualPairing;
389 };
390
391 };
392} // namespace Kaskade
393#endif
An adaptor presenting a Dune::LinearOperator <domain_type,range_type> interface to a contiguous sub-b...
typename Assembler::AnsatzVariableSetDescription::template CoefficientVector< firstCol, lastCol > Domain
typename Assembler::TestVariableSetDescription::template CoefficientVector< firstRow, lastRow > Range
Convenience class for marking deprecated functions. A static object of this class can be created insi...
Definition: deprecation.hh:35
virtual bool requiresInitializedInput() const
Returns true if the target vector x has to be initialized to zero before calling apply or applyDp.
typename Base::field_type field_type
JacobiPreconditioner & operator=(JacobiPreconditioner &&)=default
JacobiPreconditioner()=default
Default constructor.
JacobiPreconditioner(JacobiPreconditioner &&)=default
JacobiPreconditioner(NumaBCRSMatrix< Entry, Index > const &mat, double w=1.0)
Constructor.
virtual field_type applyDp(domain_type &x, range_type const &y)
JacobiPreconditionerBase & operator=(JacobiPreconditionerBase &&)=default
virtual void apply(domain_type &x, range_type const &b)
virtual bool requiresInitializedInput() const
Returns true if the target vector x has to be initialized to zero before calling apply or applyDp.
JacobiPreconditionerBase(Matrix const &mat, double w=1.0)
Constructor.
JacobiPreconditionerBase(JacobiPreconditionerBase &&)=default
JacobiPreconditioner(Operator const &op, double w=1.0)
Constructor.
An STL allocator that uses memory of a specific NUMA node only.
Definition: threading.hh:653
A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL interface (to complete....
Implementation of thread pools suitable for parallelization of (more or less) memory-bound algorithms...
Definition: threading.hh:293
static NumaThreadPool & instance(int maxThreads=std::numeric_limits< int >::max())
Returns a globally unique thread pool instance.
int nodes() const
Reports the number of NUMA nodes (i.e., memory interfaces/CPU sockets)
Definition: threading.hh:316
Interface for symmetric preconditioners.
This file contains various utility functions that augment the basic functionality of Dune.
auto makeJacobiPreconditioner(Operator const &op, double w=1.0)
DEPRECATED, use JacobiPreconditioner(A) directly.
Index uniformWeightRangeStart(BlockIndex i, BlockIndex n, Index m)
Computes partitioning points of ranges for uniform weight distributions.
Definition: threading.hh:75
void parallelForNodes(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different NUMA nodes.
Definition: threading.hh:604
Index uniformWeightRange(Index j, Index n, Index m)
Computes the range in which an index is to be found when partitioned for uniform weights.
Definition: threading.hh:91
A block filter specifying which subblocks of a stiffness matrix or right hand side shall be assembled...
Definition: concepts.hh:763
DiagonalBlock(DiagonalBlock &&other)=default
std::vector< std::vector< Entry, NumaAllocator< Entry > > > diag
DiagonalBlock & operator=(DiagonalBlock &&other)=default
field_type apply(X &x, Y const &y) const
Computes and returns .
result< ExtractDiagonal(MB)>::type operator()(MB const &mb) const
void NaturalRange
The natural range type.
Definition: matrixTraits.hh:58