KASKADE 7 development version
assemblerCore.hh
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*                                                                           */
/*  This file is part of the library KASKADE 7                               */
/*    https://www.zib.de/research/projects/kaskade7-finite-element-toolbox   */
/*                                                                           */
/*  Copyright (C) 2002-2024 Zuse Institute Berlin                            */
/*                                                                           */
/*  KASKADE 7 is distributed under the terms of the ZIB Academic License.    */
/*    see $KASKADE/academic.txt                                              */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#ifndef ASSEMBLERCORE_HH
#define ASSEMBLERCORE_HH

#include <iostream>
#include <numeric>
#include <memory>
#include <mutex>
#include <tuple>
#include <new>
#include <type_traits>

#include <boost/version.hpp>
#include <boost/mpl/accumulate.hpp>
#include <boost/mpl/bool.hpp>
#include <boost/mpl/if.hpp>
#include <boost/timer/timer.hpp>

#include <boost/exception/diagnostic_information.hpp>
#include <boost/fusion/algorithm.hpp>
#include <boost/fusion/sequence.hpp>
#include <boost/fusion/include/find_if.hpp>

#include "dune/common/config.h"

#include "dune/common/fvector.hh"
#include "dune/common/fmatrix.hh"
#include "dune/geometry/type.hh"
#include "dune/grid/common/capabilities.hh"
#include "dune/istl/bcrsmatrix.hh"
#include "dune/istl/bdmatrix.hh"

#include "fem/firstless.hh"
#include "fem/fixfusion.hh"
#include "fem/functional_aux.hh"
#include "fem/gridmanager.hh"
#include "fem/quadrature.hh"
#include "fem/variables.hh"


#include "utilities/timing.hh"

61// this is omitted in boost/fusion/sequence/container/vector/detail/vector_n.hpp
62#undef N
63
namespace Kaskade
{

  static int const timerStatistics = 0x1;      // gather global execution time statistics
  static int const scatterStatistics = 0x2;
  static int const lockStatistics = 0x4;
  static int const localTimerStatistics = 0x0; // gather local (per element) time statistics

  static int const statistics = 0;

  //---------------------------------------------------------------------

  namespace AssemblyDetail {

    using namespace boost::fusion;


    class RowGroupManager
    {
    public:
      RowGroupManager();

      virtual ~RowGroupManager();

      // Noncopyable since we contain mutexes.
      RowGroupManager(RowGroupManager const& m) = delete;
      RowGroupManager& operator=(RowGroupManager const& m) = delete;

      void init(int nrg, size_t rows);

      void init(int nrg, std::vector<std::vector<size_t>> const& colIndex);

      void init(int nrg, std::vector<size_t> const& weight);

      std::pair<size_t,size_t> range(int n) const;

      std::mutex& mutex(int n) const;

      int size() const;


    private:
      // start indices of row groups
      std::vector<size_t> rowGroupStart;

      // Support for simultaneous write access to different row groups.
      std::mutex* mutexes;

      // deletes the mutexes
      void clear();
    };
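
    /* Usage sketch (illustrative, not part of this header; all names below are
       assumptions): a RowGroupManager partitions the rows [0,nRows) into groups
       that can be locked independently, so several threads can scatter into
       disjoint row ranges concurrently.

         RowGroupManager rowGroups;
         rowGroups.init(4,nRows);                   // 4 row groups of roughly equal size
         for (int g=0; g<rowGroups.size(); ++g)
         {
           auto [first,last] = rowGroups.range(g);  // half-open row range of group g
           std::lock_guard<std::mutex> lock(rowGroups.mutex(g));
           // ... scatter local contributions into global rows [first,last) ...
         }
    */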

    // ----------------------------------------------------------------------------------------------------

    template <class Policy, class RowVar, class ColVar, class SparseIdx>
    struct MatrixBlock
    {
      static int const rowId = RowVar::id;
      static int const colId = ColVar::id;
      static int const rowSpaceIndex = RowVar::spaceIndex;
      static int const colSpaceIndex = ColVar::spaceIndex;
      static bool const symmetric = Policy::template BlockInfo<rowId,colId>::symmetric;
      static bool const mirror = Policy::template BlockInfo<rowId,colId>::mirror;
      static bool const lumped = Policy::template BlockInfo<rowId,colId>::lumped;
      static bool const makePositive = Policy::template BlockInfo<rowId,colId>::makePositive;

      using SparseIndex = SparseIdx;
      typedef MatrixBlock<Policy,RowVar,ColVar,SparseIndex> Self;
      typedef typename Policy::Scalar Scalar;
      typedef typename Policy::Spaces Spaces;
      typedef Dune::FieldMatrix<Scalar,RowVar::m,ColVar::m> BlockType; // entry type of the sparse matrix
      typedef NumaBCRSMatrix<BlockType, SparseIndex> Matrix;

      typedef typename SpaceType<Spaces,RowVar::spaceIndex>::type RowSpace;
      typedef typename SpaceType<Spaces,ColVar::spaceIndex>::type ColSpace;
      typedef typename RowSpace::GridView GridView;
      typedef typename GridView::template Codim<0>::Iterator CellIterator;

      static int const dim = ColSpace::dim;

      MatrixBlock() {}

      MatrixBlock(MatrixBlock const& mb)
      : matrix(new Matrix(*mb.matrix))
      , isDense(mb.isDense)
      {}

      MatrixBlock& operator=(MatrixBlock const& mb)
      {
        *matrix = *mb.matrix;
        isDense = mb.isDense;
        return *this;
      }

      MatrixBlock& operator=(Scalar x)
      {
        assert(matrix.get());
        *matrix = x;
        return *this;
      }

      MatrixBlock& operator+=(MatrixBlock const& mb)
      {
        *matrix += *mb.matrix;
        return *this;
      }


      Matrix& globalMatrix()
      {
        assert(matrix.get());
        return *matrix;
      }

      Matrix const& globalMatrix() const
      {
        assert(matrix.get());
        return *matrix;
      }

      std::shared_ptr<Matrix const> globalMatrixPointer() const { return matrix; }


      template <class FaceOracle>
      void init(Spaces const& spaces, CellIterator first, CellIterator last,
                FaceOracle const& considerFace)
      {
        RowSpace const& rowSpace = *at_c<RowVar::spaceIndex>(spaces);
        ColSpace const& colSpace = *at_c<ColVar::spaceIndex>(spaces);

        // @todo: respect *dynamic* presence flag. static one is respected
        // on construction of sequence of matrix blocks. Respect symmetric flag.
        if (!Policy::template BlockInfo<rowId,colId>::present)
        {
          assert("Aieee! Nonpresent matrix block detected!\n"==0);
          abort();
        }

        boost::timer::cpu_timer timer;

        size_t nnz = 0;
        size_t const rows = rowSpace.degreesOfFreedom();
        size_t const cols = colSpace.degreesOfFreedom();

        // If mass lumping is desired, we just create a diagonal matrix.
        // Otherwise, we need to establish the connectivity pattern of
        // the ansatz functions.
        if (Policy::template BlockInfo<rowId,colId>::lumped)
        {
          assert(RowVar::spaceIndex==ColVar::spaceIndex && rows==cols);
          NumaCRSPatternCreator<SparseIndex> creator(rows,cols,false,1);
          for (size_t i=0; i<rows; ++i)
            creator.addElements(&i,&i+1,&i,&i+1);
          matrix.reset(new Matrix(creator)); // zero init
          nnz = rows;
        }
        else
        {
          // In case of global support of shape functions of one of the involved spaces (usually ConstantSpace),
          // the resulting matrix is dense. Use a shortcut to define the sparsity structure.
          bool const dense = RowSpace::Mapper::globalSupport || ColSpace::Mapper::globalSupport;


          // Allocate sparse matrix and define the sparsity structure. Estimate the required number of entries per row
          // for efficient preallocation.
          // Ws 2016-01-22: Zero nnz init is actually faster on P1 pattern, probably because otherwise the
          // chunks put a high burden on the memory system by requesting memory at the same time. This appears
          // to parallelize quite badly and hence is turned off in parallel mode.
          int preallocateEntriesPerRow = 0;
          if (NumaThreadPool::instance().nodes()==1) // sequential allocation in NumaCRSPatternCreator
          {
            int localSize = colSpace.mapper().globalIndices(0).size(); // # entries per row of local matrices (rough estimate)
            preallocateEntriesPerRow = dense? 0:            // for dense matrices we'll use a specialized version
                                       dim<=1? 2*localSize: // make room for (roughly) all elemental matrices appended...
                                       dim==2? 8*localSize: // ... this is overprovisioning, but faster due to less ...
                                               20*localSize; // ... allocations and data movement.
          }
          NumaCRSPatternCreator<SparseIndex> creator(rows,cols,symmetric,preallocateEntriesPerRow);

          if (statistics & timerStatistics)
            std::cout << "initial creator construction (" << rowId << "," << colId << "): " << timer.format();
          timer.start();


          if (dense)
          {
            creator.addAllElements();
            if (statistics & timerStatistics)
              std::cout << "entering indices for (" << rowId << "," << colId << "): " << timer.format();
            timer.start();
          }
          else
          {
            // Traditional local FE spaces - that's the hard case. We have to traverse the grid and pick up all
            // interactions of (local) basis functions.

            // Iteration over all cells. Note that the grid views of row and
            // column spaces need not cover the whole grid (e.g., in case of
            // local support). It would be best to iterate over the grid
            // view with smaller support, but for simplicity we choose the
            // row space.

            size_t const nCells = rowSpace.indexSet().size(0);

            std::vector<typename RowSpace::Mapper::GlobalIndexRange> rIndices(nCells,rowSpace.mapper().initGlobalIndexRange());
            std::vector<typename ColSpace::Mapper::GlobalIndexRange> cIndices(nCells,colSpace.mapper().initGlobalIndexRange());
            for (size_t i=0; i<nCells; ++i)
            {
              rIndices[i] = rowSpace.mapper().globalIndices(i);
              cIndices[i] = colSpace.mapper().globalIndices(i);
            }
            if (statistics & timerStatistics)
              std::cout << "gathering indices for (" << rowId << "," << colId << "): " << timer.format();
            timer.start();

            creator.addElements(rIndices,cIndices); // enter all entries in one go

            // We may need to assemble integrals on inner faces - then there are matrix entries coupling degrees of freedom
            // associated to different cells (e.g., in discontinuous Galerkin methods). Add these entries, too.
            if(Policy::considerInnerFaces)
            {
              GridView const& gridView = rowSpace.gridView();

              for (CellIterator ci=first; ci!=last; ++ci)
              {
                auto const& rIndices = rowSpace.mapper().globalIndices(*ci);

                for (auto const& face: intersections(gridView,*ci))
                {
                  // only true inner faces are considered, and only if requested
                  if (face.boundary() || !face.neighbor() || considerFace(face)==false)
                    continue;

                  auto const& cIndices = colSpace.mapper().globalIndices(face.outside());

                  creator.addElements(begin(rIndices),end(rIndices),
                                      begin(cIndices),end(cIndices));
                } // end iteration over faces
              }   // end iteration over cells
            }     // end Policy::considerInnerFaces

            if (statistics & timerStatistics)
              std::cout << "entering indices for (" << rowId << "," << colId << "): " << timer.format();
            timer.start();

            creator.balance();
            if (statistics & timerStatistics)
              std::cout << "balancing for (" << rowId << "," << colId << "): " << timer.format();
            timer.start();
          }

          // Now that the pattern creator is filled, create pattern and matrix.

          // Count number of nonzeros.
          nnz = creator.nonzeroes();

          // Create the matrix (zero initialization of entries).
          matrix.reset(new Matrix(creator));
          if (statistics & timerStatistics)
            std::cout << "creating pattern & matrix for (" << rowId << "," << colId << "): " << timer.format();
          timer.start();
        }

        // Check density (nnz > rows*cols/2). Be careful not to get
        // overflow for very large and sparse matrices, hence the rearranged test!
        if (std::min(rows,cols)>0)
          isDense = nnz/std::min(rows,cols) > std::max(rows,cols)/2;
        else
          isDense = false; // well, empty matrices are as sparse as one can hope for ;)

        if (statistics & timerStatistics)
          std::cout << "init cleanup for (" << rowId << "," << colId << "): " << timer.format() << "\n";
      }

      // Returns true if the matrix is essentially dense (more than 50% nonzeros).
      bool dense() const { return isDense; }

    private:
      std::shared_ptr<Matrix> matrix;
      bool isDense;
    };
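
    /* Worked example for the overflow-safe density check in init() above (a
       sketch with made-up numbers): for rows = cols = 2'000'000 and
       nnz = 10'000'000, the naive test nnz > rows*cols/2 would form the product
       4e12, which overflows 32-bit index types. The rearranged test compares
       nnz/min(rows,cols) = 5 against max(rows,cols)/2 = 1'000'000 and correctly
       classifies the matrix as sparse (up to integer rounding).

         size_t rows = 2000000, cols = 2000000, nnz = 10000000;
         bool isDense = nnz/std::min(rows,cols) > std::max(rows,cols)/2;  // false
    */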

    // ---------------------------------------------------------------------------------------

    // a boost::fusion predicate telling us whether the current matrix block has the given row and column indices.
    template <int row, int col>
    struct IsBlock
    {
      template <class MatrixBlock>
      struct apply
      {
        using type = std::conditional_t<MatrixBlock::rowId==row && MatrixBlock::colId==col,boost::mpl::true_,boost::mpl::false_>;
      };
    };
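
    /* Usage sketch (an assumption, showing the intended use of the predicate):
       locating the matrix block with given row and column ids in a fusion
       sequence of MatrixBlocks; 'blocks' is a hypothetical BlockArray instance.

         auto it = boost::fusion::find_if<IsBlock<0,0>>(blocks); // iterator to block (0,0)
         auto& A  = boost::fusion::deref(it).globalMatrix();
    */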

    // ---------------------------------------------------------------------------------------

    template <class Policy, class AnsatzVariables, class TestVariables, class SparseIndex>
    class BlockArray
    {
      // Retain only those blocks which are present in the functional
      struct PresenceFilter
      {
        template <class Block>
        struct apply
        {
          typedef boost::mpl::bool_<Policy::template BlockInfo<Block::rowId,Block::colId>::present> type;
        };
      };

      static auto create(AnsatzVariables a, TestVariables t)
      {
        auto joiner = [=](auto blocks, auto rowVar)
        {
          using RowVar = decltype(rowVar);
          auto thisRow = transform(a,[](auto avar){ return MatrixBlock<Policy,RowVar,decltype(avar),SparseIndex>(); });
          return join(blocks,thisRow);
        };
        auto allBlocks = accumulate(t,vector<>(),joiner);
        return as_vector(filter_if<PresenceFilter>(allBlocks));
      }

    public:
      typedef decltype(create(AnsatzVariables(),TestVariables())) type;
    };

498
499 //---------------------------------------------------------------------
500
504 template <class Policy, class RV>
505 struct RhsBlock
506 {
507 typedef typename std::remove_reference<RV>::type RowVar;
508
509 static int const rowId = RowVar::id;
510
511 typedef typename Policy::Scalar Scalar;
512 typedef typename Policy::Spaces Spaces;
513 typedef Dune::FieldVector<Scalar,RowVar::m> BlockType;
514 typedef typename Dune::BlockVector<BlockType> Rhs;
515 typedef RhsBlock<Policy,RowVar> Self;
516
517 static int const rowSpaceIndex = RowVar::spaceIndex;
518 typedef typename SpaceType<Spaces,RowVar::spaceIndex>::type RowSpace;
519 typedef typename RowSpace::GridView::template Codim<0>::Iterator CellIterator;
520
521
525 void init(Spaces const& spaces, CellIterator /*first*/, CellIterator /*last*/, int nrg)
526 {
527 RowSpace const& rowSpace = *at_c<RowVar::spaceIndex>(spaces);
528 size_t rows = rowSpace.degreesOfFreedom();
529
530 rowGroupManager = std::make_unique<RowGroupManager>();
531 rowGroupManager->init(nrg,rows);
532 }
533
534 RowGroupManager& rowGroup() { return *rowGroupManager; }
535
536 // A class storing a local element rhs vector and the corresponding indices
537 struct LocalVector
538 {
539 // just because the ranges are not default constructible
540 LocalVector(): ridx(RowSpace::Mapper::initSortedIndexRange()) {}
541
542 std::vector<BlockType> vector;
543
544 typedef typename RowSpace::Mapper::SortedIndexRange SortedRowIdx;
545
549 SortedRowIdx ridx;
550 };
551
552 // A structure for holding a sequence of several local vectors to be filled sequentially
553 // and to be scattered together.
554 struct LocalVectors
555 {
556 typedef Self RhsBlock;
557 typedef std::vector<BlockType> LocalVectorType;
558
559 LocalVectors(int n, RhsBlock& rb_): localVectors(n), currentLocalVector(0), rb(&rb_) {}
560
561 // Storage for local rhs vectors.
562 std::vector<LocalVector> localVectors;
563
564 // Which is the current local vector to be filled?
565 int currentLocalVector;
566
567 // Pointing back to its rhs block
568 RhsBlock* rb;
569 };
570
571 private:
572 std::unique_ptr<RowGroupManager> rowGroupManager;
573 };
574
    template <class Policy, class TestVariables>
    class RhsBlockArray
    {
      // Define a mapping from Variable to RhsBlock for Variable as row variable.
      // Retain only those blocks which are present in the functional
      struct PresenceFilter
      {
        template <class Block>
        struct apply
        {
          typedef boost::mpl::bool_<Policy::template RhsBlockInfo<Block::rowId>::present> type;
        };
      };

      static auto create(TestVariables t)
      {
        auto allBlocks = transform(t,[](auto tvar) { return RhsBlock<Policy,decltype(tvar)>(); });
        return as_vector(filter_if<PresenceFilter>(allBlocks));
      }

    public:
      typedef decltype(create(TestVariables())) type;
    };

    //---------------------------------------------------------------------

    template <class Policy>
    struct RhsLocalData
    {
      RhsLocalData(int n_): n(n_) {}

      template <class RhsBlock>
      typename RhsBlock::LocalVectors operator()(RhsBlock const& rb) const
      {
        // Since boost::fusion::transform provides only const references to the source sequence
        // elements, we need a const cast here :(
        return typename RhsBlock::LocalVectors(n,removeConst(rb));
      }

    private:
      int n;
    };

    struct MatrixLocalData
    {
      MatrixLocalData(size_t s_)
      : s(s_) {}

      template <class MatrixBlock>
      auto operator()(MatrixBlock const& mb) const
      {
        // When switching from boost 1.51 to 1.57, it seems that the filter_view only presents
        // const references to the elements (why?), but we need non-const ones. Taking a non-const
        // reference as argument leads to failure of overload resolution, omitting this
        // functor, and ultimately to an error. As a workaround, we take a const reference and cast
        // the constness away.
        return LocalMatrices<typename MatrixBlock::BlockType,
                             MatrixBlock::lumped,
                             typename MatrixBlock::RowSpace::Mapper::SortedIndexRange,
                             typename MatrixBlock::ColSpace::Mapper::SortedIndexRange,
                             MatrixBlock>(removeConst(mb).globalMatrix(),s);
      }

    private:
      size_t s;
    };

    //---------------------------------------------------------------------

    template <class Spaces>
    class GetMaxDerivativeBase
    {
    public:
      GetMaxDerivativeBase(Spaces const& spaces_, std::map<void const*,int>& deriv_): spaces(spaces_), deriv(deriv_) {}

    private:
      Spaces const& spaces;
      std::map<void const*,int>& deriv;

    protected:
      template <int spaceIndex>
      void enterSpace(int d) const
      {
        void const* space = at_c<spaceIndex>(spaces);
        auto i = deriv.find(space);
        if (i==deriv.end())
          deriv[space] = d;
        else
          i->second = std::max(i->second,d);
      }
    };

    template <class Functional, class Spaces>
    class GetMaxDerivativeMatrix: public GetMaxDerivativeBase<Spaces>
    {
    public:
      GetMaxDerivativeMatrix(Spaces const& spaces, std::map<void const*,int>& deriv): GetMaxDerivativeBase<Spaces>(spaces,deriv) {}

      template <class LocalMatrices>
      void operator()(LocalMatrices const&) const
      {
        using MatrixBlock = typename LocalMatrices::InfoType;
        int d = Functional::template D2<MatrixBlock::rowId,MatrixBlock::colId>::derivatives;
        this->template enterSpace<MatrixBlock::rowSpaceIndex>(d);
        this->template enterSpace<MatrixBlock::colSpaceIndex>(d);
      }
    };


    template <class Functional, class Spaces>
    class GetMaxDerivativeRhs: public GetMaxDerivativeBase<Spaces>
    {
    public:
      GetMaxDerivativeRhs(Spaces const& spaces, std::map<void const*,int>& deriv): GetMaxDerivativeBase<Spaces>(spaces,deriv) {}

      template <class RBlock>
      void operator()(RBlock const&) const
      {
        int d = Functional::template D1<RBlock::RhsBlock::rowId>::derivatives;
        this->template enterSpace<RBlock::RhsBlock::rowSpaceIndex>(d);
      }
    };

    template <class Functional, class Spaces, class LocalMatrices, class RBlocks>
    std::map<void const*,int> derivatives(Functional const& f, Spaces const& spaces, LocalMatrices const& localMatrices, RBlocks const& rblocks)
    {
      std::map<void const*,int> deriv; // map from space address to maximum occurring derivative

      for_each(localMatrices,GetMaxDerivativeMatrix<Functional,Spaces>(spaces,deriv));
      for_each(rblocks,GetMaxDerivativeRhs<Functional,Spaces>(spaces,deriv));
      return deriv;
    }
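
    /* Sketch of the result (illustrative): derivatives() maps each space
       address to the highest derivative order any assembled block requests
       from it. For a Laplace-type problem whose D1/D2 declare first
       derivatives, one obtains

         auto deriv = derivatives(f,spaces,localMatrices,localRhs);
         // deriv[spaceAddress] == 1  ->  evaluators of that space must provide gradients
    */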

    //---------------------------------------------------------------------

    template <class Evaluators, class TestVariables>
    struct ClearLocalRhs
    {
      ClearLocalRhs(Evaluators const& evaluators_):
        eval(evaluators_)
      {}

      template <class LocalVectors>
      void operator()(LocalVectors& lv) const
      {
        // Deletes all entries from the vector and inserts the needed number
        // of zero elements. Thus, finite elements with different numbers
        // of shape functions are supported. Memory reallocations
        // rarely occur.
        assert(lv.currentLocalVector < lv.localVectors.size());

        int const rSpaceIdx = result_of::value_at_c<TestVariables,LocalVectors::RhsBlock::rowId>::type::spaceIndex;

        auto& reval = at_c<rSpaceIdx>(eval); // evaluator

        auto& rc = lv.localVectors[lv.currentLocalVector]; // current local vector structure

        // set current local vector to correct size and value 0
        rc.vector.resize(reval.size());
        std::fill(rc.vector.begin(),rc.vector.end(),0);

        // retrieve sorted indices
        rc.ridx = reval.sortedIndices();
      }

      Evaluators const& eval;
    };

    template <class Evaluators, class AnsatzVariableSetDescription, class TestVariableSetDescription>
    struct NewLocalMatrix
    {
      NewLocalMatrix(Evaluators const& reval_, Evaluators const& ceval_)
      : reval(reval_), ceval(ceval_)
      {}

      template <class LocalMatrices>
      void operator()(LocalMatrices& lm) const
      {
        using MatrixBlock = typename LocalMatrices::InfoType;

        int const cSpaceIdx = spaceIndex<AnsatzVariableSetDescription,MatrixBlock::colId>;
        int const rSpaceIdx = spaceIndex<TestVariableSetDescription ,MatrixBlock::rowId>;

        lm.push_back(at_c<rSpaceIdx>(reval).sortedIndices(),at_c<cSpaceIdx>(ceval).sortedIndices());
      }

      Evaluators const& reval;
      Evaluators const& ceval;
    };


    //---------------------------------------------------------------------

    template <class TestVariableSetDescription, class Evaluators, class Real, class Cache>
    struct UpdateLocalRhs
    {
      UpdateLocalRhs(Evaluators const& evaluators_, Real integrationFactor_, Cache const& cache_)
      : evaluators(evaluators_)
      , integrationFactor(integrationFactor_)
      , cache(cache_)
      {}

      template <class LocalVectors>
      void operator()(LocalVectors& lv) const
      {
        typedef typename LocalVectors::RhsBlock RhsBlock;

        int const rSpaceIndex = spaceIndex<TestVariableSetDescription,RhsBlock::rowId>;
        auto& rc = lv.localVectors[lv.currentLocalVector].vector; // current local vector
        size_t const rows = rc.size();
        for (size_t i=0; i<rows; ++i)
        {
          // Check that the cache returns the correct type. The typedefs just serve for
          // creating readable error messages.
          using rhsEntryType = std::remove_reference_t<decltype(rc[i])>;
          using d1ResultType = decltype(cache.template d1<RhsBlock::rowId>(at_c<rSpaceIndex>(evaluators).evalData[i]));
          static_assert(std::is_same<rhsEntryType,d1ResultType>::value,
                        "Types don't match. Check return type of d1() in problem formulation.");

          rc[i] += integrationFactor * cache.template d1<RhsBlock::rowId>(at_c<rSpaceIndex>(evaluators).evalData[i]);
          assert(!std::isnan(rc[i][0])); // check that first entry in rhs vector is valid
        }
      }

      Evaluators const& evaluators;
      Real integrationFactor;
      Cache const& cache;
    };
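
    /* Sketch of the return-type requirement enforced by the static_assert
       above (hedged example; VariationalArg and rhsDensity are illustrative
       assumptions, not names from this library): for a scalar test variable
       (m == 1), d1() in the problem formulation has to return the rhs entry
       type Dune::FieldVector<Scalar,1>, e.g.

         template <int row>
         Dune::FieldVector<double,1> d1(VariationalArg const& arg) const
         {
           return rhsDensity * arg.value;   // matches the entry type of rc[i]
         }
    */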


    template <class AnsatzVariableSetDescription, class TestVariableSetDescription, class Evaluators, class Real, class Cache>
    struct UpdateLocalMatrix
    {
      UpdateLocalMatrix(Evaluators const& evaluators_, Real integrationFactor_, Cache const& cache_):
        evaluators(evaluators_), integrationFactor(integrationFactor_), cache(cache_)
      {}

      template <class LocalMatrices>
      void operator()(LocalMatrices& lm) const
      {
        using MatrixBlock = typename LocalMatrices::InfoType;

        // Get the evaluated ansatz (column) and test (row) functions
        int const rowId = MatrixBlock::rowId;
        int const colId = MatrixBlock::colId;
        int const rSpaceIndex = spaceIndex<TestVariableSetDescription,rowId>;
        int const cSpaceIndex = spaceIndex<AnsatzVariableSetDescription,colId>;
        auto const& rEval = at_c<rSpaceIndex>(evaluators).evalData;
        auto const& cEval = at_c<cSpaceIndex>(evaluators).evalData;

        auto& A = lm.back(); // the current local Galerkin matrix to contribute to

        // retrieve number of localized test and ansatz functions
        int const rows = A.ridx().size();
        int const cols = A.cidx().size();

        // loop over all combinations of localized ansatz and test functions
        for (int i=0; i<rows; ++i)
        {
          int begin = MatrixBlock::lumped? i: 0;
          int end = (MatrixBlock::symmetric||MatrixBlock::lumped)? i+1: cols;
          for (int j=begin; j<end; ++j)
          {
            static_assert(std::is_same<std::remove_reference_t<decltype(A(i,j))>,
                                       decltype(cache.template d2<rowId,colId>(rEval[i],cEval[j]))
                                      >::value, "Types don't match. Check return type of d2() in problem formulation.");
            A(i,j) += integrationFactor * cache.template d2<rowId,colId>(rEval[i], cEval[j]);
            assert(!std::isnan(A(i,j)[0][0])); // check that top left entry is valid
          }
        }
      }

      Evaluators const& evaluators;
      Real integrationFactor;
      Cache const& cache;
    };
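
    /* The loop bounds above in a nutshell (worked sketch): for a symmetric
       3x3 local block only the lower triangle is accumulated, for a lumped
       block only the diagonal.

         for (int i=0; i<3; ++i)
         {
           int begin = lumped? i : 0;
           int end   = (symmetric||lumped)? i+1 : 3;
           for (int j=begin; j<end; ++j)
             ;  // symmetric: visits only j<=i; lumped: visits only j==i
         }
    */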


    // TODO: factor common functionality out of both UpdateLocalMatrix classes
    template <class AnsatzVariableSetDescription, class TestVariableSetDescription,
              class Evaluators, class Real, class Cache>
    struct UpdateLocalMatrixFromInnerBoundaryCache
    {
      UpdateLocalMatrixFromInnerBoundaryCache(Evaluators const& insideEval, Evaluators const& outsideEval,
                                              Real integrationFactor_, Cache const& cache_, bool sameCell_)
      : insideEvaluator(insideEval)
      , outsideEvaluator(outsideEval)
      , integrationFactor(integrationFactor_)
      , cache(cache_)
      , sameCell(sameCell_)
      {}

      template <class LocalMatrices>
      void operator()(LocalMatrices& lm) const
      {
        using MatrixBlock = typename LocalMatrices::InfoType;

        int const rSpaceIndex = spaceIndex<TestVariableSetDescription, MatrixBlock::rowId>;
        int const cSpaceIndex = spaceIndex<AnsatzVariableSetDescription,MatrixBlock::colId>;

        auto& A = lm.localMatrices.back();

        // retrieve number of localized test and ansatz functions
        int const rows = A.ridx().size();
        int const cols = A.cidx().size();

        // loop over all combinations of localized ansatz and test functions
        for (int i=0; i<rows; ++i)
        {
          int begin = MatrixBlock::lumped? i: 0;
          int end = ((MatrixBlock::symmetric && sameCell)||MatrixBlock::lumped)? i+1: cols;
          for (int j=begin; j<end; ++j)
          {
            auto tmp = cache.template d2<MatrixBlock::rowId,MatrixBlock::colId>(
                           at_c<rSpaceIndex>(insideEvaluator).evalData[i],
                           at_c<cSpaceIndex>(outsideEvaluator).evalData[j], sameCell);
            A(i,j) += integrationFactor * tmp;
            assert(!std::isnan(A(i,j)[0][0])); // check that top left entry is valid
          }
        }
      }

      Evaluators const& insideEvaluator, outsideEvaluator;
      Real integrationFactor;
      Cache const& cache;
      bool sameCell;
    };
    //---------------------------------------------------------------------

    template <class GlobalRhs>
    struct ScatterLocalRhs
    {
      ScatterLocalRhs(GlobalRhs& globalRhs_, bool immediate_ = false):
        globalRhs(globalRhs_), immediate(immediate_)
      {}

      template <class LocalVectors>
      void operator()(LocalVectors& lv) const
      {
        auto& gRhs = at_c<LocalVectors::RhsBlock::rowId>(globalRhs.data);

        auto const& rowGroups = lv.rb->rowGroup();

        if (immediate || lv.currentLocalVector+1==lv.localVectors.size())
        {
          int const last = immediate ? lv.currentLocalVector : lv.localVectors.size();

          // step through all row groups and scatter all the included local vectors' components
          for (int rg=0; rg<rowGroups.size(); ++rg)
          {
            // obtain and lock the row group
            auto const [firstRow,lastRow] = rowGroups.range(rg);
            std::lock_guard lock(rowGroups.mutex(rg));

            // step through all local vectors
            for (int i=0; i<last; ++i)
            {
              auto& ri = lv.localVectors[i].vector;
              auto& ridx = lv.localVectors[i].ridx;

              assert(ri.size()==ridx.size());

              for (int r=0; r<ridx.size(); ++r)
              {
                size_t const rGlobalIndex = ridx[r].first;

                // check if this row is inside the row group
                if (firstRow<=rGlobalIndex && rGlobalIndex<lastRow)
                {
                  size_t const rLocalIndex = ridx[r].second;
                  gRhs[rGlobalIndex] += ri[rLocalIndex];
                }
              }
            }
          }

          // everything's scattered - clean up
          lv.currentLocalVector = 0;
        }
        else
          ++lv.currentLocalVector; // prepare next local vector for updating
      }

      GlobalRhs& globalRhs;
      bool immediate;
    };
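
    /* Buffering sketch (names are assumptions): operator() is called once per
       assembled cell. The first n-1 calls merely advance currentLocalVector;
       the n-th call (or immediate==true) locks each row group once and
       scatters all buffered local vectors, amortizing the locking cost over
       n cells.

         ScatterLocalRhs<GlobalRhs> scatter(globalRhs);  // buffered scattering
         for_each(localRhs,scatter);                     // once per cell
         for_each(localRhs,ScatterLocalRhs<GlobalRhs>(globalRhs,true)); // flush the rest
    */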

    //---------------------------------------------------------------------------------------------

    template <class Functional>
    struct SymmetrizeLocalMatrix
    {
      template <class LocalMatrices>
      void operator()(LocalMatrices& lm) const
      {
        using MatrixBlock = typename LocalMatrices::InfoType;

        // If a block is symmetric, only the lower part of the local
        // matrix is filled. Mirror it to the upper part for easy access in
        // the following loop. For lumped (purely diagonal) blocks, of course,
        // nothing needs to be done.
        if constexpr (!MatrixBlock::lumped && MatrixBlock::symmetric)
        {
          auto& m = lm.back();
          int const N = m.ridx().size();
          assert(N==m.cidx().size());

          // copy lower triangular part to upper triangular part, transposing
          // each entry
          for (int i=0; i<N-1; ++i)
            for (int j=i+1; j<N; ++j)
              m(i,j) = transpose(m(j,i));

          // If we are asked to enforce positive semidefiniteness of these local
          // matrices (symmetric and non-lumped), we do this using an eigendecomposition,
          // shifting all nonpositive eigenvalues to zero.
          if constexpr (MatrixBlock::makePositive)
          {
            assert(f);
            constexpr int row = MatrixBlock::rowId;
            constexpr int col = MatrixBlock::colId;

            // First we need to extract the local matrix.
            using Entry = typename MatrixBlock::BlockType;
            DynamicMatrix<Entry> Aloc(N,N);
            for (int j=0; j<N; ++j)
              for (int i=0; i<N; ++i)
                Aloc[i][j] = m(i,j);

            // now we set all negative eigenvalues to zero in the eigendecomposition
            bool changed = makePositiveSemiDefinite(Aloc,f->template makePositiveThreshold<row,col>());

            // now we copy the matrix back
            if (changed)
              for (int j=0; j<N; ++j)
                for (int i=0; i<N; ++i)
                  m(i,j) = Aloc[i][j];
          }
        }
      }

      Functional const* f;
    };
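
    /* Effect of the mirroring above (worked sketch): if for a symmetric block
       only m(1,0) of the off-diagonal entries has been assembled, afterwards

         m(0,1) = transpose(m(1,0));   // entrywise: m(0,1)[a][b] == m(1,0)[b][a]

       holds, so the local matrix is symmetric when read as a flat scalar matrix.
    */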

    //---------------------------------------------------------------------
    //---------------------------------------------------------------------

    template <class Functional>
    struct VariationalFunctionalPolicy
    {
      typedef typename Functional::Scalar Scalar;
      typedef typename Functional::AnsatzVars::Spaces Spaces;
      static constexpr bool considerInnerFaces = hasInnerBoundaryCache<Functional>;

      template <int rowId>
      struct RhsBlockInfo
      {
        static bool const present = Functional::template D1<rowId>::present;
      };


      template <int rowId, int colId>
      struct BlockInfo
      {
        static bool const present = Functional::template D2<rowId,colId>::present && rowId>=colId;
        static bool const symmetric = (rowId==colId || Functional::template D2<rowId,colId>::symmetric)
                                      && (result_of::value_at_c<typename Functional::TestVars::Variables,rowId>::type::spaceIndex
                                          == result_of::value_at_c<typename Functional::AnsatzVars::Variables,colId>::type::spaceIndex);
        static bool const mirror = rowId>colId;
        static bool const lumped = symmetric && Functional::template D2<rowId,colId>::lumped;
        static bool const makePositive = symmetric && Functional::template D2<rowId,colId>::makePositive;
      };

      Scalar functional() const { return fValue; }

      static constexpr bool hasValue = true;

      Scalar fValue;
    };
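
    /* Block layout sketch implied by BlockInfo above (illustrative, two
       variables, all D2 blocks present): for a variational functional only
       the lower block triangle is assembled and stored,

         stored blocks:  (0,0) (1,0) (1,1)
         mirrored:       (0,1) is recovered as the transpose of (1,0)

       which is what the 'present' and 'mirror' flags encode.
    */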

    template <class Functional>
    struct WeakFormulationPolicy
    {
      typedef typename Functional::Scalar Scalar;
      typedef typename Functional::AnsatzVars::Spaces Spaces;
      static constexpr bool considerInnerFaces = hasInnerBoundaryCache<Functional>;

      template <int rowId>
      struct RhsBlockInfo
      {
        static bool const present = Functional::template D1<rowId>::present;
      };


      template <int rowId, int colId>
      struct BlockInfo
      {
        static bool const present = Functional::template D2<rowId,colId>::present;
        static bool const symmetric = Functional::template D2<rowId,colId>::symmetric
                                      && (result_of::value_at_c<typename Functional::TestVars::Variables,rowId>::type::spaceIndex
                                          == result_of::value_at_c<typename Functional::AnsatzVars::Variables,colId>::type::spaceIndex);
        static bool const mirror = false;
        static bool const lumped = symmetric && Functional::template D2<rowId,colId>::lumped;
        static bool const makePositive = symmetric && Functional::template D2<rowId,colId>::makePositive;
      };

    public:
      static constexpr bool hasValue = false;

      Scalar fValue = 0;
    };
1158
1159
1164 template <class Problem>
1165 using FormulationPolicy = std::conditional_t<Problem::type==VariationalFunctional,
1166 VariationalFunctionalPolicy<Problem>,
1167 WeakFormulationPolicy<Problem>>;
1168
1169 // -------------------------------------------------------------------------------------------------------
1170
1175 struct TakeAllBlocks
1176 {
1177 template <int row> struct D1 { static bool const assemble = true; };
1178 template <int row, int col> struct D2 { static bool const assemble = true; };
1179 };
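
    /* A custom block filter in the same shape as TakeAllBlocks (sketch;
       DiagonalBlocks is a made-up name): assemble only the rhs of variable 0
       and the diagonal matrix blocks.

         struct DiagonalBlocks
         {
           template <int row>          struct D1 { static bool const assemble = row==0;   };
           template <int row, int col> struct D2 { static bool const assemble = row==col; };
         };
         // assembler.template assemble<DiagonalBlocks>(f,cellFilter,flags);
    */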

    // Mapping a block filter to an MPL predicate
    template <class BlockFilter>
    struct MatrixBlockFilter
    {
      template <class X>
      struct apply
      {
        typedef boost::mpl::bool_<BlockFilter::template D2<X::rowId,X::colId>::assemble> type;
      };
    };

    template <class> struct Fill;

    template <class Block, bool, bool>
    struct CopyBlock
    {
      template <class OtherBlock>
      static void apply(OtherBlock const&, std::unique_ptr<Block>&, bool, size_t){}
    };

    template <class Scalar, int n, class Allocator, bool symmetric>
    struct CopyBlock<Dune::BCRSMatrix<Dune::FieldMatrix<Scalar,n,n>,Allocator>,symmetric,true>
    {
      typedef Dune::BCRSMatrix<Dune::FieldMatrix<Scalar,n,n>,Allocator> Block;

      static void apply(Block const& from, std::unique_ptr<Block>& to, bool extractOnlyLowerTriangle, size_t nnz)
      {
        // copy
        if(extractOnlyLowerTriangle == symmetric)
        {
          to.reset(new Block(from));
          return;
        }

        // only lower triangle is stored in the assembler -> restore full matrix
        if( (!extractOnlyLowerTriangle && symmetric) || (extractOnlyLowerTriangle && !symmetric) )
        {
          std::vector<std::vector<size_t> > bcrsPattern(from.N());
          for(size_t i=0; i<from.N(); ++i)
            for(size_t j=0; j<=i; ++j)
            {
              if(from.exists(i,j))
              {
                bcrsPattern[i].push_back(j);
                if(i!=j && !extractOnlyLowerTriangle)
                  bcrsPattern[j].push_back(i);
              }
            }
          for(std::vector<size_t>& row : bcrsPattern) std::sort(row.begin(),row.end());

          to.reset(new Block(from.N(),from.M(),nnz,Block::row_wise));

          // init sparsity pattern
          for (typename Block::CreateIterator row=to->createbegin(); row!=to->createend(); ++row)
            for(size_t col : bcrsPattern[row.index()]) row.insert(col);

          // read data
          for(size_t row=0; row<bcrsPattern.size(); ++row)
            for(size_t col : bcrsPattern[row])
            {
              if(row >= col) (*to)[row][col] = from[row][col];
              else if(!extractOnlyLowerTriangle)
              {
                for(size_t i=0; i<n; ++i)
                  for(size_t j=0; j<n; ++j)
                    (*to)[row][col][i][j] = from[col][row][j][i];
              }
            }
        }
      }
    };


    template <class BCRSMat, int rowIndex, int columnIndex>
    struct BlockToBCRS
    {
      explicit BlockToBCRS(std::unique_ptr<BCRSMat>& m, bool extractOnlyLowerTriangle_, size_t nnz_)
      : mat(m), extractOnlyLowerTriangle(extractOnlyLowerTriangle_), nnz(nnz_)
      {}

      template <class MatrixBlock>
      void operator()(MatrixBlock const& mb) const
      {
        CopyBlock<BCRSMat,MatrixBlock::symmetric,MatrixBlock::rowId==rowIndex && MatrixBlock::colId==columnIndex>::apply(mb.globalMatrix(),mat,extractOnlyLowerTriangle,nnz);
      }

    private:
      std::unique_ptr<BCRSMat>& mat;
      bool extractOnlyLowerTriangle;
      size_t nnz;
    };


  } // End of namespace AssemblyDetail

  //---------------------------------------------------------------------
  //---------------------------------------------------------------------

  template <class GridView>
  class CachingBoundaryDetector
  {
    using Cell = Kaskade::Cell<GridView>;

  public:
    CachingBoundaryDetector(GridSignals& signals, GridView const& gridView_) :
      gridView(gridView_), indexSet(gridView.indexSet())
    {
      refConnection = signals.informAboutRefinement.connect(int(GridSignals::allocResources),
                                                            [=](GridSignals::Status when){this->update(when);});
    }

    CachingBoundaryDetector(GridView const& gridView_) :
      gridView(gridView_), indexSet(gridView.indexSet())
    {
    }

    bool hasBoundaryIntersections(Cell const& cell) const
    {
      assert(indexSet.size(0)==boundaryFlag.size()); // check for grid adaptation...
      return boundaryFlag[indexSet.index(cell)];
    }

  private:
    GridView const& gridView;
    typename GridView::IndexSet const& indexSet;
    std::vector<bool> boundaryFlag;
    boost::signals2::scoped_connection refConnection;

    void update(int const status)
    {
      using namespace Dune;

      if (status == GridSignals::AfterRefinement)
      {
        boundaryFlag.resize(indexSet.size(0));
        // TODO: run this loop in parallel (not so easy because of concurrent access to the bool vector...)
        for (auto const& cell: elements(gridView))
          boundaryFlag[indexSet.index(cell)] = cell.hasBoundaryIntersections();
      }
    }
  };

  class ForwardingBoundaryDetector
  {
  public:
    template <class GridView>
    ForwardingBoundaryDetector(GridSignals const&, GridView const&)
    {}

    template <class GridView>
    ForwardingBoundaryDetector(GridView const&)
    {}

    template <class Entity>
    bool hasBoundaryIntersections(Entity const& cell) const
    {
      return cell.hasBoundaryIntersections();
    }
  };

  class TrivialBoundaryDetector
  {
  public:
    template <class GridView>
    TrivialBoundaryDetector(GridSignals const&, GridView const&)
    {}

    template <class GridView>
    TrivialBoundaryDetector(GridView const&)
    {}

    template <class Entity>
    bool hasBoundaryIntersections(Entity const& cell) const
    {
      return true;
    }
  };
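
  /* Choosing a detector (sketch): the boundary detector is the third template
     parameter of the assembler, e.g.

       VariationalFunctionalAssembler<F,size_t,TrivialBoundaryDetector> assembler(spaces);

     CachingBoundaryDetector trades one flag per cell (updated on refinement)
     for fast hasBoundaryIntersections() queries; TrivialBoundaryDetector
     reports true for every cell and thus visits all faces.
  */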


  //---------------------------------------------------------------------

  //---------------------------------------------------------------------
  //---------------------------------------------------------------------

  namespace Assembler
  {
    enum { VALUE=0x1, RHS=0x2, MATRIX=0x4, EVERYTHING=0x7 };
  }
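
  /* Usage sketch: the flags form a bit mask, so partial (re)assembly is
     requested by or-ing them, e.g. recomputing only the functional value and
     the right hand side:

       assembler.assemble(f, Assembler::VALUE|Assembler::RHS);
  */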

  // ----------------------------------------------------------------------------------------------
  // ----------------------------------------------------------------------------------------------

  template <class F,
            class SparseIndex = size_t,
            class BoundaryDetector = CachingBoundaryDetector<typename F::AnsatzVars::GridView>,
            class QuadRule = Dune::QuadratureRule<typename F::AnsatzVars::Grid::ctype,
                                                  F::AnsatzVars::Grid::dimension>>
  class VariationalFunctionalAssembler
  : public AssemblyDetail::FormulationPolicy<F>
  {
  protected:
    typedef typename AssemblyDetail::FormulationPolicy<F> Policy;

  public:
    typedef VariationalFunctionalAssembler<F,SparseIndex,BoundaryDetector,QuadRule> Self;

    typedef F Functional;
    typedef typename Functional::AnsatzVars AnsatzVariableSetDescription;

    typedef typename Functional::TestVars TestVariableSetDescription;

    typedef typename AnsatzVariableSetDescription::Grid Grid;

    typedef typename AnsatzVariableSetDescription::GridView GridView;

    typedef typename AnsatzVariableSetDescription::Spaces Spaces;
    typedef typename AnsatzVariableSetDescription::IndexSet IndexSet;

    typedef typename Functional::Scalar field_type;


  protected:
    typedef typename AnsatzVariableSetDescription::Variables AnsatzVariables;
    typedef typename TestVariableSetDescription::Variables TestVariables;

    static_assert(std::is_same<typename AnsatzVariableSetDescription::Spaces, typename TestVariableSetDescription::Spaces>::value,
                  "VariableSetDescriptions for ansatz spaces and test spaces must provide the same space list.");
    static_assert(Functional::type==WeakFormulation || std::is_same<AnsatzVariableSetDescription,TestVariableSetDescription>::value,
                  "For problem type VariationalFunctional, ansatz and test variables must be the same.");

    // Whether we need to assemble on inner faces.
    constexpr static bool innerBoundaries = hasInnerBoundaryCache<Functional>;

  public:

    using MatrixBlockArray = typename AssemblyDetail::BlockArray<Policy,AnsatzVariables,TestVariables,SparseIndex>::type;

    typedef typename AssemblyDetail::RhsBlockArray<Policy,TestVariables>::type RhsBlockArray;

    typedef typename TestVariableSetDescription::template CoefficientVectorRepresentation<>::type RhsArray;

  private:
    VariationalFunctionalAssembler(GridManagerBase<Grid>& gridManager_, Spaces const& spaces)
    : spaces_(spaces),
      gridManager(gridManager_),
      gv(boost::fusion::at_c<0>(spaces)->gridView()),
      indexSet(gv.indexSet()),
      boundaryDetector(gridManager_.signals,gv),

      rowBlockFactor(2.0),
      localStorageSize(128000) // roughly 128 kB
    {
      static_assert(symmetryCheck<Functional,std::min(AnsatzVariableSetDescription::noOfVariables,
                                                      TestVariableSetDescription::noOfVariables)>,
                    "If a problem is of type VariationalFunctional, the present diagonal blocks need to be marked as symmetric in D2.");

        [this](GridSignals::Status when){this->reactToRefinement(when);});
    }


  public:
    VariationalFunctionalAssembler(Spaces const& spaces)
    : VariationalFunctionalAssembler(boost::fusion::deref(boost::fusion::begin(spaces))->gridManager(),spaces)
    {
    }

    // injected here for backward compatibility and convenience. TODO: Remove in the long run?
    static unsigned int const VALUE = Assembler::VALUE;
    static unsigned int const RHS = Assembler::RHS;
    static unsigned int const MATRIX = Assembler::MATRIX;
    static unsigned int const EVERYTHING = Assembler::EVERYTHING;

    Self& setNSimultaneousBlocks(int n)
    {
      nSimultaneousBlocks = n;
      return *this;
    }

    Self& setRowBlockFactor(double a)
    {
      rowBlockFactor = a;
      return *this;
    }

    Self& setLocalStorageSize(size_t s)
    {
      localStorageSize = s;
      return *this;
    }

    Self& setGatherTimings(bool gt)
    {
      gatherTimings = gt;
      return *this;
    }

    void assemble(F const& f, unsigned int flags=Assembler::EVERYTHING, int nThreads=0, bool verbose=false)
    {
      if (flags)
        assemble<AssemblyDetail::TakeAllBlocks>(f,[](auto const& cell) { return true; },
                                                flags,nThreads,verbose);
    }
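
    /* Call sketch for the block/cell filter overload below (material() is a
       made-up predicate): restrict assembly of the matrix to a subdomain
       while taking all blocks.

         assembler.template assemble<AssemblyDetail::TakeAllBlocks>(
             f,
             [&](auto const& cell) { return material(cell)==1; },  // cell filter
             Assembler::MATRIX, 4);                                // 4 parallel tasks
    */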

  protected:
    // Define iterator and entity type for stepping through all cells of the grid.
    typedef typename GridView::template Codim<0>::Iterator CellIterator;
    typedef typename CellIterator::Entity Entity;


  public:

    template <class BlockFilter, class CellFilter>
    void assemble(F const& f, CellFilter const& cellFilter, unsigned int flags=Assembler::EVERYTHING, int nTasks=0, bool verbose=false)
    {
      using namespace Dune;
      using namespace boost::fusion;
      using namespace AssemblyDetail;

      auto& timings = Timings::instance();
      ScopedTimingSection assemblyTiming("assembly",gatherTimings);

      if (gatherTimings) timings.start("get cell ranges");
      auto const& cellRanges = gridManager.cellRanges(gv);
      if (gatherTimings) timings.stop("get cell ranges");

      // Choose the number of tasks
      if (nTasks < 1)
        nTasks = NumaThreadPool::instance().cpus();

      {
        if (verbose)
          std::cout << "Grid is not thread safe. Reducing number of used threads to 1." << std::endl;
        nTasks = 1;
      }
      else
        if(verbose) std::cout << "#tasks:" << nTasks << " " << std::endl;
      assert(nTasks>=1);

      nTasks = std::min(nTasks,cellRanges.maxRanges());

      if (gatherTimings) timings.start("creating rhs/matrix data structures");
      // set entries to zero
      auto clearGlobalData = [](auto& x) { x = 0; };

      if (flags & RHS)
        for_each(getRhs().first.data,clearGlobalData);

      if (flags & MATRIX)
      {
        typedef filter_view<MatrixBlockArray,MatrixBlockFilter<BlockFilter>> MatrixBlockFilter;
        for_each(MatrixBlockFilter(getMatrix(f)),clearGlobalData);
      }
      if (gatherTimings) timings.stop("creating rhs/matrix data structures");

      // If there is only one task, things are somewhat simpler and more efficient...
      if (nTasks == 1)
      {
        ScopedTimingSection seqAssemblyTiming("perform sequential assembly",gatherTimings);
        Scalar retval =
          assembleCellRange<BlockFilter>(f,cellFilter,flags,gv.template begin<0>(),gv.template end<0>(),0);
        if (flags & VALUE)
          this->fValue = retval;
      }
      else
      {
        ScopedTimingSection parAssTime("perform parallel assembly",gatherTimings);
        // Partition the cells into ranges of almost equal size and do the assembly in parallel.
        std::vector<Scalar> values(nTasks,0.0);

        parallelFor([&cellRanges,this,&f,flags,&values,&cellFilter](int i, int n) {
                      auto range = cellRanges.range(n,i);
                      values[i] = this->assembleCellRange<BlockFilter>(f,cellFilter,flags,range.begin(),range.end(),i);
                    },
                    nTasks);

        // At the end, add all the functional values up
        if (flags & VALUE)
          this->fValue = std::accumulate(values.begin(),values.end(),0.0);
      }

      // announce validity of assembled data
      validparts |= flags;
    }

  private:


    template <class BlockFilter, class CellFilter>
    Scalar assembleCellRange(F const& f, CellFilter const& cellFilter,
                             unsigned int flags, CellIterator cell, CellIterator last, int threadNo)
    {
      using namespace Dune;
      using namespace boost::fusion;
      using namespace AssemblyDetail;

      // timer for gathering performance statistics
      boost::timer::cpu_timer scatterTimer, evalTimer, localTimer, setupTimer, totalTimer;
      if (statistics & localTimerStatistics)
      {
        evalTimer.stop();
        scatterTimer.stop();
        localTimer.stop();
      }
      // Obtain global data structures. Note that there is no race condition even if the
      // data is constructed on demand, as the required data structures have been created
      // and initialized before.
      MatrixBlockArray* globalMatrix    = flags & MATRIX? & getMatrix()     : nullptr;
      RhsArray*         globalRhs       = flags & RHS   ? & getRhs().first  : nullptr;
      RhsBlockArray*    globalRhsBlocks = flags & RHS   ? & getRhs().second : nullptr;


      using MatrixBlockFilter = filter_view<MatrixBlockArray,AssemblyDetail::MatrixBlockFilter<BlockFilter>>;

      // Local contribution to global value.
      Scalar fValue = 0;

      int const dim = Grid::dimension;
      typedef typename Grid::ctype CoordType;

      // Allocate local rhs vectors that can be reused on each
      // element. Since all variables may have different element types
      // (i.e. different numbers of components), this has to be one local
      // rhs per variable.
      auto localRhs = as_vector(transform(*globalRhsBlocks,RhsLocalData<Policy>(nSimultaneousBlocks))); // assemble several cells en bloc before scattering

      // We assemble local matrices into a buffer from where they are scattered simultaneously. The buffers
      // scatter automatically when they are full, such that access to previous local matrices is not possible.
      // For DG methods, there are several local matrices that are assembled simultaneously in a particular
      // access pattern, such that we use two buffers. The total buffer size is localStorageSize.
      using LocalMatrices = decltype(as_vector(transform(MatrixBlockFilter(*globalMatrix),MatrixLocalData(0))));
      LocalMatrices localMatrices, localNeighborMatrices;
      if (flags & MATRIX)
      {
        localMatrices = as_vector(transform(MatrixBlockFilter(*globalMatrix),
                                            MatrixLocalData(innerBoundaries? localStorageSize/2: localStorageSize)));
        localNeighborMatrices = as_vector(transform(MatrixBlockFilter(*globalMatrix),
                                                    MatrixLocalData(innerBoundaries? localStorageSize/2: 0)));
      }

      // element-local functional value
      Scalar floc = 0;

      // Shape function cache. Remember that every thread has to use its own cache, thus we create our own here.
      typedef ShapeFunctionCache<Grid,Scalar> SfCache;
      SfCache sfCache;

      // Quadrature rule caches. Remember that every thread has to use its own quadrature caches, thus we create our own here.
      QuadratureTraits<QuadRule> quadratureCache;
      QuadratureTraits<QuadratureRule<CoordType,dim-1>> faceQuadratureCache;

      // Evaluators for shape functions of all FE spaces. Remember that
      // every thread has to use its own collection of evaluators (for both the current cell and neighbouring cells in case
      // we assemble on inner boundaries)!
      auto evaluators = getEvaluators(spaces(),&sfCache,AssemblyDetail::derivatives(f,spaces(),localMatrices,localRhs));
      using Evaluators = decltype(evaluators);
      auto neighbourEvaluators = evaluators;
      // Create the "caches" responsible for evaluating the variational functional / weak formulation.
      // The inner boundary cache need not exist, thus we evaluate it as 0 if not present. The lambda function
      // is a trick to allow using constexpr if.
      auto domainCache = f.createDomainCache(flags);
      auto boundaryCache = f.createBoundaryCache(flags);
      auto innerBoundaryCache = [&](){ if constexpr (innerBoundaries) return f.createInnerBoundaryCache(flags);
                                       else return 0; }();
      // The symmetrizer functor. We pass the functional
      // such that run-time meta information, e.g., on enforcing positive definiteness,
      // can be used.
      SymmetrizeLocalMatrix<Functional> symmetrizeLocalMatrix{&f};

      // Dummy variable sets to be fed to boost::fusion algorithms.
      typename AnsatzVariableSetDescription::Variables ansatzVars;
      typename TestVariableSetDescription::Variables testVars;

      if (statistics & localTimerStatistics)
        setupTimer.stop();

      // Assemble representation: step through all elements, assemble
      // the local matrix and rhs, and scatter their entries into the
      // global data structures.
      if (statistics & localTimerStatistics)
        evalTimer.resume();

      while(cell!=last)
      {
        // If the cell is not of interest, skip it
        if (!cellFilter(*cell))
        {
          ++cell;
          continue;
        }

        // tell the problem on which cell we are
        domainCache.moveTo(*cell);

        // for all spaces involved, compute the shape functions and
        // their global indices, which are needed for evaluating the
        // functional's derivative.
        moveEvaluatorsToCell(evaluators,*cell,indexSet.index(*cell));
        int const shapeFunctionMaxOrder = maxOrder(evaluators);

        if (statistics & localTimerStatistics) localTimer.resume();
        if (statistics & localTimerStatistics) evalTimer.stop();

        // create new local matrices and right hand sides
        floc = 0;
        if (flags & RHS)
          for_each(localRhs,ClearLocalRhs<Evaluators,TestVariables>(evaluators));

        if (flags & MATRIX)
          for_each(localMatrices,NewLocalMatrix<Evaluators,AnsatzVariableSetDescription,
                                                TestVariableSetDescription>(evaluators,evaluators));

        // loop over all quadrature points and integrate the local stiffness matrix and rhs
        int const integrationOrderOnCell = f.integrationOrder(*cell,shapeFunctionMaxOrder,false);
        GeometryType gt = cell->type();

        QuadRule const qr = quadratureCache.rule(gt,integrationOrderOnCell);
        useQuadratureRuleInEvaluators(evaluators,qr,0);

        size_t nQuadPos = qr.size();
        for (size_t g=0; g<nQuadPos; ++g)
        {
          // position of the integration point
          FieldVector<CoordType,dim> const& quadPos = qr[g].position();

          // for all spaces involved, update the evaluators associated to this quadrature point
          moveEvaluatorsToIntegrationPoint(evaluators,quadPos,qr,g,0);

          // prepare evaluation of the functional
          domainCache.evaluateAt(quadPos,evaluators);

          CoordType weightTimesDetJac(cell->geometry().integrationElement(quadPos)); // determinant of the Jacobian
          assert(weightTimesDetJac > 0);
          // weight of the quadrature point
          weightTimesDetJac *= qr[g].weight();

          // step through all local matrix and rhs blocks and update their data
          if constexpr (Self::hasValue)
            if (flags & VALUE)
              floc += weightTimesDetJac*domainCache.d0();
          if (flags & RHS)
            for_each(localRhs,UpdateLocalRhs<TestVariableSetDescription,Evaluators,Scalar,
                                             typename F::DomainCache>(evaluators,weightTimesDetJac,domainCache));
          if (flags & MATRIX)
            for_each(localMatrices,UpdateLocalMatrix<AnsatzVariableSetDescription,TestVariableSetDescription,
                                                     Evaluators,Scalar,typename F::DomainCache>(
                                                       evaluators,weightTimesDetJac,domainCache));
        }


        // Handle Robin (Cauchy) type boundary conditions and interior face terms. Loop over all
        // faces (codim 1) that are exterior boundaries or are relevant due to interior face
        // terms and integrate the contribution. We assume that intersections on the domain
        // boundary always consist of the whole face (codim 1 subentity).
        if(innerBoundaries || boundaryDetector.hasBoundaryIntersections(*cell))
        {
          int const integrationOrderOnFace = f.integrationOrder(*cell,shapeFunctionMaxOrder,true);

          // This would be nice, but since there is no way to obtain an IntersectionIterator from an
          // Intersection, we'd need to pass the Intersection directly to boundaryCache.moveTo()
          // - which would be nicer, but is a breaking change. Consider this for the next major revision.
          //
          // for (auto const& face: intersections(gv,*cell))
          //
          // Instead, we use the old style.
          auto faceEnd = gv.iend(*cell);
          for (auto face=gv.ibegin(*cell); face!=faceEnd; ++face)
          {
            // @warning: make SURE the following does always hold: (i)
            // On boundary faces, there is exactly one intersection, and
            // this covers the whole face. (ii) When an intersection
            // covers the whole codim 1 subentity, its geometry in the
            // cell is the same as specified in the reference
            // element. Currently this is not guaranteed by the Dune
            // interface. Maybe there is a relation to the ominous
            // TwistUtility from Freiburg?

            bool const onBoundary = face->boundary();

            // Assemble only if needed.
            if (innerBoundaries || onBoundary)
            {
              GeometryType gt = face->type();
              QuadratureRule<CoordType,dim-1> const qr = faceQuadratureCache.rule(gt,integrationOrderOnFace);
              size_t const nQuadPos = qr.size();

              // Move the caches to the current face (and the neighbouring evaluators to the adjacent cell).
              if (onBoundary)
                boundaryCache.moveTo(face);
              else if constexpr (innerBoundaries) // the constexpr if just protects the otherwise ill-formed body
              {
                assert(face->neighbor());
                assert(gt == face->geometryInOutside().type());

                // If the problem claims the face is not relevant, skip assembly
                if (! f.considerFace(*face))
                  continue;

                innerBoundaryCache.moveTo(face);
                moveEvaluatorsToCell(neighbourEvaluators,face->outside());

                // for each neighbour (i.e. face), there is one local matrix that is computed by quadrature. Let's
                // create a new local matrix if needed. The row (test variable) is given by the current cell, and the
                // column (ansatz variable) is determined by the neighbour.
                if (flags & MATRIX)
                  for_each(localNeighborMatrices,
                           NewLocalMatrix<Evaluators,AnsatzVariableSetDescription,
                                          TestVariableSetDescription>(evaluators,neighbourEvaluators));
              }
1960
1961
1962 for (size_t g=0; g<nQuadPos; ++g)
1963 {
1964 // Evaluation by quadrature point index cannot be used on faces due to inconsistent numbering
1965 // of quadrature points relative to the cell (see below). Hence we don't need to announce the quadrature
1966 // rule to the evaluators here.
1967 // useQuadratureRuleInEvaluators(evaluators,qr,face->indexInInside());
1968
1969 // position of integration point
1970 Dune::FieldVector<CoordType,dim-1> quadPos = qr[g].position();
1971 CoordType weightTimesDetJac = qr[g].weight() * face->geometry().integrationElement(quadPos);
1972
1973 // evaluate values at integration points for all spaces
1974 // involved, update the evaluators associated to this
1975 // quadrature point
1976 //
1977 // TODO: check whether this call is
1978 // thread-safe. (currently as of 2010-02-11 this is not
1979 // guaranteed by the Dune interface for UG). Copying the
1980 // quadrature position instead of obtaining a const
1981 // reference minimizes the time window for concurrent
1982 // access.
1983 // Note that global() here returns the *cell-local* position.
1984 FieldVector<CoordType,dim> const quadPosInCell = face->geometryInInside().global(quadPos);
1985
1986 // Move evaluators to integration point
1987 // quadrature points on faces are not consistently indexed between adjacent cells (nor necessarily
1988 // placed symmetrically) - hence evaluating shape functions based on the quadrature point index is impossible
1989 // moveEvaluatorsToIntegrationPoint(evaluators,quadPosInCell,qr,g,face->indexInInside());
1990 moveEvaluatorsToIntegrationPoint(evaluators,quadPosInCell);
1991
1992 if (onBoundary) // assemble on domain boundary
1993 {
1994 boundaryCache.evaluateAt(quadPos,evaluators);
1995
1996 // step through all rhs blocks and update their local rhs
1997 if constexpr (Self::hasValue)
1998 if (flags & VALUE)
1999 floc += weightTimesDetJac*boundaryCache.d0();
2000 if (flags & RHS)
2001 for_each(localRhs,UpdateLocalRhs<TestVariableSetDescription,Evaluators,Scalar,
2002 typename F::BoundaryCache>(evaluators,weightTimesDetJac,boundaryCache));
2003 if (flags & MATRIX)
2004 for_each(localMatrices,UpdateLocalMatrix<AnsatzVariableSetDescription,TestVariableSetDescription,
2005 Evaluators,Scalar,typename F::BoundaryCache>(evaluators,weightTimesDetJac,boundaryCache));
2006
2007 } // done boundary faces
2008 else if constexpr (innerBoundaries) // assemble on inner boundary ("constexpr if"
2009 { // only because the body may be ill-formed)
2010 // Move in addition the neighboring evaluators to the correct evaluation point
2011 moveEvaluatorsToIntegrationPoint(neighbourEvaluators,face->geometryInOutside().global(quadPos));
2012
2013 innerBoundaryCache.evaluateAt(quadPos,evaluators,neighbourEvaluators);
2014
2015 // Update value. Remember that each face is visited twice, so we add here only our half.
2016 if constexpr (Self::hasValue)
2017 if (flags & VALUE)
2018 floc += 0.5 * weightTimesDetJac*innerBoundaryCache.d0();
2019
2020 using InnerBoundaryCache = typename Functional::InnerBoundaryCache;
2021
2022 // Step through all rhs blocks and update their local rhs. Remember that even though the face
2023 // is visited twice, these visits correspond to different rows (the row being associated
2024 // with the current center cell).
2025 // Hence we do not sum up the same values twice and have to omit the factor 0.5.
2026 if (flags & RHS)
2027 boost::fusion::for_each(localRhs,UpdateLocalRhs<TestVariableSetDescription,Evaluators,
2028 Scalar,InnerBoundaryCache>
2029 (evaluators,weightTimesDetJac,innerBoundaryCache));
2030
2031 // Update local matrices. As with the rhs, we have to omit the factor 0.5.
2032 if (flags & MATRIX)
2033 {
2034 // update diagonal block, contribution of coupling terms on cell (boolean true)
2035 boost::fusion::for_each(localMatrices,UpdateLocalMatrixFromInnerBoundaryCache<AnsatzVariableSetDescription,
2036 TestVariableSetDescription,Evaluators,Scalar,InnerBoundaryCache>
2037 (evaluators,evaluators,weightTimesDetJac,innerBoundaryCache,true));
2038 // update off-diagonal block, contribution of coupling terms with neighbouring cell (boolean false)
2039 boost::fusion::for_each(localNeighborMatrices,UpdateLocalMatrixFromInnerBoundaryCache<AnsatzVariableSetDescription,
2040 TestVariableSetDescription,Evaluators,Scalar,InnerBoundaryCache>
2041 (evaluators,neighbourEvaluators,weightTimesDetJac,innerBoundaryCache,false));
2042 }
2043 }
2044 }
2045 }
2046 } // done face loop
2047 } // done boundary conditions
2048
2049 // symmetrize local matrix (only the center cell matrices, not the off-diagonal neighbor matrices which are
2050 // not symmetric even for symmetric problems).
2051 if (flags & MATRIX)
2052 for_each(localMatrices,symmetrizeLocalMatrix);
2053
2054 if (statistics & localTimerStatistics) scatterTimer.resume();
2055 if (statistics & localTimerStatistics) localTimer.stop();
2056
2057 // Move to next cell.
2058 ++cell;
2059 // Scatter local data into global data (note that this occurs blockwise, i.e.
2060 // the actual scatter may be delayed). Local matrix buffers scatter automatically.
2061 fValue += floc;
2062 if (flags & RHS)
2063 for_each(localRhs,ScatterLocalRhs<RhsArray>(*globalRhs));
2064
2065 if (statistics & localTimerStatistics) evalTimer.resume(); // count cell iterator increment as well
2066 if (statistics & localTimerStatistics) scatterTimer.stop();
2067 } // end iteration over cells
2068
2069 if (statistics & localTimerStatistics) scatterTimer.resume();
2070 if (statistics & localTimerStatistics) evalTimer.stop();
2071
2072 // scattering of local vectors may not be complete (there may be local data left over in the buffers). Again, local matrices
2073 // scatter automatically on destruction.
2074 if (flags & RHS)
2075 for_each(localRhs,ScatterLocalRhs<RhsArray>(*globalRhs,true));
2076
2077 if (statistics & localTimerStatistics) scatterTimer.stop();
2078
2079 // report statistics
2080 if (statistics & localTimerStatistics)
2081 {
2082 std::cerr << "--------------\nThread " << threadNo << ":\n";
2083 double totalTime = setupTimer.elapsed().wall + evalTimer.elapsed().wall + scatterTimer.elapsed().wall + localTimer.elapsed().wall;
2084 std::cerr.precision(1);
2085 std::cerr.setf(std::ios_base::fixed,std::ios_base::floatfield);
2086 std::cerr << "Total time: " << 100* totalTimer.elapsed().wall / totalTime << "% -- " << totalTimer.format()
2087 << "Setup time: " << 100* setupTimer.elapsed().wall / totalTime << "% -- " << setupTimer.format()
2088 << "Eval times: " << 100* evalTimer.elapsed().wall / totalTime << "% -- " << evalTimer.format()
2089 << "Scatter times: " << 100* scatterTimer.elapsed().wall / totalTime << "% -- " << scatterTimer.format()
2090 << "Local times: " << 100 * localTimer.elapsed().wall / totalTime << "% -- " << localTimer.format();
2091 std::cerr << "--------------\n";
2092 }
2093
2094 return fValue;
2095 }
2096
2097 public:
2102 int valid() { return validparts; }
2103
2109 void flush(int flags=(VALUE|RHS|MATRIX))
2110 {
2111 validparts &= ~flags;  // bitwise complement (not logical not): clear only the flushed parts' validity bits
2112 if (flags & VALUE) this->fValue = 0;
2113 if (flags & RHS)
2114 {
2115 rhss.reset();
2116 rhsBlocks.reset();
2117 }
2118 if (flags & MATRIX) matrixBlocks.reset();
2119 }
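// Usage sketch (illustrative, not part of the header; "assembler" denotes an instance of
// this class, and the flag constants are the VALUE/RHS/MATRIX parts referenced above):
//
//   assembler.flush(Assembler::MATRIX);                   // drop only the assembled matrix
//   assert((assembler.valid() & Assembler::MATRIX) == 0); // matrix part no longer valid
//   assembler.flush();                                    // drop value, rhs, and matrix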
2120
2125 std::pair<size_t,size_t> size(int row, int col) const
2126 {
2127 return std::make_pair(TestVariableSetDescription::degreesOfFreedom(spaces(),row,row+1),
2128 AnsatzVariableSetDescription::degreesOfFreedom(spaces(),col,col+1));
2129 }
2130
2159 template <class Matrix>
2160 Matrix get(bool extractOnlyLowerTriangle=false,
2161 int rbegin=0, int rend=TestVariableSetDescription::noOfVariables,
2162 int cbegin=0, int cend=AnsatzVariableSetDescription::noOfVariables) const
2163 {
2164 // block sizes
2165 auto cSizes = AnsatzVariableSetDescription::variableDimensions(spaces());
2166 auto rSizes = TestVariableSetDescription::variableDimensions(spaces());
2167
2168 // row and column offsets
2169 std::vector<size_t> rowOff(rend-rbegin,0), colOff(cend-cbegin,0);
2170 std::partial_sum(rSizes.begin()+rbegin,rSizes.begin()+rend-1,rowOff.begin()+1);
2171 std::partial_sum(cSizes.begin()+cbegin,cSizes.begin()+cend-1,colOff.begin()+1);
2172
2173 return AssemblyDetail::Fill<Matrix>::apply(getMatrix(), rbegin, cbegin, rowOff, colOff,
2174 extractOnlyLowerTriangle,
2175 nnz(rbegin, rend, cbegin, cend,
2176 extractOnlyLowerTriangle),
2177 nrows(rbegin,rend), ncols(cbegin,cend));
2178 }
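// Usage sketch (illustrative): extracting the assembled blocks into a matrix type for
// which the assembler's fill machinery is specialized, e.g. (assuming the usual triplet
// support) Kaskade's MatrixAsTriplet:
//
//   using Triplet = MatrixAsTriplet<double>;
//   Triplet A = assembler.template get<Triplet>();      // all blocks, full pattern
//   Triplet L = assembler.template get<Triplet>(true);  // only the lower triangle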
2179
2187 template <int row, int col>
2188 auto const& get() const
2189 {
2190 return (*boost::fusion::find_if<AssemblyDetail::IsBlock<row,col>>(getMatrix())).globalMatrix();
2191 }
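// Sketch: read-only access to the raw global matrix of a single block (block indices
// illustrative):
//
//   auto const& A00 = assembler.get<0,0>();   // e.g. a Dune BCRS-type matrix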
2192
2198 template <class MatrixType, class BlockInformation>
2199 MatrixType get(bool extractOnlyLowerTriangle) const
2200 {
2201 using BI = BlockInformation;
2202 return get<MatrixType,BI::firstRow,BI::lastRow,BI::firstCol,BI::lastCol>(extractOnlyLowerTriangle);
2203 }
2204
2212 template <class DataOutIter>
2213 void toSequence(int rbegin, int rend, DataOutIter xi) const
2214 {
2215 // WARNING: this assumes that for_each processes the rhsArray in
2216 // correct order from front to back!
2217 for_each(getRhs().first.data,BlockToSequence<DataOutIter>(rbegin,rend,xi));
2218 }
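// Usage sketch (illustrative): flattening all rhs blocks into one contiguous array of
// scalar coefficients; the total length matches nrows() over the full block range:
//
//   std::vector<field_type> b(assembler.nrows());
//   assembler.toSequence(0, TestVariableSetDescription::noOfVariables, b.begin());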
2219
2220
2234 size_t nnz(size_t rbegin=0, size_t rend=TestVariableSetDescription::noOfVariables,
2235 size_t cbegin=0, size_t cend=AnsatzVariableSetDescription::noOfVariables, bool extractOnlyLowerTriangle=false) const
2236 {
2237 size_t n = 0;
2238 boost::fusion::for_each(getMatrix(),CountNonzeros(n,rbegin,rend,cbegin,cend,extractOnlyLowerTriangle));
2239 return n;
2240 }
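// Sketch: pre-sizing coordinate-format storage before extraction (illustrative):
//
//   size_t const nz = assembler.nnz();   // full block range, full pattern
//   std::vector<size_t> ri, ci;
//   std::vector<field_type> xi;
//   ri.reserve(nz); ci.reserve(nz); xi.reserve(nz);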
2241
2245 size_t nrows(int firstBlock=0, int lastBlock=TestVariableSetDescription::noOfVariables) const
2246 {
2247 return TestVariableSetDescription::degreesOfFreedom(spaces(),firstBlock,lastBlock);
2248 }
2249
2253 size_t ncols(int firstBlock=0, int lastBlock=AnsatzVariableSetDescription::noOfVariables) const
2254 {
2255 return AnsatzVariableSetDescription::degreesOfFreedom(spaces(),firstBlock,lastBlock);
2256 }
2257
2261 Spaces const& spaces() const
2262 {
2263 return spaces_;
2264 }
2265
2269 GridView const& gridView() const
2270 {
2271 return gv;
2272 }
2273
2285 MatrixBlockArray& getMatrix() const
2286 {
2287 if (!matrixBlocks)
2288 createMatrix([](auto const& face) { return true; });
2289 return *matrixBlocks;
2290 }
2291
2292 private:
2293 // Returns the matrix, creating it on the fly if necessary.
2294 MatrixBlockArray& getMatrix(F const& f)
2295 {
2296 if (!matrixBlocks)
2297 createMatrix([&](auto const& face)
2298 {
2299 if constexpr (innerBoundaries)
2300 return f.considerFace(face);
2301 else
2302 return true;
2303 });
2304
2305 return *matrixBlocks;
2306 }
2307
2308
2309 template <class FaceOracle>
2310 void createMatrix(FaceOracle const& considerFace) const
2311 {
2312 matrixBlocks.reset(new MatrixBlockArray());
2313 boost::fusion::for_each(*matrixBlocks,[&](auto& block)
2314 {
2315 block.init(this->spaces(),gv.template begin<0>(),gv.template end<0>(),
2316 considerFace);
2317 });
2318 }
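// The considerFace oracle passed to createMatrix decides, face by face, whether the
// sparsity pattern needs couplings between the two adjacent cells (required for inner
// boundary terms). A minimal sketch of such a predicate, using a hypothetical helper
// material() that classifies cells:
//
//   auto considerFace = [&](auto const& face)
//   { // couple neighbours only across material interfaces (illustrative criterion)
//     return material(face.inside()) != material(face.outside());
//   };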
2319
2320 public:
2326 template <int first=0, int last=TestVariableSetDescription::noOfVariables>
2327 typename TestVariableSetDescription::template CoefficientVectorRepresentation<first,last>::type rhs() const
2328 {
2329 using Rhs = typename TestVariableSetDescription::template CoefficientVectorRepresentation<first,last>::type;
2330 return Rhs(make_range<first,last>(getRhs().first.data));
2331 }
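// Usage sketch (illustrative): obtaining the assembled rhs coefficient vector, e.g. as
// the right hand side of a Newton step (the sign convention here is only a sketch):
//
//   auto b = assembler.rhs();   // coefficient vector over all test variable blocks
//   b *= -1;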
2332
2333 protected:
2334 std::pair<RhsArray&,RhsBlockArray&> getRhs() const
2335 {
2336 if (!rhss)
2337 {
2338 // construct global vectors
2339// old version, should be equivalent to the following line.
2340// rhss.reset(new RhsArray(Variables_Detail::VariableRangeCreator<TestVariables,ConstructCoefficientVector<Spaces>>
2341// ::apply(ConstructCoefficientVector<Spaces>(spaces()))));
2342 rhss.reset(new RhsArray(TestVariableSetDescription::template CoefficientVectorRepresentation<>::init(spaces())));
2343 // construct block infos
2344 rhsBlocks.reset(new RhsBlockArray());
2345 // Choose the number of row groups in the matrix blocks. The obvious choice would be 1 in sequential mode, but it
2346 // appears that a somewhat larger number is beneficial, probably due to better memory access locality. For multithreaded
2347 // mode, we choose a multiple (rowBlockFactor) of the number of hardware threads as the default, since then all
2348 // threads can (in principle) be active simultaneously.
2349 int nrg = std::max((int)(rowBlockFactor*boost::thread::hardware_concurrency()),4);
2350 boost::fusion::for_each(*rhsBlocks,[&](auto& block)
2351 {
2352 block.init(this->spaces(),gv.template begin<0>(),gv.template end<0>(),nrg);
2353 });
2354 }
2355
2356 return std::pair<RhsArray&,RhsBlockArray&>(*rhss,*rhsBlocks);
2357 }
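// Worked example for the nrg choice above (illustrative): with rowBlockFactor = 2 on a
// machine reporting 8 hardware threads, nrg = max((int)(2*8),4) = 16 row groups per block,
// so up to 16 threads can scatter into disjoint row ranges concurrently without contention.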
2358
2359
2360 // Resource management: delete matrix and rhs data structures as well as cell ranges on
2361 // refinement since they are no longer meaningful on the new grid.
2362 void reactToRefinement(GridSignals::Status const status)
2363 {
2364 if (status == GridSignals::BeforeRefinement)
2365 flush();
2366 }
2367
2368
2369 // resource management
2370 boost::signals2::scoped_connection refConnection;
2371
2374 GridView const& gv;
2375 GridManagerBase<Grid> const& gridManager;
2376
2377 mutable std::shared_ptr<MatrixBlockArray> matrixBlocks; // matrix block info including global matrices
2378 mutable std::shared_ptr<RhsArray> rhss; // global rhs vectors
2379 mutable std::shared_ptr<RhsBlockArray> rhsBlocks; // rhs block info
2380 BoundaryDetector boundaryDetector;
2382
2383
2387 bool gatherTimings = false; // if true, use default Kaskade timings
2388
2389
2390 template <class IdxOutIter, class DataOutIter>
2391 struct BlockToTriplet
2392 {
2393 BlockToTriplet(size_t rbegin_, size_t cbegin_, std::vector<size_t> const& rowOff_, std::vector<size_t> const& colOff_,
2394 IdxOutIter& ri_, IdxOutIter& ci_, DataOutIter& xi_, bool extractOnlyLowerTriangle_):
2395 rbegin(rbegin_), cbegin(cbegin_), rowOff(rowOff_), colOff(colOff_),
2396 ri(ri_), ci(ci_), xi(xi_), extractOnlyLowerTriangle(extractOnlyLowerTriangle_)
2397 {}
2398
2399 template <class MatrixBlock>
2400 void operator()(MatrixBlock const& mb) const
2401 {
2402 // Check if block is in requested range
2403 if (inRange(mb.rowId,mb.colId))
2405 rowOff[MatrixBlock::rowId-rbegin],colOff[MatrixBlock::colId-cbegin],
2406 mb.rowId==mb.colId && extractOnlyLowerTriangle,
2407 mb.symmetric);
2408 // For mirror blocks, the transposed block needs to be written
2409 // if the transposed block is in the requested
2410 // range. Transposition is implicitly achieved by swapping
2411 // column and row index output iterators.
2412 if (MatrixBlock::mirror && inRange(mb.colId,mb.rowId) && extractOnlyLowerTriangle==false)
2414 colOff[MatrixBlock::rowId-cbegin],rowOff[MatrixBlock::colId-rbegin],
2415 false,
2416 mb.symmetric);
2417 }
2418
2419 bool inRange(size_t r, size_t c) const
2420 {
2421 return r>=rbegin && r<rbegin+rowOff.size() && c>=cbegin && c<cbegin+colOff.size();
2422 }
2423
2424
2425 size_t rbegin, cbegin;
2426 std::vector<size_t> const& rowOff;
2427 std::vector<size_t> const& colOff;
2428 IdxOutIter& ri;
2429 IdxOutIter& ci;
2430 DataOutIter& xi;
2431 bool extractOnlyLowerTriangle;
2432 };
2433
2434 struct CountNonzeros
2435 {
2436 CountNonzeros(size_t& n_, size_t rbegin_, size_t rend_, size_t cbegin_, size_t cend_, bool onlyLowerTriangle_):
2437 n(n_), rbegin(rbegin_), rend(rend_), cbegin(cbegin_), cend(cend_), onlyLowerTriangle(onlyLowerTriangle_)
2438 {}
2439
2440 template <class MatrixBlock>
2441 void operator()(MatrixBlock const& mb) const
2442 {
2443 size_t myN = Matrix_to_Triplet<typename MatrixBlock::Matrix>::nnz(mb.globalMatrix(),
2444 (mb.rowId==mb.colId) && onlyLowerTriangle,
2445 mb.symmetric);
2446 // Check if block is in requested range
2447 if (inRange(MatrixBlock::rowId,MatrixBlock::colId))
2448 n += myN;
2449
2450 // For mirrored blocks, the transposed block needs to be
2451 // counted as well, provided onlyLowerTriangle is false
2452 // and the transposed block is in the requested
2453 // range.
2454 if (MatrixBlock::mirror && inRange(mb.colId,mb.rowId) && onlyLowerTriangle==false)
2455 n += myN;
2456 }
2457
2458 bool inRange(size_t r, size_t c) const
2459 {
2460 return r>=rbegin && r<rend && c>=cbegin && c<cend;
2461 }
2462
2463
2464
2465 size_t& n;
2466 size_t rbegin, rend, cbegin, cend;
2467 bool onlyLowerTriangle;
2468 };
2469
2470 template <class DataOutIter>
2471 struct BlockToSequence
2472 {
2473 BlockToSequence(int& rbegin_, int& rend_, DataOutIter& out_): rbegin(rbegin_), rend(rend_), out(out_) {}
2474 template <class VectorBlock> void operator()(VectorBlock const& v) const
2475 {
2476 if (rbegin<=0 && rend>0)
2477 out = vectorToSequence(v,out);
2478 --rbegin;
2479 --rend;
2480 }
2481 private:
2482 int& rbegin;
2483 int& rend;
2484 DataOutIter& out;
2485 };
2486
2487 };
2488
2489
2490 // End of VariationalFunctionalAssembler
2491 //---------------------------------------------------------------------
2492 //---------------------------------------------------------------------
2493} /* end of namespace Kaskade */
2494
2495#endif