KASKADE 7 development version
matrixProduct.hh
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the library KASKADE 7 */
4/* https://www.zib.de/research/projects/kaskade7-finite-element-toolbox */
5/* */
6/* Copyright (C) 2017-2024 Zuse Institute Berlin */
7/* */
8/* KASKADE 7 is distributed under the terms of the ZIB Academic License. */
9/* see $KASKADE/academic.txt */
10/* */
11/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13#ifndef MATRIXPRODUCT_HH
14#define MATRIXPRODUCT_HH
15
17
18#include <algorithm>
19#include <mutex>
20#include <vector>
21
22namespace Kaskade
23{
36 template <class EntryA, class EntryB, class IndexA, class IndexB>
38 {
39 // Compute the resulting entry size, treating the case of scalar entries.
40 constexpr bool scalarA = EntryA::rows*EntryA::cols==1;
41 constexpr bool scalarB = EntryB::rows*EntryB::cols==1;
42 using Entry = std::conditional_t<scalarA,
43 EntryB,
44 std::conditional_t<scalarB,
45 EntryA,
47 static_assert(scalarA || scalarB || (int)EntryA::cols==(int)EntryB::rows, // cols and rows are enums, comparison gives warning
48 "Entry size has to match");
49
50 assert(A.M()==B.N());
51
52 // Every row of A forms the corresponding row of AB by combining those rows of B
53 // which correspond to column indices of entries in the row of A. Thus, for
54 // each row of A, we extract the column index sets of the affected rows of B, and
55 // form their union.
56
57 NumaCRSPatternCreator<IndexA> creator(A.N(),B.M(),false,2);
58
59 std::mutex creatorMutex; // prevent concurrent access
60
61 parallelFor([&](int const k, int const n) // we go for coarse granularity here
62 { // since then we can amortize the
63 std::vector<IndexA> cidx, tmp, tmp2; // allocation of cidx,tmp,tmp2
64
65 for (IndexA i=k*A.N()/n; i<(k+1)*A.N()/n; ++i) // cover certain row range
66 {
67 cidx.clear();
68
69 auto rowA = A[i];
70 for (auto cai=rowA.begin(); cai!=rowA.end(); ++cai)
71 {
72 tmp.clear(); // start with no col indices in B row
73 auto rowB = B[cai.index()];
74 for (auto cbi=rowB.begin(); cbi!=rowB.end(); ++cbi) // but gather them in tmp
75 tmp.push_back(cbi.index());
76
77 tmp2.clear(); // now form the union of this
78 std::set_union(begin(tmp),end(tmp), // B rows' column indices (in tmp)
79 begin(cidx),end(cidx), // and the indices we already have
80 std::back_inserter(tmp2)); // collected (in cidx) -> tmp2
81 std::swap(tmp2,cidx); // and move them again to cidx
82 }
83
84 std::lock_guard<std::mutex> creatorLock(creatorMutex);
85 creator.addElements(&i,&i+1,begin(cidx),end(cidx), // enter all indices at once,
86 true); // mentioning they are sorted
87 }
88 });
89
91
92 // TODO: parallelize in NUMA style. This requires moving it to the NumaBCRSMatrix class
93 parallelFor(0,A.N(),[&](IndexA i) //as there are no allocations,
94 { // we go for fine granularity
95 auto rowA = A[i]; // with one task per row
96 auto rowC = C[i];
97 for (auto cai=rowA.begin(); cai!=rowA.end(); ++cai)
98 {
99 auto rowB = B[cai.index()];
100 for (auto cbi=rowB.begin(); cbi!=rowB.end(); ++cbi) // use normalForm() here to
101 rowC[cbi.index()] += normalForm(*cai) * normalForm(*cbi); // interpret 1x1 blocks as scalars
102 }
103 });
104
105 return C;
106 }
107}
108
109#endif
A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL interface (to complete....
Index M() const
The number of columns.
Index N() const
The number of rows.
A NUMA-aware creator for matrix sparsity patterns.
void addElements(IterRow const fromRow, IterRow const toRow, IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
Enters entries into the sparsity pattern.
auto operator*(NumaBCRSMatrix< EntryA, IndexA > const &A, NumaBCRSMatrix< EntryB, IndexB > const &B)
Computes the matrix-matrix product .
void parallelFor(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different CPUs.
Definition: threading.hh:489