#ifndef THREADED_MATRIX
#define THREADED_MATRIX

#include <boost/timer/timer.hpp>

#include "dune/common/config.h"
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/operators.hh>
template <class Entry, class Index>
class NumaBCRSMatrix;

template <class Scalar, class Index>
class MatrixAsTriplet;

template <class Target, class Source, class RowIndices, class ColIndices>
Target submatrix(Source const& A, RowIndices const& ri, ColIndices const& ci);
namespace ThreadedMatrixDetail
{
  // Counts the number of entries per row (or per column, if the matrix is to be read
  // as transposed) of a Dune::BCRSMatrix.
  template <class Entry, class Allocator, class Index>
  void getRowCount(Dune::BCRSMatrix<Entry,Allocator> const& matrix,
                   bool isTransposed, std::vector<Index>& rowCount)
  {
    for (auto ri=matrix.begin(); ri!=matrix.end(); ++ri)
      if (isTransposed)                      // rows of the transpose are the columns of the matrix
        for (auto ci=ri->begin(); ci!=ri->end(); ++ci)
          ++rowCount[ci.index()];
      else
        rowCount[ri.index()] = ri->size();
  }
  // Overload for NUMA matrices with the same counting logic.
  template <class Entry, class Index2, class Index>
  void getRowCount(NumaBCRSMatrix<Entry,Index2> const& matrix,
                   bool isTransposed, std::vector<Index>& rowCount)
  {
    for (auto ri=matrix.begin(); ri!=matrix.end(); ++ri)
      if (isTransposed)
        for (auto ci=ri->begin(); ci!=ri->end(); ++ci)
          ++rowCount[ci.index()];
      else
        rowCount[ri.index()] = ri->size();
  }

  // Overload for coordinate (triplet) matrices.
  template <class Entry, class Index, class Index2>
  void getRowCount(MatrixAsTriplet<Entry,Index2> const& matrix,
                   bool isTransposed, std::vector<Index>& rowCount)
  {
    // for the transposed interpretation, the column index array takes the role of the
    // row index array; the occurrences of these indices are then counted into rowCount
    auto const& indices = isTransposed? matrix.cidx: matrix.ridx;
    // ...
  }
  // Helpers that fill the chunk arrays colStart, cols, and values with the entries of
  // rows [first,last) of a given matrix. They are provided as static init() members of
  // a helper class template specialized on symmetry and transposition.

  // General case: plain row-by-row copy.
  template <class Entry, class Matrix, bool symmetric, bool transposed, class Index>
  static void init(Index first, Index last, Matrix const& matrix,
                   std::vector<size_t,NumaAllocator<size_t>>& colStart,
                   std::vector<Index,NumaAllocator<Index>>& cols,
                   std::vector<Entry,NumaAllocator<Entry>>& values,
                   std::vector<Index> const& )
  {
    for (Index i=0; i<last-first; ++i)
    {
      Index j = 0;
      for (auto ci=matrix[first+i].begin(); ci!=matrix[first+i].end(); ++ci, ++j)
      {
        values[colStart[i]+j] = *ci;
        cols[colStart[i]+j] = ci.index();
      }
    }
  }

  // Variant used for symmetric storage: rows [first,last) are copied, and entries of
  // later rows whose column index falls into [first,last) are mirrored into the chunk.
  template <class Entry, class Matrix, bool transposed, class Index>
  static void init(Index first, Index last, Matrix const& matrix,
                   std::vector<size_t,NumaAllocator<size_t>>& colStart,
                   std::vector<Index,NumaAllocator<Index>>& cols,
                   std::vector<Entry,NumaAllocator<Entry>>& values,
                   std::vector<Index> const& nRowEntries)
  {
    std::vector<Index> entriesInRow(last-first,0);
    for (Index i=0; i<last-first; ++i)
    {
      Index j = 0;
      for (auto ci=matrix[first+i].begin(); ci!=matrix[first+i].end(); ++ci, ++j)
      {
        values[colStart[i]+j] = *ci;
        cols[colStart[i]+j] = ci.index();
      }
      entriesInRow[i] = matrix[first+i].size();
      assert(entriesInRow[i]<=nRowEntries[first+i]);
    }

    for (Index r=first+1; r!=matrix.N(); ++r)
      for (auto c=matrix[r].begin(); c!=matrix[r].end(); ++c)
      {
        Index ridx = c.index();
        if (ridx>=first && ridx<last)
        {
          assert(ridx-first<colStart.size() && ridx-first < entriesInRow.size());
          Index pos = colStart[ridx-first] + entriesInRow[ridx-first];
          assert(pos<colStart[ridx-first+1]);
          // ... (the mirrored value and its column index are stored at position pos)
          ++entriesInRow[ridx-first];
          assert(entriesInRow[ridx-first]<=nRowEntries[ridx]);
        }
      }
  }

  // Variant scanning all rows of the source matrix and gathering every entry whose
  // column index falls into [first,last) into the corresponding chunk row
  // (transposed access).
  template <class Entry, class Matrix, class Index>
  static void init(Index first, Index last, Matrix const& matrix,
                   std::vector<size_t,NumaAllocator<size_t>>& colStart,
                   std::vector<Index,NumaAllocator<Index>>& cols,
                   std::vector<Entry,NumaAllocator<Entry>>& values,
                   std::vector<Index> const& nRowEntries)
  {
    std::vector<Index> entriesInRow(last-first,0);

    for (auto r=matrix.begin(); r!=matrix.end(); ++r)
      for (auto c=r->begin(); c!=r->end(); ++c)
      {
        Index ridx = c.index();
        if (ridx>=first && ridx<last)
        {
          Index cidx = r.index();
          assert(ridx-first<colStart.size() && ridx-first < entriesInRow.size());
          Index pos = colStart[ridx-first] + entriesInRow[ridx-first];
          assert(pos<colStart[ridx-first+1]);
          // ... (the value and the column index cidx are stored at position pos)
          ++entriesInRow[ridx-first];
          assert(entriesInRow[ridx-first]<=nRowEntries[ridx]);
        }
      }
  }
  // Copies a single matrix entry, converting between entry types and transposing
  // square entries where required.
  template <class To, class From, class scalarsMatch = std::false_type>
  struct MatrixEntry
  {
    static void copy(From const& from, To& to, bool isTransposed)
    {
      assert(isTransposed);
      // ...
    }
  };

  template <class To, class From>
  struct MatrixEntry<To, From, typename std::is_same<typename To::field_type, typename From::field_type>::type>
  {
    static void copy(From const& from, To& to, bool isTransposed)
    {
      assert(!isTransposed);
      static_assert(To::rows == From::rows && To::cols == From::cols,
                    "Mismatch of dimensions.");
      for (int i=0; i<To::rows; ++i)
        for (int j=0; j<To::cols; ++j)
          to[i][j] = static_cast<typename To::field_type>(from[i][j]);
    }
  };

  // Variant for identical entry types: square Dune::FieldMatrix entries can be copied
  // transposed, rectangular ones cannot.
  template <class Entry>
  static void copy(Entry const& from, Entry& to, bool isTransposed)
  {
    if (isTransposed && from.rows > 1 && from.cols > 1 && from.rows != from.cols)
      std::cerr << "Transposing rectangular Dune::FieldMatrix not implemented!" << std::endl
                << "#rows=" << from.rows << ", #cols=" << from.cols << std::endl;
    // ...
    if (isTransposed && from.rows == from.cols)
      for (int i=0; i<from.rows; i++)
        for (int j=i+1; j<from.cols; j++)
        {
          typename Entry::field_type s = from[j][i];
          // ... (the (i,j) and (j,i) entries of the target are swapped)
        }
  }
  // A base class representing basic meta information about sparsity patterns of NUMA
  // matrix chunks.
  template <class Index=size_t>
  class CRSChunkPatternInfo
  {
    // ...
    int node() const { return numaNode; }    // the NUMA node on which to allocate the memory
    // ...
  };

  // A class supporting two-stage construction of sparsity patterns of NUMA matrix
  // chunks.
  template <class Index=size_t>
  class CRSChunkPatternCreator
  {
  public:
    // ...

    // Reserves a certain amount of entries per row.
    void reserve(Index nnzPerRow=8)
    {
      // ...
      r.reserve(nnzPerRow);
    }

    // Enters elements into the sparsity pattern.
    template <class IterRow, class IterCol>
    void addElements(IterRow fromRow, IterRow const toRow,
                     IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
    {
      // collect, sort, and deduplicate the column indices
      sortedCols.insert(end(sortedCols),fromCol,toCol);
      std::sort(begin(sortedCols),end(sortedCols));
      sortedCols.erase(std::unique(begin(sortedCols),end(sortedCols)),end(sortedCols));
      // ...

      // merge the column indices into every addressed row of this chunk
      for ( ; fromRow != toRow; ++fromRow)
        if (this->first() <= *fromRow && *fromRow < this->last())
        {
          // for symmetric storage, the merge stops at the diagonal:
          //   ... std::upper_bound(begin(sortedCols),end(sortedCols),*fromRow) ...
          tmp.erase(std::set_union(begin(c),end(c),begin(sortedCols),top,
                                   begin(tmp)),end(tmp));
          // ...
        }
    }

    // Enters all possible elements (defining a dense chunk).
    void addAllElements(Index columns)
    {
      // ...
      for (Index i=0; i<cols.size(); ++i)
      {
        // ...
        std::iota(begin(cols[i]),end(cols[i]),static_cast<Index>(0));
      }
    }

    // Returns the number of stored entries (structurally nonzero elements).
    size_t nonzeroes() const
    {
      size_t nnz = 0;
      for (auto const& r: cols)
        nnz += r.size();
      return nnz;
    }

    // Moves rows in and out of the chunk in order to equilibrate the number of
    // nonzeroes over the chunks.
    size_t balanceForward(size_t const covered, size_t const nnz,
                          int chunks, std::vector<IndexArray>& moveRows);

    size_t balanceBackward(size_t covered, size_t const nnz,
                           int chunks, std::vector<IndexArray>& moveRows);

  private:
    std::vector<IndexArray> cols;
  };
  // This class maintains the sparsity structure of a couple of matrix rows (a NUMA
  // matrix chunk).
  template <class Index=size_t>
  class CRSChunkPattern
  {
  public:
    // ...

    // Constructor extracting the pattern of the chunk rows [first,last) of an indexed
    // submatrix given by eIndices/cIndices.
    template <class Expanded, class Condensed, class Matrix>
    CRSChunkPattern(Index first, Index last, Expanded const& eIndices, Condensed const& cIndices,
                    Matrix const& mat, int node)
    {
      // ...
      for (Index i=first; i<last; ++i)
      {
        colStarts[i-first] = cols.size();
        auto const& row = mat[eIndices[i]];
        auto cend = row.end();
        for (auto c = row.begin(); c!=cend; ++c)
        {
          Index ci = cIndices[c.index()];
          if (ci<eIndices.size())
            cols.push_back(ci);
        }
      }
      colStarts.back() = cols.size();
    }

    // Constructor extracting the sparsity pattern of rows [firstRow,lastRow) of a
    // given matrix (usually a Dune::BCRSMatrix).
    template <class Matrix>
    CRSChunkPattern(Matrix const& matrix, bool isSymmetric, bool isTransposed,
                    Index firstRow, Index lastRow, bool symmetric, int node)
    {
      // ...
      isTransposed = false;
      // ...

      // first pass: count the entries falling into each row of this chunk
      Index fromRow = this->first(), toRow = this->last();
      // ...
      fromRow = 0, toRow = matrix.N();
      // ...
      for (Index ri=fromRow; ri<toRow; ++ri)
      {
        auto cend = matrix[ri].end();
        for (auto ci=matrix[ri].begin(); ci!=cend; ++ci)
        {
          Index r = ri, c = ci.index();
          // ...
          ++rowCount[r-this->first()];
          // ...
          ++rowCount[c-this->first()];
        }
      }

      // compute the column start offsets from the row counts
      colStarts.resize(rowCount.size()+1,0);
      std::partial_sum(rowCount.begin(),rowCount.end(),colStarts.begin()+1);
      cols.resize(colStarts.back());

      // second pass: fill in the column indices
      for (Index ri=fromRow; ri<toRow; ++ri)
      {
        auto cend = matrix[ri].end();
        for (auto ci=matrix[ri].begin(); ci!=cend; ++ci)
        {
          Index r = ri, c = ci.index();
          // ...
          cols[colStarts[r-this->first()]+rowCount[r-this->first()]-1] = c;
          --rowCount[r-this->first()];
          // ...
          cols[colStarts[c-this->first()]+rowCount[c-this->first()]-1] = r;
          --rowCount[c-this->first()];
        }
      }

      // sort the column indices within each row
      for (Index ri=0; ri<rowCount.size(); ++ri)
      {
        assert(rowCount[ri] == 0);
        std::sort(cols.begin()+colStarts[ri],cols.begin()+colStarts[ri+1]);
      }
    }

    // Constructor extracting the sparsity pattern of rows [firstRow,lastRow) of a
    // coordinate (triplet) matrix.
    template <class Scalar, class Index2>
    CRSChunkPattern(MatrixAsTriplet<Scalar,Index2> const& matrix, bool isSymmetric, bool isTransposed,
                    Index firstRow, Index lastRow, bool symmetric, int node)
    {
      // ...
      isTransposed = false;
      // ...

      // first pass: count the entries falling into each row of this chunk
      for (size_t i=0; i<matrix.ridx.size(); ++i)
      {
        Index r = matrix.ridx[i], c = matrix.cidx[i];
        // ...
        ++rowCount[r-this->first()];
        // ...
        ++rowCount[c-this->first()];
      }

      colStarts.resize(rowCount.size()+1,0);
      std::partial_sum(rowCount.begin(),rowCount.end(),colStarts.begin()+1);
      cols.resize(colStarts.back());

      // second pass: fill in the column indices
      for (size_t i=0; i<matrix.ridx.size(); ++i)
      {
        Index r = matrix.ridx[i], c = matrix.cidx[i];
        // ...
        cols[colStarts[r-this->first()]+rowCount[r-this->first()]-1] = c;
        --rowCount[r-this->first()];
        // ...
        cols[colStarts[c-this->first()]+rowCount[c-this->first()]-1] = r;
        --rowCount[c-this->first()];
      }

      // sort and deduplicate the column indices within each row (triplet matrices may
      // contain duplicate entries)
      for (Index ri=0; ri<rowCount.size(); ++ri)
      {
        assert(rowCount[ri] == 0);
        auto first = cols.begin()+colStarts[ri];
        auto last = cols.begin()+colStarts[ri+1];
        // ...
      }

      // compact the column array after duplicates have been removed
      for (Index ri=1; ri<rowCount.size(); ++ri)
      {
        Index newStart = colStarts[ri-1] + rowCount[ri-1];
        if (newStart != colStarts[ri])
        {
          std::copy(cols.begin()+colStarts[ri],cols.begin()+colStarts[ri]+rowCount[ri],cols.begin()+newStart);
          colStarts[ri] = newStart;
        }
      }
      if (!rowCount.empty())
        colStarts.back() = colStarts[rowCount.size()-1]+rowCount.back();
      cols.erase(cols.begin()+colStarts.back(),cols.end());
    }

    // Returns the column index of the entry stored at position idx.
    Index col(size_t idx) const { return cols[idx]; }

    // Returns the index from which on the entries of the given local row are stored.
    size_t colStart(Index row) const { return colStarts[row]; }

    // An iterator pointing to the start of the column indices for the given local row.
    typename std::vector<Index,NumaAllocator<Index>>::const_iterator colStartIterator(Index row) const
    {
      return cols.begin()+colStarts[row];
    }

    // Looks up the storage position of an entry with given row and column index.
    size_t position(Index r, Index c) const
    {
      auto b = cols.begin();
      assert(0 <= r && r+1 < colStarts.size());
      auto p = std::lower_bound(b+colStarts[r],b+colStarts[r+1],c);
      if (p==b+colStarts[r+1] || *p!=c)
      {
        // entry not present
        // ...
      }
      // ...
    }

    // Returns the number of stored entries.
    size_t storage() const { return cols.size(); }

    // ...

  private:
    std::vector<size_t,NumaAllocator<size_t>> colStarts;
    std::vector<Index,NumaAllocator<Index>> cols;
  };
  // An iterator stepping through all entries in a row (const access).
  template <class Entry, class Index>
  class NumaBCRSMatrixConstRowIterator
  {
    // ...
    typedef typename std::vector<Index,NumaAllocator<Index>>::const_iterator ColIterator;
    typedef typename std::vector<Entry,NumaAllocator<Entry>>::const_iterator ValueIterator;
    // ...
  };

  // An iterator stepping through all entries in a row (mutable access).
  template <class Entry, class Index>
  class NumaBCRSMatrixRowIterator
  {
    // ...
    typedef typename std::vector<Index,NumaAllocator<Index>>::const_iterator ColIterator;
    typedef typename std::vector<Entry,NumaAllocator<Entry>>::iterator ValueIterator;
    // ...
  };

  // Expression template helpers for NUMA matrices.
  template <class Arguments, class Operation>
  class NumaBCRSMatrixExpressionChunk
  {
    // ...
  };

  template <class Arguments, class Operation>
  class NumaBCRSMatrixExpression
  {
    // ...
  };

  // This class stores a couple of compressed row storage rows in memory allocated
  // locally on a NUMA node.
  template <class Entry, class Index=size_t>
  class CRSChunk
  {
  public:
    // Constructor initializing all chunk values from a given pattern.
    CRSChunk(std::shared_ptr<CRSChunkPattern<Index>> const& pattern_, Entry const& init)
    : pat(pattern_), scatterMutex(4), values(pat->nonzeroes(),init,NumaAllocator<Entry>(pat->node()))
    {}

    // Constructor copying the chunk values from a given matrix.
    template <class Matrix>
    CRSChunk(std::shared_ptr<CRSChunkPattern<Index>> const& pattern_, Matrix const& matrix,
             bool isSymmetric, bool isTransposed)
    {
      typedef typename Matrix::block_type SuppliedEntry;
      // ...
      Index first = pat->first(), last = pat->last();
      // ...
      isTransposed = false;
      // ...
      Index fromRow = first, toRow = last;
      if (isSymmetric && !pat->symmetric())
        fromRow = 0, toRow = matrix.N();
      // ...
      for (Index ri=fromRow; ri<toRow; ++ri)
      {
        auto cend = matrix[ri].end();
        for (auto ci=matrix[ri].begin(); ci!=cend; ++ci)
        {
          Index r = ri, c = ci.index();
          // ...
          if (first<=r && r<last && (!pat->symmetric() || c<=r))
          {
            assert(pat->position(r,c) < values.size());
            // ... (copy the entry to its position)
          }
          if (isSymmetric && !pat->symmetric() && c<r && first<=c && c<last)
            // ... (mirror the entry to position (c,r))
        }
      }
    }

    // Constructor copying the chunk values from a coordinate (triplet) matrix.
    template <class Scalar, class Index2>
    CRSChunk(std::shared_ptr<CRSChunkPattern<Index>> const& pattern_,
             MatrixAsTriplet<Scalar,Index2> const& matrix, bool isSymmetric, bool isTransposed)
    {
      Index first = pat->first(), last = pat->last();
      // ...
      isTransposed = false;
      // ...
      for (size_t i=0; i<matrix.ridx.size(); ++i)
      {
        Index r = matrix.ridx[i], c = matrix.cidx[i];
        // ...
        if (first<=r && r<last && (!pat->symmetric() || c<=r))
        {
          assert(pat->position(r,c) < values.size());
          values[pat->position(r,c)] = matrix.data[i];
        }
        if (isSymmetric && !pat->symmetric() && c<r && first<=c && c<last)
          values[pat->position(c,r)] = matrix.data[i];
      }
    }

    // Constructor building both the chunk pattern and the chunk values from a matrix.
    template <class Matrix>
    CRSChunk(Matrix const& matrix, bool isSymmetric, bool isTransposed,
             Index firstRow, Index lastRow, bool symmetric, int node)
    : CRSChunk(std::make_shared<CRSChunkPattern<Index>>(matrix,isSymmetric,isTransposed,firstRow,lastRow,symmetric,node),
               matrix,isSymmetric,isTransposed)
    {}

    // Constructor copying the values of an indexed submatrix.
    template <class Expanded, class Condensed, class Matrix>
    CRSChunk(std::shared_ptr<CRSChunkPattern<Index>> const& pattern_,
             Expanded const& eIndices, Condensed const& cIndices, Matrix const& matrix)
    {
      // ...
      Index first = pat->first(), last = pat->last();
      for (Index r=first; r<last; ++r)
      {
        auto row = matrix[eIndices[r]];
        auto p = begin(values) + pat->colStart(r-first);
        for (auto c=row.begin(); c!=row.end(); ++c)
          if (cIndices[c.index()]<eIndices.size())
          {
            // ...
            assert(!std::isnan(*p));
            // ...
          }
        assert(p==begin(values)+pat->colStart(r-first+1));
      }
    }
    // Assigns the given value to each entry.
    template <class Value>
    Self& operator=(Value const& a)
    {
      std::fill(begin(values),end(values),a);
      return *this;
    }

    // Assigns the given matrix expression componentwise.
    template <class Arguments, class Operation>
    Self& operator=(NumaBCRSMatrixExpressionChunk<Arguments,Operation> const& e)
    {
      Index first = pat->first();
      for (Index r=0; r<pat->last()-first; ++r)
      {
        auto eend = e.end(r);
        auto ci = pat->colStartIterator(r);
        auto cend = pat->colStartIterator(r+1);
        auto vi = begin(values)+pat->colStart(r);
        for (auto ei = e.begin(r); ei != eend; ++ei)
        {
          auto pos = std::find(ci,cend,ei.index());
          assert(pos != cend);
          // ...
        }
      }
      return *this;
    }

    // Matrix-vector multiplication on the rows of this chunk; returns the local
    // contribution to the dual pairing of x and y for square matrices.
    template <class Domain, class Range>
    Scalar apply(Scalar a, Domain const& x, Range& y, bool initialize) const
    {
      auto const& p = *pat;
      Index first = p.first();
      Index last = p.last();
      Scalar dp = 0;
      // ...
      for (Index row=first; row<last; ++row)
      {
        typename Range::block_type z(0);
        size_t jEnd = p.colStart(row+1-first);
        for (size_t j=p.colStart(row-first); j<jEnd; ++j)
          if (Entry::rows==1 && Entry::cols==1)
            z.axpy(values[j][0][0],x[p.col(j)]);
          else
            values[j].umv(x[p.col(j)],z);
        // ...
        if (initialize) y[row] = z;
        // ...
        if (Entry::rows==Entry::cols && row<x.size())
          dp += y[row]*x[row];
      }
      return dp;
    }
    // Transposed matrix-vector multiplication with the rows of this chunk.
    template <class Domain, class Range>
    void applyTransposed(Scalar a, Domain const& x, Range& y) const
    {
      auto const& p = *pat;
      Index first = p.first();
      Index last = p.last();
      assert(x.size()>=last);
      // ...
      for (Index j=first; j<last; ++j)
      {
        size_t jEnd = p.colStart(j+1-first);
        for (size_t k=p.colStart(j-first); k<jEnd; ++k)
        {
          // i denotes the global column index p.col(k) of this entry
          if (Entry::rows==1 && Entry::cols==1)
            y[i] += a*values[k][0][0] * x[j];
          else
            values[k].usmtv(a,x[j],y[i]);
        }
      }
    }

    // Matrix-vector multiplication of transposed parts: the entries of the given chunk
    // whose column indices fall into our row range are applied transposed to x.
    template <class Domain, class Range>
    void gatherMirrored(Scalar a, Domain const& x, Range& y, CRSChunk<Entry,Index> const& block,
                        bool subdiagonal, bool initToZero) const
    {
      auto const& p = *block.pat;
      Index firstRow = p.first();
      Index lastRow = p.last();
      Index firstCol = pat->first();
      Index lastCol = pat->last();
      assert(lastRow<=y.size());
      // ...
      for (Index row=firstRow; row<lastRow; ++row)
        // ... (initialize the target range if initToZero)
      for (Index row=firstRow; row<lastRow; ++row)
      {
        auto const& xrow = x[row];
        auto cend = p.colStartIterator(row+1-firstRow);
        auto ci = firstCol==0?
                    p.colStartIterator(row-firstRow):
                    std::lower_bound(p.colStartIterator(row-firstRow),cend,firstCol);
        auto vi = block.values.begin() + (ci-p.colStartIterator(0));
        // strictly subdiagonal part
        for ( ; ci!=cend && *ci<lastCol && *ci<row; ++ci, ++vi)
          vi->usmtv(a,xrow,y[*ci]);
        // ...
        for ( ; ci!=cend && *ci<lastCol; ++ci, ++vi)
          vi->usmtv(a,xrow,y[*ci]);
      }
    }
    // Returns an iterator to the start of the values for the given local row index.
    auto valStartIterator(Index row)
    {
      return values.begin()+pat->colStart(row);
    }

    // Scatters given sub-matrices into the chunk by adding up their entries.
    template <class LMIterator>
    void scatter(LMIterator first, LMIterator last)
    {
      // ...
      Index firstRow = pat->first();
      Index lastRow = pat->last();
      if (first==last || firstRow==lastRow)
        return;
      // ...
      Index nnzPerRow = pat->nonzeroesPerRow();
      // the row range is processed in several mutex-protected portions in order to
      // limit contention between concurrently scattering threads
      Index n = scatterMutex.size();
      for (Index i=0; i<n; ++i)
      {
#ifndef KASKADE_SEQUENTIAL
        boost::lock_guard<boost::mutex> lock(scatterMutex[i].get());
#endif
        // ...
        // choose the scatter variant depending on the sparsity of the chunk rows
        if (nnzPerRow == pat->columns())
          scatterRowRange<0>(first,last,firstRow,rowRangeStart,rowRangeEnd);
        else if (nnzPerRow > 10*(first->cidx().size()))
          scatterRowRange<1>(first,last,firstRow,rowRangeStart,rowRangeEnd);
        else
          scatterRowRange<2>(first,last,firstRow,rowRangeStart,rowRangeEnd);
      }
    }

    // Performs entry-wise operations on our own values and those of another matrix.
    template <class F, class EntryB, class IndexB>
    void entrywiseOp(F const& f, NumaBCRSMatrix<EntryB,IndexB> const& B)
    {
      auto const& p = *pat;
      Index first = p.first();
      Index last = p.last();
      // ...
      for (Index row=first; row<last; ++row)
      {
        auto const& Br = B[row];
        size_t jEnd = p.colStart(row+1-first);
        auto bri = Br.begin();
        auto brend = Br.end();
        for (size_t j=p.colStart(row-first); j<jEnd; ++j)
        {
          // advance to the entry of B with matching column index, if any
          while (bri!= brend && bri.index()<p.col(j))
            ++bri;
          if (bri!=brend && bri.index()==p.col(j))
            values[j] = f(values[j],*bri);
          // ...
        }
      }
    }

  private:
    std::shared_ptr<CRSChunkPattern<Index>> pat;
    std::vector<Mutex> scatterMutex;
    std::vector<Entry,NumaAllocator<Entry>> values;

    // Scatters the sub-matrices in [first,last) into the rows [firstRow,lastRow) of
    // this chunk.
    template <int sparsity, class LMIterator>
    void scatterRowRange(LMIterator first, LMIterator last, Index startRow, Index firstRow, Index lastRow)
    {
      // ...
      for ( ; first!=last; ++first)
        for (auto rgl: first->ridx())
        {
          Index rg = rgl.first;
          // ...
          scatterRow<sparsity>(*first,rg-startRow,rgl.second);
        }
    }

    // Scatters one local matrix row (local index rl) into the chunk row with local
    // index rg.
    template <int sparsity, class LM, class LIndex>
    void scatterRow(LM const& a, Index rg, LIndex rl)
    {
      auto cbegin = pat->colStartIterator(rg);
      auto cend = pat->colStartIterator(rg+1);
      // ...
      // (diagonal entry)
      auto pos = std::find(cbegin,cend,rg+pat->first());
      assert(pos != cend);
      vbegin[pos-cbegin] += a(rl,rl);
      // ...
      for (auto cgl: a.cidx())
      {
        auto pos = sparsity==0? std::lower_bound(cbegin,cend,cgl.first)
                 : sparsity==1? std::lower_bound(cbegin,cend,cgl.first)
                 :              std::find(cbegin,cend,cgl.first);
        // ...
        vbegin += pos-cbegin;
        // ...
        *vbegin += a(rl,cgl.second);
        // ...
      }
    }
  // The row proxy class used by the matrix and its iterators. It provides begin()/end()
  // iterators over the entries of one row, find() by global column index, and random
  // read/write access by global column index.
  template <class Entry, class Index>
  class Row
  {
    // ...

    // Lookups by global column index fail if the column is not present in this row:
    if (ci==cEnd || *ci!=c)
      // ...
    if (ci==cEnd || *ci!=c)
      // ...

    // Read access to a nonexistent entry yields a reference to a static zero entry:
    static Entry const zero(0);
    // ...
    if (ci==cEnd || *ci!=c)
      // ...

    // Write access to a nonexistent entry is an error:
    if (ci==cEnd || *ci!=c)
      throw LookupException("write access to nonexistent matrix entry",__FILE__,__LINE__);
    // ...

    typedef typename std::vector<Index,NumaAllocator<Index>>::const_iterator ColIterator;
    typedef typename std::vector<Entry,NumaAllocator<Entry>>::iterator ValueIterator;

    // Recomputes the column/value iterator ranges for the given row within its chunk.
    void update(NumaBCRSMatrix<Entry,Index>& matrix, Index row, int chunk)
    {
      if (0<=row && row<matrix.N())
      {
        auto& c = matrix.chunk(chunk);
        auto const& p = c.pattern();
        assert(p.first()<=row && row<p.last());
        Index first = p.first();
        cBegin = p.colStartIterator(row-first);
        cEnd = p.colStartIterator(row-first+1);
        vBegin = c.valStartIterator(row-first);
        vEnd = c.valStartIterator(row-first+1);
      }
      // ...
    }
  };
  // An iterator stepping through the rows of a NumaBCRSMatrix (const access).
  template <class Entry, class Index>
  class NumaBCRSMatrixConstIterator
  {
    // ...
    NumaBCRSMatrixConstIterator(NumaBCRSMatrix<Entry,Index> const& matrix_, Index row_)
    : matrix(const_cast<NumaBCRSMatrix<Entry,Index>*>(&matrix_)), row(row_), chunk(getChunk(row_))
    {
      assert(row>=0 && row<=matrix->N());
      this->update(*matrix,row,chunk);
    }

    // pre-increment: step to the next row, advancing to the next chunk if necessary
    // ...
      if (row==matrix->N())
        // ...
      while (matrix->getPattern()->rowStart()[chunk+1]==row)
        ++chunk;
      this->update(*matrix,row,chunk);
    // ...

    // pre-decrement: step to the previous row, moving back to the previous chunk if
    // necessary
    // ...
      while (matrix->getPattern()->rowStart()[chunk] > row)
        --chunk;
      this->update(*matrix,row,chunk);
    // ...

    // operator+=(inc): jump forward by the given number of rows
    // ...
      if (row > matrix->N())
        // ...
      chunk = getChunk(row);
      this->update(*matrix,row,chunk);
    // ...

  private:
    int getChunk(Index r) const { return matrix->getPattern()->chunk(r); }
  };
  // An iterator stepping through the rows of a NumaBCRSMatrix, yielding mutable rows.
  template <class Entry, class Index>
  class NumaBCRSMatrixIterator
  {
    // ...
  };

} // namespace ThreadedMatrixDetail

// A NUMA-aware creator for matrix sparsity patterns.
template <class Index=size_t>
class NumaCRSPatternCreator
{
public:
  // Constructs a rows_ times cols matrix sparsity structure creator.
  NumaCRSPatternCreator(Index rows_, Index cols, bool symmetric=false, int nnzPerRow=0)
  : rows(rows_), columns(cols)
  , sym(symmetric && rows==columns)
  {
    // one chunk creator per NUMA node
    // ...
    creators.reserve(nodes);
    for (int i=0; i<nodes; ++i)
      // ...
  }

  // Reserves a certain amount of entries per row.
  void reserve(Index nnzPerRow)
  {
    // ...
    this->creators[i].reserve(nnzPerRow);
  }

  // Clears the complete data, handing back the memory.
  void clear()
  {
    // ...
    this->creators[i].clear();
  }

  // Enters a contiguous dense block [fromRow,toRow) x [fromCol,toCol) into the
  // sparsity pattern.
  void addDenseBlock(Index fromRow, Index toRow, Index fromCol, Index toCol)
  {
    assert(fromRow>=0 && toRow<=rows && fromRow<=toRow);
    assert(fromCol>=0 && toCol<=columns && fromCol<=toCol);
    // ...
    for (int r=fromRow; r<toRow; ++r)
      for (int c=fromCol; c<toCol; ++c)
        // ...
  }

  // Enters entries into the sparsity pattern.
  template <class IterRow, class IterCol>
  void addElements(IterRow const fromRow, IterRow const toRow,
                   IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
  {
    assert(fromCol==toCol || *std::max_element(fromCol,toCol) < columns);
    assert(fromRow==toRow || *std::max_element(fromRow,toRow) < rows);
    for (auto& c: creators)
      c.addElements(fromRow,toRow,fromCol,toCol,colIsSorted);
  }

  // Enters elements into the sparsity pattern, one column range per row range.
  template <class RowRangeSequence, class ColRangeSequence>
  void addElements(RowRangeSequence const& rrs, ColRangeSequence const& crs, bool colsAreSorted=false)
  {
    assert(rrs.size() == crs.size());
    // ...
    for (auto c=std::begin(crs); c!=std::end(crs); ++c)
      assert((*c).empty() || (*std::max_element(std::begin(*c),std::end(*c)) < columns));
    // ...
    auto c = std::begin(crs);
    for (auto r=std::begin(rrs); r!=std::end(rrs); ++r, ++c)
      this->creators[i].addElements(std::begin(*r),std::end(*r),std::begin(*c),std::end(*c),colsAreSorted);
    // ...
  }

  // Enters all possible elements (defining a dense matrix).
  void addAllElements()
  {
    // ...
    this->creators[i].addAllElements(this->columns);
  }

  // Enters the diagonal elements.
  void addDiagonal()
  {
    // ...
    for (Index i=0; i<std::min(rows,columns); ++i)
      // ...
  }

  // The number of structurally nonzero elements.
  size_t nonzeroes() const
  {
    size_t nnz = 0;
    for (auto const& c: creators)
      nnz += c.nonzeroes();
    return nnz;
  }

  // The number of columns.
  Index cols() const { return columns; }

  // Returns the number of NUMA nodes/chunks used.
  int nodes() const { return creators.size(); }

  // ...

private:
  Index rows, columns;
  std::vector<ChunkCreator> creators;
};
// A NUMA-aware compressed row storage sparsity pattern.
template <class Index=size_t>
class NumaCRSPattern
{
public:
  // Constructor creating a sparsity pattern from the given creator.
  NumaCRSPattern(NumaCRSPatternCreator<Index> const& creator)
  : patterns(creator.nodes()), sym(creator.isSymmetric()), rowSt(creator.nodes()+1), cols(creator.cols())
  {
    // ...
    this->patterns[i] = std::make_shared<ChunkPattern>(creator.creator(i));
    this->rowSt[i] = this->patterns[i]->first();
    // ...
    rowSt[patterns.size()] = patterns.back()->last();
  }

  // Constructor for the pattern of an indexed submatrix.
  template <class Expanded, class Condensed, class Matrix>
  NumaCRSPattern(Expanded const& eIndices, Condensed const& cIndices, Matrix const& mat)
  : patterns(NumaThreadPool::instance().nodes()), sym(false), rowSt(eIndices.size(),0), cols(eIndices.size())
  {
    assert(mat.M()==mat.N());
    assert(eIndices.size() <= cIndices.size());

    // count the retained entries per row in order to distribute the rows onto chunks
    for (Index r=0; r<eIndices.size(); ++r)
    {
      auto row = mat[eIndices[r]];
      for (auto ci=row.begin(); ci!=row.end(); ++ci)
        if (cIndices[ci.index()] < eIndices.size())
          // ...
    }
    // ...
    for (int i=0; i<patterns.size(); ++i)
      patterns[i] = std::make_shared<ChunkPattern>(rowSt[i],rowSt[i+1],eIndices,cIndices,mat,i);
    // ...
  }

  // Constructor extracting the sparsity pattern of a given matrix (usually a
  // Dune::BCRSMatrix).
  template <class Matrix>
  NumaCRSPattern(Matrix const& matrix, bool isSymmetric, bool isTransposed, bool symmetric)
  : sym(symmetric), cols(isTransposed? matrix.N(): matrix.M())
  {
    // ...
    Index rows = isTransposed? matrix.M(): matrix.N();
    std::vector<size_t> rowCount(rows,0);
    // rowCount is reused to hold the chunk boundary rows of roughly equal weight
    // ...
    patterns.reserve(rowCount.size()-1);
    for (int i=0; i<rowCount.size()-1; ++i)
      patterns.push_back(std::make_shared<ChunkPattern>(matrix,isSymmetric,isTransposed,
                                                        rowCount[i],rowCount[i+1],symmetric,i));
    rowSt.resize(patterns.size()+1);
    for (int i=0; i<patterns.size(); ++i)
      rowSt[i] = patterns[i]->first();
    rowSt[patterns.size()] = patterns.back()->last();
  }

  // The number of rows.
  Index N() const { return rowSt.back(); }

  // The number of columns.
  Index M() const { return cols; }

  // Returns the number of stored entries.
  size_t storage() const
  {
    size_t nnz = 0;
    for (auto const& p: patterns)
      nnz += p->storage();
    return nnz;
  }

  // Returns the number of structurally nonzero entries.
  size_t nonzeroes() const
  {
    size_t nnz = 0;
    for (auto const& p: patterns)
      nnz += p->nonzeroes();
    return nnz;
  }

  // Returns the number of NUMA nodes/chunks used.
  int nodes() const { return patterns.size(); }

  // Returns the individual patterns.
  std::shared_ptr<ChunkPattern> pattern(int i) const { return patterns[i]; }

  // Returns the number of the chunk containing the given row.
  int chunk(Index row) const
  {
    assert(0<=row && row<=N());
    auto it = std::upper_bound(rowSt.begin(),rowSt.end(),row);
    int c = it - rowSt.begin() - 1;
    assert(0<=c && c<=patterns.size());
    assert(c==patterns.size() || (patterns[c]->first() <= row && row < patterns[c]->last()));
    return c;
  }

  // Returns the limiting row indices between the chunks.
  std::vector<Index> const& rowStart() const { return rowSt; }

  // Queries whether an entry is present in the sparsity pattern.
  bool exists(Index r, Index c) const
  {
    return patterns[chunk(r)]->exists(r,c);
  }

private:
  std::vector<std::shared_ptr<ChunkPattern>> patterns;
  // ...
  std::vector<Index> rowSt;
};
// Computes the union of two sparsity patterns.
template <class Index, class Index2>
NumaCRSPattern<Index> operator+(NumaCRSPattern<Index> const& pa, NumaCRSPattern<Index2> const& pb)
{
  assert(pa.N()==pb.N() && pa.M()==pb.M());
  // ...
  for (int i=0; i<pa.nodes(); ++i)
  {
    // ...
    auto const& chunkPattern = *pa.pattern(i);
    Index first = chunkPattern.first();
    for (Index row=first; row<chunkPattern.last(); ++row)
      for (auto it=chunkPattern.colStartIterator(row-first); it!=chunkPattern.colStartIterator(row+1-first); ++it)
        // ... (enter the entries of pa into the creator)
  }
  for (int i=0; i<pb.nodes(); ++i)
  {
    // ...
    auto const& chunkPattern = *pb.pattern(i);
    Index2 first = chunkPattern.first();
    for (Index2 row=first; row<chunkPattern.last(); ++row)
      for (auto it=chunkPattern.colStartIterator(row-first); it!=chunkPattern.colStartIterator(row+1-first); ++it)
      {
        // ...
        creator.addElement(static_cast<Index>(row),*it);
        // ...
        creator.addElement(*it,static_cast<Index>(row));
      }
  }
  // ... (construct the result pattern from the creator)
}
// A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL
// interface.
template <class Entry, class Index=size_t>
class NumaBCRSMatrix
{
public:
  // ...

  // Constructor creating a matrix from a given sparsity pattern.
  NumaBCRSMatrix(std::shared_ptr<NumaCRSPattern<Index>> const& pattern_, Entry const& init=Entry(0))
  // ...
  {
    // create one chunk per NUMA node and initialize it with the given value
    for (int i=0; i<pattern->nodes(); ++i)
      chunks.push_back(Chunk(i));
    // ...
    this->chunks[i] = std::move(Chunk(this->pattern->pattern(i),init));
    // ...
    dps.resize(chunks.size());
  }

  // Deprecated. Use the simpler version without cIndices instead.
  template <class Expanded, class Condensed, class Matrix>
  NumaBCRSMatrix(Expanded const& eIndices, Condensed const& cIndices, Matrix const& mat)
  : pattern(std::make_shared<NumaCRSPattern<Index>>(eIndices,cIndices,mat))
  {
    chunks.reserve(pattern->nodes());
    for (int i=0; i<pattern->nodes(); ++i)
      chunks.push_back(Chunk(pattern->pattern(i),eIndices,cIndices,mat));
    dps.resize(chunks.size());
  }

  // Maps global indices to condensed submatrix indices; indices not contained in
  // eIndices are mapped to eIndices.size().
  template <class Expanded>
  static std::vector<Index> createCondensed(Expanded const& eIndices, Index n)
  {
    std::vector<Index> cIndices(n,eIndices.size());
    for (Index i=0; i<eIndices.size(); ++i)
      cIndices[eIndices[i]] = i;
    return cIndices;
  }

  // Indexed submatrix constructor.
  template <class Expanded, class Matrix>
  NumaBCRSMatrix(Expanded const& eIndices, Matrix const& mat)
  // ...

  // Constructor copying a given matrix with a given (matching) sparsity pattern.
  template <class Matrix>
  NumaBCRSMatrix(std::shared_ptr<NumaCRSPattern<Index>> const& pattern_, Matrix const& matrix,
                 bool isSymmetric, bool isTransposed)
  // ...
  {
    chunks.reserve(pattern->nodes());
    for (int i=0; i<pattern->nodes(); ++i)
      chunks.push_back(Chunk(pattern->pattern(i),matrix,isSymmetric,isTransposed));
    dps.resize(chunks.size());
  }

  // Constructor copying a given matrix with a different entry type.
  template <class OtherEntry>
  NumaBCRSMatrix(NumaBCRSMatrix<OtherEntry,Index> const& matrix)
  // ...

  // Constructor copying a given matrix (usually a Dune::BCRSMatrix or MatrixAsTriplet).
  template <class Matrix>
  NumaBCRSMatrix(Matrix const& matrix, bool isSymmetric, bool isTransposed=false, bool symmetric=false)
  : NumaBCRSMatrix(/* pattern extracted from matrix, */ matrix,isSymmetric,isTransposed)
  {
    assert(matrix.N()==matrix.M() || !(symmetric||isSymmetric));
    assert(Entry::rows==Entry::cols || !(symmetric||isSymmetric));
    // ...
    symmetric &= Entry::rows==Entry::cols && matrix.N()==matrix.M();
    isSymmetric &= Entry::rows==Entry::cols && matrix.N()==matrix.M();
    // ...
  }

  // Constructor copying a given NUMA matrix, optionally as its transpose or with
  // symmetric completion.
  NumaBCRSMatrix(NumaBCRSMatrix<Entry,Index> const& matrix,
                 bool isSymmetric, bool isTransposed=false, bool symmetric=false)
  : NumaBCRSMatrix(isTransposed==false && isSymmetric==symmetric? matrix.pattern
                   : std::make_shared<NumaCRSPattern<Index>>(matrix,isSymmetric,isTransposed,symmetric),
                   matrix,isSymmetric,isTransposed)
  {
    assert(matrix.N()==matrix.M() || !(symmetric||isSymmetric));
    assert(Entry::rows==Entry::cols || !(symmetric||isSymmetric));
    assert(isTransposed? (matrix.N()==M() && matrix.M()==N()): (matrix.N()==N() && matrix.M()==M()));
    assert(!isTransposed || Entry::rows==Entry::cols);
  }
  // Assigns the given value to each entry.
  Self& operator=(Entry const& a)
  {
    if (chunks.size()>0)
      // ... (in parallel on all NUMA nodes)
      this->chunks[i] = a;
    return *this;
  }

  // Assigns the given scalar value to each entry.
  Self& operator=(typename Entry::field_type const& a)
  {
    return *this = Entry(a);
  }

  // NOT YET IMPLEMENTED: assigns the given NUMA matrix expression.
  template <class Arguments, class Operation>
  Self& operator=(ThreadedMatrixDetail::NumaBCRSMatrixExpression<Arguments,Operation> const& e)
  {
    if (chunks.size()>0)
      // ...
      this->chunks[i] = e[i];
    return *this;
  }

  // Adds a sparse matrix to this one.
  template <class EntryB, class IndexB>
  Self& operator+=(NumaBCRSMatrix<EntryB,IndexB> const& B)
  {
    entrywiseOp([](Entry const& a, Entry const& b) { return a+b; }, B);
    return *this;
  }

  // Subtracts a sparse matrix from this one.
  template <class EntryB, class IndexB>
  Self& operator-=(NumaBCRSMatrix<EntryB,IndexB> const& B)
  {
    entrywiseOp([](Entry const& a, Entry const& b) { return a-b; }, B);
    return *this;
  }

  // Multiplication with a "scalar".
  template <class Factor>
  Self& operator*=(Factor a)
  {
    entrywiseOp([=](Entry const& e, Entry const&) { return a*e; }, *this);
    return *this;
  }

  // Extracts the submatrix defined by the given row and column indices.
  template <class RowIndices, class ColIndices>
  Self operator()(RowIndices const& ri, ColIndices const& ci) const
  {
    return submatrix<Self>(*this,ri,ci);
  }

  // The number of rows.
  Index N() const { return pattern->N(); }

  // The number of columns.
  Index M() const { return pattern->M(); }

  // Returns a pointer to the sparsity pattern.
  std::shared_ptr<NumaCRSPattern<Index>> getPattern() const { return pattern; }

  // Returns true if (r,c) is structurally nonzero.
  bool exists(Index r, Index c) const
  {
    return pattern->exists(r,c);
  }

  // Computes the square of the Frobenius norm.
  field_type frobenius_norm2() const
  {
    field_type sum = 0;
    for (auto ri=begin(); ri!=end(); ++ri)
      for (auto ci=ri->begin(); ci!=ri->end(); ++ci)
        sum += ci->frobenius_norm2();
    return sum;
  }
  // Matrix-vector operations (ISTL-style interface):

  template <class X, class Y>
  // ...

  template <class X, class Y>
  void mtv(X const& x, Y& y) const { doMtv(1.0,x,y,true); }

  template <class X, class Y>
  void mmv(X const& x, Y& y) const { doMv(-1.0,x,y,true); }

  template <class X, class Y>
  // ...

  template <class X, class Y>
  // ...

  template <class X, class Y>
  // ...

  template <class X, class Y>
  void umtv(X const& x, Y& y) const { doMtv(1.0,x,y,false); }

  template <class X, class Y>
  // ...

  template <class X, class Y>
  // ...

  // Scatters given sub-matrices into the matrix by adding up their entries.
  template <class LMIterator>
  void scatter(LMIterator first, LMIterator last)
  {
    // ...
    for (auto& c: chunks)
      c.scatter(first,last);
  }
  // Scatters a given dense submatrix into the matrix: B[r][c] is combined (by default
  // added) into the entry (rows[r],cols[c]).
  template <class RowIndices, class ColIndices, class BinaryOp=std::plus<Entry>>
  void scatter(DynamicMatrix<Entry> const& B, RowIndices const& rows, ColIndices const& cols,
               BinaryOp const& op = BinaryOp())
  {
    // ...
    for (int r=0; r<rows.size(); ++r)
      for (int c=0; c<cols.size(); ++c)
      {
        auto ci = (*this)[rows[r]].find(cols[c]);
        assert(ci!=(*this)[rows[r]].end());
        *ci = op(*ci,B[r][c]);
      }
  }

private:
  std::shared_ptr<NumaCRSPattern<Index>> pattern;
  std::vector<Chunk> chunks;
  mutable std::vector<field_type> dps;
  // Performs y = a*A*x (initToZero) or y += a*A*x, and returns an accumulated dual
  // pairing of x and y for square matrices.
  template <class X, class Y>
  Scalar doMv(Scalar const& a, X const& x, Y& y, bool initToZero) const
  {
    assert(y.size()==N());
    assert(x.size()==M());
    // ...
    if (chunks.size()>1)
    {
      // the work is done chunk by chunk in parallel on the NUMA nodes
      // ...
      if (pattern->isSymmetric())
        for (int k=i; k<n; ++k)
          this->chunks[i].gatherMirrored(a,x,y,this->chunks[k],true,initToZero&&k==i);
      this->dps[i] = this->chunks[i].apply(a,x,y,initToZero&&!pattern->isSymmetric());
      // ...
      return std::accumulate(dps.begin(),dps.end(),0.0);
    }
    // ...
    if (pattern->isSymmetric())
      chunks[0].gatherMirrored(a,x,y,chunks[0],true,initToZero);
    return chunks[0].apply(a,x,y,initToZero&&!pattern->isSymmetric());
  }

  // Performs y = a*A^T*x (initToZero) or y += a*A^T*x.
  template <class X, class Y>
  void doMtv(Scalar const& a, X const& x, Y& y, bool initToZero) const
  {
    assert(y.size()==M());
    assert(x.size()==N());
    // for symmetric matrices the transposed product equals the plain product
    if (pattern->isSymmetric())
    {
      doMv(a,x,y,initToZero);
      return;
    }
    // ...
    for (auto const& chunk: chunks)
      // ... (apply each chunk transposed)
  }

  // Performs entry-wise operations on our own matrix and some other.
  template <class F, class EntryB, class IndexB>
  void entrywiseOp(F const& f, NumaBCRSMatrix<EntryB,IndexB> const& B)
  {
    assert(N()==B.N() && M()==B.M());
    // ...
    this->chunks[i].entrywiseOp(f,B);
    // ...
  }
};
// Writes a NumaBCRSMatrix in triplet form to an output stream.
template <class Entry, class Index>
std::ostream& operator<<(std::ostream& out, NumaBCRSMatrix<Entry,Index> const& A)
{
  for (auto ri=A.begin(); ri!=A.end(); ++ri)
    for (auto ci=ri->begin(); ci!=ri->end(); ++ci)
      out << ri.index() << ' ' << ci.index() << ' ' << *ci << std::endl;
  return out;
}

// Converts a sparse matrix to a dense matrix.
template <class SparseMatrix>
DynamicMatrix<typename SparseMatrix::block_type> full(SparseMatrix const& A)
{
  DynamicMatrix<typename SparseMatrix::block_type> B(A.N(),A.M());
  for (auto ri=A.begin(); ri!=A.end(); ++ri)
    for (auto ci=ri->begin(); ci!=ri->end(); ++ci)
      B[ri.index()][ci.index()] = *ci;
  return B;
}

// Converts a subrange of a sparse matrix to a dense matrix.
template <class SparseMatrix, class RowRange, class ColRange>
DynamicMatrix<typename SparseMatrix::block_type> full(SparseMatrix const& A,
                                                      RowRange const& rows, ColRange const& cols)
{
  using Index = typename ColRange::value_type;
  // ...

  // sort the requested column indices together with their target positions
  std::vector<std::pair<Index,Index>> cidx;
  cidx.reserve(cols.size());
  // ...
  cidx.push_back(std::make_pair(c,i));
  // ...
  std::sort(begin(cidx),end(cidx),FirstLess());

  // copy the entries whose column index is requested
  for (size_t r=0; r<rows.size(); ++r)
    for (auto ci=A[rows[r]].begin(); ci!=A[rows[r]].end(); ++ci)
    {
      auto it = std::lower_bound(begin(cidx),end(cidx),
                                 std::make_pair(ci.index(),ci.index()),FirstLess());
      if (it!=end(cidx) && it->first==ci.index())
        B[r][it->second] = *ci;
    }
  // ...
}

// Matrix addition A + B; the sparsity patterns of both matrices can be different.
template <class Entry, class Index, class Index2>
NumaBCRSMatrix<Entry,Index> operator+(NumaBCRSMatrix<Entry,Index> const& A,
                                      NumaBCRSMatrix<Entry,Index2> const& B)
{
  // ... (AB is a matrix over the union of both sparsity patterns)
  for (Index row=0; row<AB.N(); ++row)
  {
    auto ABrow = AB[row];
    for (auto ci=A[row].begin(); ci!=A[row].end(); ++ci)
      ABrow[ci.index()] += *ci;
    for (auto ci=B[row].begin(); ci!=B[row].end(); ++ci)
      ABrow[static_cast<Index>(ci.index())] += *ci;
  }
  // ... (return AB)
}
// Creates a unit matrix in NUMA block compressed row storage format.
template <class Scalar, int n, class Index=size_t>
NumaBCRSMatrix<Dune::FieldMatrix<Scalar,n,n>,Index> sparseUnitMatrix(Index N)
{
  // ... (diagonal pattern)
  for (Index i=0; i<N; ++i)
    // ...
}

// Creates a zero matrix in NUMA block compressed row storage format.
template <class Scalar, int n, int m, class Index=size_t>
NumaBCRSMatrix<Dune::FieldMatrix<Scalar,n,m>,Index> sparseZeroMatrix(Index N, Index M)
{
  // ...
}

// Creates the transposed sparse matrix A^T.
template <class Entry, class Index>
auto transpose(NumaBCRSMatrix<Entry,Index> const& A)
{
  // ...
}

// "Deletes" rows and columns by extracting a copy of the matrix keeping only the
// non-deleted rows and columns.
template <class Target, class Source, class RowIndices, class ColIndices>
Target eraseRowsNCols(Source const& A, RowIndices const& ri, ColIndices const& ci)
{
  // start with all row and column indices and remove the ones to be erased
  std::vector<size_t> rows(A.N()), cols(A.M());
  std::iota(rows.begin(),rows.end(),0);
  std::iota(cols.begin(),cols.end(),0);
  // ...
  for (int i=0; i<ri.size(); ++i)
    rows.erase(rows.begin() + ri[i]);
  // ...
  for (int i=0; i<ci.size(); ++i)
    cols.erase(cols.begin() + ci[i]);
  // ... (extract the remaining submatrix)
}

// "Deletes" rows by extracting a copy of the matrix keeping only the non-deleted rows.
template <class Target, class Source, class RowIndices>
Target eraseRows(Source const& A, RowIndices const& ri)
{
  std::vector<size_t> cols;
  return eraseRowsNCols(A,ri,cols);
}

// "Deletes" columns by extracting a copy of the matrix keeping only the non-deleted
// columns.
template <class Target, class Source, class ColIndices>
Target eraseCols(Source const& A, ColIndices const& ci)
{
  std::vector<size_t> rows;
  return eraseRowsNCols(A,rows,ci);
}

// Returns all indices of nonzero columns.
template <class Source>
std::vector<size_t> nonZeroColumns(Source const& A)
{
  // flag every column that carries at least one structurally nonzero entry
  std::vector<size_t> nzFlags(A.M(),0);
  for (auto r=A.begin(); r!=A.end(); ++r)
    for (auto c=r->begin(); c!= r->end(); ++c)
      nzFlags.at(c.index()) = 1;

  // collect the flagged column indices
  std::vector<size_t> nzCols;
  for (int i=0; i<nzFlags.size(); ++i)
    if (nzFlags.at(i) == 1) nzCols.push_back(i);
  return nzCols;
}
// Reshapes the entry block structure of a NumaBCRSMatrix: a matrix with row1 x col1
// blocks is converted into one with row2 x col2 blocks.
template <int row2, int col2, int row1, int col1, class Scalar, class Index>
NumaBCRSMatrix<Dune::FieldMatrix<Scalar,row2,col2>,Index>
reshapeBlocks(NumaBCRSMatrix<Dune::FieldMatrix<Scalar,row1,col1>,Index> const& A)
{
  static_assert(row1 % row2 == 0);
  static_assert(col1 % col2 == 0);
  int const qN = row1/row2;
  int const qM = col1/col2;
  // ...

  // build the reshaped sparsity pattern: each original block is split into qN x qM blocks
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      for (int i=0; i<qN; ++i)
        for (int j=0; j<qM; ++j)
          creator.addElement(row.index()*qN + i, col.index()*qM + j);
  // ...

  // copy the entries into the reshaped matrix B
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
    {
      auto const& mat = *col;
      for (int k=0; k<row1; ++k)
        for (int l=0; l<col1; ++l)
          B[row.index()*qN + k/row2][col.index()*qM + l/col2][k%row2][l%col2] = mat[k][l];
    }
  // ... (return B)
}

// Concatenates two matrices horizontally: [A B].
template <int blockrows, int blockcols, class Scalar, class Index>
NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index>
horzcat(NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index> const& A,
        NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index> const& B)
{
  assert(A.N() == B.N());
  Index cols = (A.M() + B.M());
  // ...

  // sparsity pattern: entries of A keep their position, entries of B are shifted by A.M()
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index(), col.index());
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index(), col.index()+A.M());
  // ...

  // copy the values
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      result[row.index()][col.index()] = *col;
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      result[row.index()][col.index()+A.M()] = *col;
  // ... (return result)
}

// Concatenates two matrices vertically: [A; B].
template <int blockrows, int blockcols, class Scalar, class Index>
NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index>
vertcat(NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index> const& A,
        NumaBCRSMatrix<Dune::FieldMatrix<Scalar,blockrows,blockcols>,Index> const& B)
{
  assert(A.M() == B.M());
  Index rows = (A.N()+B.N());
  // ...

  // sparsity pattern: entries of A keep their position, entries of B are shifted by A.N()
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index(), col.index());
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index()+A.N(), col.index());
  // ...

  // copy the values
  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      result[row.index()][col.index()] = *col;
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      result[row.index()+A.N()][col.index()] = *col;
  // ... (return result)
}

// Concatenates two matrices diagonally; the resulting matrix is zero on the
// off-diagonal blocks: [A 0; 0 B].
template <int blockrows, int blockcols, class Index=size_t>
NumaBCRSMatrix<Dune::FieldMatrix<double,blockrows,blockcols>,Index>
diagcat(NumaBCRSMatrix<Dune::FieldMatrix<double,blockrows,blockcols>,Index> const& A,
        NumaBCRSMatrix<Dune::FieldMatrix<double,blockrows,blockcols>,Index> const& B)
{
  Index cols = (A.M()+B.M());
  Index rows = (A.N()+B.N());
  // ...

  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index(), col.index());
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
      creator.addElement(row.index()+A.N(), col.index()+A.M());
  // ...

  for (auto row=A.begin(); row!=A.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
    {
      auto const& mat = *col;
      for (int k=0; k < blockrows; ++k)
        for (int l=0; l < blockcols; ++l)
          result[row.index()][col.index()][k][l] = mat[k][l];
    }
  for (auto row=B.begin(); row!=B.end(); ++row)
    for (auto col=row->begin(); col!=row->end(); ++col)
    {
      auto const& mat = *col;
      for (int k=0; k < blockrows; ++k)
        for (int l=0; l < blockcols; ++l)
          result[row.index()+A.N()][col.index()+A.M()][k][l] = mat[k][l];
    }
  // ... (return result)
}
An exception that can be thrown whenever a key lookup fails.
std::vector< SparseIndexInt > ridx
row indices
std::vector< SparseIndexInt > cidx
column indices
std::vector< Scalar > data
data
An STL allocator that uses memory of a specific NUMA node only.
A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL interface (to complete....
iterator ConstRowIterator
iterator end()
returns an iterator to the first row
Self & operator-=(NumaBCRSMatrix< EntryB, IndexB > const &B)
Subtracts a sparse matrix from this one.
NumaBCRSMatrix(Expanded const &eIndices, Condensed const &cIndices, Matrix const &mat)
Deprecated. Use simpler version without cIndices instead.
const_iterator const_row_type
void mmv(X const &x, Y &y) const
Matrix-vector multiplication y -= A*x.
NumaBCRSMatrix(NumaBCRSMatrix< OtherEntry, Index > const &matrix)
Constructor copying a given matrix.
NumaBCRSMatrix(Matrix const &matrix, bool isSymmetric, bool isTransposed=false, bool symmetric=false)
Constructor.
Self & operator=(Self &&mat)=default
Move assignment.
Chunk & chunk(int i)
Obtains a reference to the given chunk.
Self & operator=(ThreadedMatrixDetail::NumaBCRSMatrixExpression< Arguments, Operation > const &e)
NOT YET IMPLEMENTED Assigns the given Numa matrix expression.
Self & operator+=(NumaBCRSMatrix< EntryB, IndexB > const &B)
Adds a sparse matrix to this one.
size_t nonzeroes() const
Returns the number of structurally nonzero elements.
NumaBCRSMatrix(Self &&A)=default
Move constructor.
Self & operator*=(Factor a)
Multiplication with a "scalar".
NumaBCRSMatrix(std::shared_ptr< NumaCRSPattern< Index > > const &pattern_, Entry const &init=Entry(0))
Constructor creating a matrix from a given sparsity pattern.
std::shared_ptr< NumaCRSPattern< Index > > getPattern() const
Returns a pointer to the sparsity pattern.
Self & operator=(typename Entry::field_type const &a)
Assigns the given scalar value to each entry.
NumaBCRSMatrix(NumaCRSPatternCreator< Index > const &creator, Entry const &init=Entry(0))
Constructor creating a matrix from a given sparsity pattern creator.
bool exists(Index r, Index c) const
returns true if (r,c) is structurally nonzero
void scatter(DynamicMatrix< Entry > const &B, RowIndices const &rows, ColIndices const &cols, BinaryOp const &op=BinaryOp())
Scatters a given submatrix into the matrix.
field_type smv(field_type const &a, X const &x, Y &y) const
Matrix-vector multiplication y = a*A*x.
void scatter(LMIterator first, LMIterator last)
Scatters given sub-matrices into the matrix by adding up their entries.
const_iterator end() const
NumaBCRSMatrix()
Constructs an empty 0x0 matrix.
NumaBCRSMatrix(Self const &A)=default
Copy constructor.
NumaBCRSMatrix(std::shared_ptr< NumaCRSPattern< Index > > const &pattern_, Matrix const &matrix, bool isSymmetric, bool isTransposed)
Constructor copying a given matrix.
ThreadedMatrixDetail::NumaBCRSMatrixIterator< Entry, Index > iterator
iterator type stepping through the rows
Self & operator=(Entry const &a)
Assigns the given value to each entry.
NumaBCRSMatrix(Expanded const &eIndices, Matrix const &mat)
Indexed submatrix constructor.
iterator begin()
returns an iterator to the first row
field_type usmv(field_type const &a, X const &x, Y &y) const
Matrix-vector multiplication y += a*A*x and subsequent computation of the dual pairing of x and y if A is square.
row_type operator[](Index r)
Subscript operator allowing random access to rows.
const_row_type operator[](Index r) const
void umtv(X const &x, Y &y) const
Matrix-vector multiplication y += A^T*x.
const_iterator begin() const
typename iterator::Iterator ColIterator
column iterator stepping through the entries of a row
field_type frobenius_norm() const
Computes the Frobenius norm of the matrix.
ThreadedMatrixDetail::NumaBCRSMatrixConstIterator< Entry, Index > const_iterator
NumaBCRSMatrix(NumaBCRSMatrix< Entry, Index > const &matrix, bool isSymmetric, bool isTransposed=false, bool symmetric=false)
Constructor.
Self & operator=(Self const &mat)=default
Copy assignment.
void smtv(field_type const &a, X const &x, Y &y) const
Matrix-vector multiplication y = a*A^T*x.
void usmtv(field_type const &a, X const &x, Y &y) const
Matrix-vector multiplication y += a*A^T*x.
field_type umv(X const &x, Y &y) const
Matrix-vector multiplication y += A*x and subsequent computation of the dual pairing of x and y if A is square.
Self operator()(RowIndices const &ri, ColIndices const &ci) const
Index M() const
The number of columns.
void mtv(X const &x, Y &y) const
Matrix-vector multiplication y = A^T*x.
typename iterator::ConstIterator ConstColIterator
column iterator stepping through the const entries of a row
field_type frobenius_norm2() const
Computes the square of the Frobenius norm of the matrix.
Index N() const
The number of rows.
ScalarType< Entry > Scalar
field_type mv(X const &x, Y &y) const
Matrix-vector multiplication y = A*x with computation of the dual pairing of x and y if A is square.
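A brief usage sketch of the ISTL-style operations listed above. The vector type Dune::BlockVector<Dune::FieldVector<double,1>> and the already populated matrix A are illustrative assumptions, not part of this header:
  // assuming A is a set-up NumaBCRSMatrix<Dune::FieldMatrix<double,1,1>>
  Dune::BlockVector<Dune::FieldVector<double,1>> x(A.M()), y(A.N());
  x = 1.0;                            // fill x with ones
  A.mv(x,y);                          // y = A x
  A.usmv(0.5,x,y);                    // y += 0.5 * A x
  double nrm2 = A.frobenius_norm2();  // squared Frobenius norm of A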
A NUMA-aware creator for matrix sparsity patterns.
Index cols() const
The number of columns.
void addElements(IterRow const fromRow, IterRow const toRow, IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
Enters entries into the sparsity pattern.
ChunkCreator const & creator(int node) const
Returns the chunk creator.
void balance()
Redistributes the rows to the NUMA chunks in order to have the same number of entries in each chunk.
void addElement(Index row, Index col)
Enters a single entry into the sparsity pattern.
int nodes() const
Returns the number of NUMA nodes/chunks used.
void addElements(RowRangeSequence const &rrs, ColRangeSequence const &crs, bool colsAreSorted=false)
Enters elements into the sparsity pattern.
size_t nonzeroes() const
The number of structurally nonzero elements.
void addAllElements()
Enters all possible elements (defining a dense matrix).
bool isSymmetric() const
Returns the symmetry status of the pattern.
NumaCRSPatternCreator(Index rows_, Index cols, bool symmetric=false, int nnzPerRow=0)
Constructs a rows times cols matrix sparsity structure creator.
void addDiagonal()
Enters the diagonal elements.
void addDenseBlock(Index fromRow, Index toRow, Index fromCol, Index toCol)
Enters a contiguous dense block into the sparsity pattern.
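A minimal construction sketch using the creator interface above; the tridiagonal pattern and the scalar 1x1 entry type are illustrative assumptions:
  // set up an n x n tridiagonal sparsity pattern and a matrix over it
  size_t const n = 100;
  NumaCRSPatternCreator<size_t> creator(n,n);
  for (size_t i=0; i<n; ++i)
  {
    creator.addElement(i,i);
    if (i+1<n) { creator.addElement(i,i+1); creator.addElement(i+1,i); }
  }
  NumaBCRSMatrix<Dune::FieldMatrix<double,1,1>> A(creator);   // entries initialized to zero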
A NUMA-aware compressed row storage sparsity pattern.
std::shared_ptr< ChunkPattern > pattern(int i) const
Returns the individual patterns.
int nodes() const
Returns the number of NUMA nodes/chunks used.
int chunk(Index row) const
Returns the number of the chunk containing the given row.
Index N() const
The number of rows.
NumaCRSPattern()
Constructs an empty 0x0 pattern.
NumaCRSPattern(NumaCRSPatternCreator< Index > const &creator)
Constructor creating a sparsity pattern from the given creator.
NumaCRSPattern(Expanded const &eIndices, Condensed const &cIndices, Matrix const &mat)
Constructor.
bool isSymmetric() const
Returns the symmetry status of the pattern.
Index M() const
The number of columns.
std::vector< Index > const & rowStart() const
Returns the limiting row indices between the chunks.
size_t nonzeroes() const
Returns the number of structurally nonzero entries.
size_t storage() const
Returns the number of stored entries.
bool exists(Index r, Index c) const
queries whether an entry is present in the sparsity pattern
NumaCRSPattern(Matrix const &matrix, bool isSymmetric, bool isTransposed, bool symmetric)
Constructor extracting the sparsity pattern of a given matrix (usually a Dune::BCRSMatrix).
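Patterns are held by shared pointers and can be reused for several matrices; a sketch, assuming A is an existing NumaBCRSMatrix with 1x1 double blocks:
  auto p = A.getPattern();                              // std::shared_ptr<NumaCRSPattern<size_t>>
  NumaBCRSMatrix<Dune::FieldMatrix<double,1,1>> B(p);   // second matrix over the same pattern, zero initialized
  std::cout << p->nonzeroes() << " nonzeroes distributed over " << p->nodes() << " chunks\n";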
Implementation of thread pools suitable for parallelization of (more or less) memory-bound algorithms...
static NumaThreadPool & instance(int maxThreads=std::numeric_limits< int >::max())
Returns a globally unique thread pool instance.
int nodes() const
Reports the number of NUMA nodes (i.e., memory interfaces/CPU sockets)
This class stores a couple of compressed row storage rows in memory allocated locally on a NUMA node.
CRSChunk(std::shared_ptr< CRSChunkPattern< Index > > const &pattern_, MatrixAsTriplet< Scalar, Index2 > const &matrix, bool isSymmetric, bool isTransposed)
Constructor.
CRSChunk(int node)
Constructor initializing an empty Chunk.
CRSChunkPattern< Index > const & pattern() const
Returns the sparsity pattern of this chunk.
void scatter(LMIterator first, LMIterator last)
Scatters given sub-matrices into the chunk by adding up their entries.
CRSChunk(std::shared_ptr< CRSChunkPattern< Index > > const &pattern_, Entry const &init)
Constructor.
auto valStartIterator(Index row)
Returns an iterator to the start of the values for the given local row index.
void applyTransposed(Scalar a, Domain const &x, Range &y) const
Transposed matrix-vector multiplication y = a*A^T*x or y += a*A^T*x.
Scalar apply(Scalar a, Domain const &x, Range &y, bool initialize) const
Matrix-vector multiplication y = a*A*x or y += a*A*x, and computation of the dual pairing of x and y.
CRSChunk(std::shared_ptr< CRSChunkPattern< Index > > const &pattern_, Matrix const &matrix, bool isSymmetric, bool isTransposed)
Constructor.
Self & operator=(Self const &c)=default
Copy assignment.
void gatherMirrored(Scalar a, Domain const &x, Range &y, CRSChunk< Entry, Index > const &block, bool subdiagonal, bool initToZero) const
Matrix-vector multiplication of transposed parts.
void entrywiseOp(F const &f, NumaBCRSMatrix< EntryB, IndexB > const &B)
Performs entry-wise operations on our own matrix and some other.
CRSChunk(std::shared_ptr< CRSChunkPattern< Index > > const &pattern_, Expanded const &eIndices, Condensed const &cIndices, Matrix const &matrix)
Constructor.
Self & operator=(Value const &a)
Assigns the given value to each entry.
CRSChunk(Matrix const &matrix, bool isSymmetric, bool isTransposed, Index firstRow, Index lastRow, bool symmetric, int node)
Constructor.
Self & operator=(ThreadedMatrixDetail::NumaBCRSMatrixExpressionChunk< Arguments, Operation > const &e)
Assigns the given matrix expression componentwise.
typename EntryTraits< Entry >::field_type Scalar
A class supporting two-stage construction of sparsity patterns of NUMA matrix chunks.
size_t nonzeroes() const
Returns the number of stored entries (structurally nonzero elements).
size_t balanceBackward(size_t covered, size_t const nnz, int chunks, std::vector< IndexArray > &moveRows)
Moves rows in and out of the chunk in order to equilibrate the number of nonzeroes.
CRSChunkPatternCreator(Index firstRow, Index lastRow, Index ncols, bool symmetric, int node)
Constructor.
void addAllElements(Index columns)
Enters all possible elements (defining a dense chunk).
void addElements(IterRow fromRow, IterRow const toRow, IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
Enters elements into the sparsity pattern.
IndexArray const & row(Index i) const
Returns the sorted and unique column indices of elements in row .
void clear()
Clears the complete data, handing back the memory.
size_t balanceForward(size_t const covered, size_t const nnz, int chunks, std::vector< IndexArray > &moveRows)
Moves rows in and out of the chunk in order to equilibrate the number of nonzeroes.
std::vector< Index > IndexArray
An STL container holding a sequence of indices.
void reserve(Index nnzPerRow=8)
Reserves a certain amount of entries per row.
This class maintains the sparsity structure of a couple of matrix rows (a NUMA matrix chunk).
bool exists(Index r, Index c) const
returns true if the entry with global row and column indices is present in the sparsity pattern
size_t colStart(Index row) const
returns the index from which on the entries in given local row are stored
size_t storage() const
Returns the number of stored entries.
CRSChunkPattern(CRSChunkPatternCreator< Index > const &creator)
Constructor.
CRSChunkPattern(Index first, Index last, Expanded const &eIndices, Condensed const &cIndices, Matrix const &mat, int node)
Constructor.
Index nonzeroesPerRow() const
Returns the average number of nonzeroes per row.
size_t position(Index r, Index c) const
returns the position of an element with given global row and column indices
CRSChunkPattern(Matrix const &matrix, bool isSymmetric, bool isTransposed, Index firstRow, Index lastRow, bool symmetric, int node)
Constructor.
CRSChunkPattern(MatrixAsTriplet< Scalar, Index2 > const &matrix, bool isSymmetric, bool isTransposed, Index firstRow, Index lastRow, bool symmetric, int node)
Constructor.
Index col(size_t idx) const
size_t nonzeroes() const
Returns the number of structurally nonzero entries.
std::vector< Index, NumaAllocator< Index > >::const_iterator colStartIterator(Index row) const
an iterator pointing to the start of the column index for the given local row
A base class representing basic meta information about sparsity patterns of NUMA matrix chunks.
Index columns() const
number of columns in the matrix
int node() const
the NUMA node on which to allocate the memory
Index last() const
end of the half-open row range
CRSChunkPatternInfo(Index first_, Index last_, Index cols_, bool symmetric, int node)
Constructor.
Index first() const
start of the covered row range
bool symmetric() const
if true, only the lower triangular part is stored
bool operator!=(Self const &it) const
NumaBCRSMatrixConstIterator(NumaBCRSMatrix< Entry, Index > const &matrix_, Index row_)
Row const * operator->() const
Row const & operator*() const
Dereferencing yields the row.
bool operator==(Self const &it) const
void operator+=(Index inc)
An iterator stepping through all entries in a row.
std::random_access_iterator_tag iterator_category
bool operator!=(Self const &it) const
NumaBCRSMatrixConstRowIterator(ColIterator col_, ValueIterator val_)
std::vector< Entry, NumaAllocator< Entry > >::const_iterator ValueIterator
Entry const * operator->() const
Entry const & operator*() const
std::vector< Index, NumaAllocator< Index > >::const_iterator ColIterator
bool operator==(Self const &it) const
difference_type operator-(Self const &it) const
std::ptrdiff_t difference_type
iterator end(size_t) const
iterator begin(size_t) const
NumaBCRSMatrixExpressionChunk< Arguments, Operation > const & operator[](int i) const
NumaBCRSMatrixIterator(NumaBCRSMatrix< Entry, Index > &matrix_, Index row_)
Entry & operator*() const
Entry * operator->() const
std::vector< Entry, NumaAllocator< Entry > >::iterator ValueIterator
NumaBCRSMatrixRowIterator(ColIterator col_, ValueIterator val_)
std::vector< Index, NumaAllocator< Index > >::const_iterator ColIterator
Entry const & operator[](Index c) const
Random read access to row entries by global column index.
NumaBCRSMatrixRowIterator< Entry, Index > Iterator
void update(NumaBCRSMatrix< Entry, Index > &matrix, Index row, int chunk)
Entry & operator[](Index c)
Random write access to row entries by global column index.
ConstIterator end() const
Iterator find(Index c)
Looks up a particular entry specified by column index.
ConstIterator begin() const
Start of row entries.
size_type size() const
Returns the number of entries in the current row.
Index const * getindexptr() const
Low-level access to array of (sorted) column indices in this row.
block_type const * getptr() const
Low-level access to array of values in this row.
std::vector< Index, NumaAllocator< Index > >::const_iterator ColIterator
NumaBCRSMatrixConstRowIterator< Entry, Index > ConstIterator
std::vector< Entry, NumaAllocator< Entry > >::iterator ValueIterator
ConstIterator find(Index c) const
Looks up a particular entry specified by column index.
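Typical row and entry traversal with these iterators, mirroring the stream output operator defined in this header; A is assumed to be a populated NumaBCRSMatrix with 1x1 double blocks:
  for (auto ri=A.begin(); ri!=A.end(); ++ri)          // iterate over the rows
    for (auto ci=ri->begin(); ci!=ri->end(); ++ci)    // iterate over the entries of one row
      std::cout << ri.index() << ' ' << ci.index() << ' ' << *ci << '\n';

  auto ci = A[2].find(5);                             // look up entry (2,5); yields end() if absent
  if (ci != A[2].end())
    *ci = 2.0;                                        // overwrite the entry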
This file contains various utility functions that augment the basic functionality of Dune.
NumaBCRSMatrix< Dune::FieldMatrix< Scalar, row2, col2 >, Index > reshapeBlocks(NumaBCRSMatrix< Dune::FieldMatrix< Scalar, row1, col1 >, Index > const &A)
reshapes the NumaBCRSMatrix entry block structure
NumaBCRSMatrix< Dune::FieldMatrix< Scalar, n, n >, Index > sparseUnitMatrix(Index N)
creates a unit matrix in NUMA block compressed row storage format
NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > vertcat(NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > const &A, NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > const &B)
concatenate two matrices vertically
auto transpose(NumaBCRSMatrix< Entry, Index > const &A)
Creates the transposed sparse matrix .
DynamicMatrix< typename SparseMatrix::block_type > full(SparseMatrix const &A)
Converts a sparse NumaBCRSMatrix to a dense matrix.
Dune::FieldVector< T, n > max(Dune::FieldVector< T, n > x, Dune::FieldVector< T, n > const &y)
Componentwise maximum.
Target eraseCols(Source const &A, ColIndices const &ci)
"deletes" columns by extracting a copy of the matrix keeping only the non-deleted columns.
NumaBCRSMatrix< Dune::FieldMatrix< Scalar, n, m >, Index > sparseZeroMatrix(Index N, Index M)
creates a zero matrix in NUMA block compressed row storage format
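For example (the sizes and block dimensions are illustrative):
  size_t n = 10, m = 5;
  auto I = sparseUnitMatrix<double,2>(n);       // n x n matrix with 2x2 identity blocks on the diagonal
  auto Z = sparseZeroMatrix<double,2,3>(n,m);   // n x m zero matrix with 2x3 blocks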
DynamicMatrix< typename SparseMatrix::block_type > full(SparseMatrix const &A, RowRange const &rows, ColRange const &cols)
Converts a subrange of a sparse matrix to a dense matrix.
Target eraseRows(Source const &A, RowIndices const &ri)
"deletes" rows by extracting a copy of the matrix keeping only the non-deleted rows.
NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > horzcat(NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > const &A, NumaBCRSMatrix< Dune::FieldMatrix< Scalar, blockrows, blockcols >, Index > const &B)
concatenate two matrices horizontally
Target eraseRowsNCols(Source const &A, RowIndices const &ri, ColIndices const &ci)
"deletes" rows/columns by extracting a copy of the matrix keeping the non-deleted rows/columns.
std::vector< size_t > nonZeroColumns(Source const &A)
returns all indices of nonzero columns.
NumaBCRSMatrix< Dune::FieldMatrix< double, blockrows, blockcols >, Index > diagcat(NumaBCRSMatrix< Dune::FieldMatrix< double, blockrows, blockcols >, Index > const &A, NumaBCRSMatrix< Dune::FieldMatrix< double, blockrows, blockcols >, Index > const &B)
concatenate two matrices diagonally; the resulting matrix is zero on the off-diagonal blocks
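A concatenation sketch, assuming A and B are NumaBCRSMatrix objects with 1x1 double blocks and compatible dimensions:
  auto H = horzcat(A,B);   // [A B],  requires A.N() == B.N()
  auto V = vertcat(A,B);   // [A; B], requires A.M() == B.M()
  auto D = diagcat(A,B);   // [A 0; 0 B]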
NumaBCRSMatrix< Entry, Index > operator+(NumaBCRSMatrix< Entry, Index > const &A, NumaBCRSMatrix< Entry, Index2 > const &B)
Matrix addition A + B. The sparsity patterns of both matrices can be different. The size of the matrices h...
Dune::FieldVector< T, n > min(Dune::FieldVector< T, n > x, Dune::FieldVector< T, n > const &y)
Componentwise minimum.
Index uniformWeightRangeStart(BlockIndex i, BlockIndex n, Index m)
Computes partitioning points of ranges for uniform weight distributions.
void parallelForNodes(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different NUMA nodes.
void equalWeightRanges(std::vector< size_t > &x, size_t n)
Computes partitioning points such that the sum of weights in each partition is roughly the same.
Scalar distance(Point< Scalar, dim > const &first, Point< Scalar, dim > const &second)
void getRowCount(Dune::BCRSMatrix< Entry, Allocator > const &matrix, bool isTransposed, std::vector< Index > &rowCount)
std::ostream & operator<<(std::ostream &s, std::vector< Scalar > const &vec)
typename GetScalar< Type >::type ScalarType
Extracts the scalar field type from linear algebra data types.
NumaCRSPattern< Index > operator+(NumaCRSPattern< Index > const &pa, NumaCRSPattern< Index2 > const &pb)
Target submatrix(Source const &A, RowIndices const &ri, ColIndices const &ci)
A comparator functor that supports sorting std::pair by their first component.
static void init(Index first, Index last, Matrix const &matrix, std::vector< size_t, NumaAllocator< size_t > > &colStart, std::vector< Index, NumaAllocator< Index > > &cols, std::vector< Entry, NumaAllocator< Entry > > &values, std::vector< Index > const &nRowEntries)
static void init(Index first, Index last, Matrix const &matrix, std::vector< size_t, NumaAllocator< size_t > > &colStart, std::vector< Index, NumaAllocator< Index > > &cols, std::vector< Entry, NumaAllocator< Entry > > &values, std::vector< Index > const &nRowEntries)
static void init(Index first, Index last, Matrix const &matrix, std::vector< size_t, NumaAllocator< size_t > > &colStart, std::vector< Index, NumaAllocator< Index > > &cols, std::vector< Entry, NumaAllocator< Entry > > &values, std::vector< Index > const &)
static void copy(Entry const &from, Entry &to, bool isTransposed)
static void copy(From const &from, To &to, bool isTransposed)
static void copy(From const &from, To &to, bool isTransposed)