20#include <boost/iterator/iterator_facade.hpp>
28 template <
class Entry>
class NumaDenseMatrix;
33 namespace NumaDenseMatrixDetail
36 template <
class Entry>
37 class ColumnIterator:
public boost::iterator_facade<ColumnIterator<Entry>,Entry,boost::random_access_traversal_tag>
40 ColumnIterator() =
default;
42 ColumnIterator(Entry* base_,
size_t stride_,
size_t idx_=0)
43 : base(base_), stride(stride_), idx(idx_) {}
45 size_t index()
const {
return idx; }
48 friend class boost::iterator_core_access;
50 bool equal(ColumnIterator<Entry>
const& p)
const
55 void increment() { ++idx; }
56 void decrement() { --idx; }
57 void advance(
typename ColumnIterator<Entry>::difference_type n) { idx += n; }
59 typename ColumnIterator<Entry>::difference_type distance_to(ColumnIterator<E>
const& i)
const {
return i.idx - idx; }
62 Entry& dereference()
const {
return base[idx*stride]; }
71 template <
class Entry>
75 typedef ColumnIterator<Entry> iterator;
76 typedef ColumnIterator<Entry const> const_iterator;
80 Row(Entry* first_,
size_t stride_, std::size_t cols_)
81 : first(first_), stride(stride_), cols(cols_) {}
83 iterator begin() {
return iterator(first,stride,0); }
84 iterator end() {
return iterator(first,stride,cols); }
85 const_iterator begin()
const {
return const_iterator(first,stride,0); }
86 const_iterator end()
const {
return const_iterator(first,stride,cols); }
88 Entry& operator[](
size_t c) {
return first[stride*c]; }
89 Entry
const& operator[](
size_t c)
const {
return first[stride*c]; }
99 template <
class Entry>
100 class RowIterator:
public boost::iterator_facade<RowIterator<Entry>,
101 typename std::conditional<std::is_const<Entry>::value,
102 Row<typename std::remove_const<Entry>::type> const,
104 boost::random_access_traversal_tag>
106 typedef typename std::remove_const<Entry>::type NonConstEntry;
108 typedef typename std::conditional<std::is_const<Entry>::value,
109 NumaDenseMatrix<NonConstEntry>
const,
110 NumaDenseMatrix<Entry>>::type Matrix;
112 typedef boost::iterator_facade<RowIterator<Entry>,
113 typename std::conditional<std::is_const<Entry>::value,
114 Row<NonConstEntry>
const,
116 boost::random_access_traversal_tag> Base;
120 RowIterator() =
default;
122 RowIterator(Matrix& mat_,
size_t idx_)
128 size_t index()
const {
return idx; }
131 friend class boost::iterator_core_access;
132 using DifferenceType = Base::difference_type;
134 bool equal(RowIterator<Entry>
const& other)
const {
return idx==other.idx;}
135 void increment() { update(idx+1); }
136 void decrement() { update(idx-1); }
137 void advance(
typename RowIterator<Entry>::difference_type n) { update(idx+n); }
138 template <
class E> DifferenceType distance_to(RowIterator<E>
const& i)
const {
return static_cast<DifferenceType
>(i.idx) - idx; }
139 typename Base::reference dereference()
const {
return row; }
142 void update(
size_t newidx)
150 row =
typename Base::value_type(
const_cast<NonConstEntry*
>(&mat->chunks[chunk][idx-rowStart]),stride,mat->M());
156 mutable typename Base::value_type row;
161 template <
class T,
int n>
164 typename Dune::FieldTraits<T>::field_type result = 0;
165 for (
int i=0; i<n; ++i)
166 result += frobenius_product(x[i],y[i]);
170 template <
class T,
int n,
int m>
173 typename Dune::FieldTraits<T>::field_type result = 0;
174 for (
int i=0; i<n; ++i)
175 for (
int j=0; j<m; ++j)
176 result += frobenius_product(x[i][j],y[i][j]);
180 double frobenius_product(
double x,
double y) {
return x*y; }
181 float frobenius_product(
float x,
float y) {
return x*y; }
185 template <
class T,
int n>
188 template <
class T,
int n,
int m>
195 template <
class T,
int n>
196 typename Dune::FieldTraits<typename Dune::FieldTraits<T>::field_type>::real_type one_norm(
Dune::FieldVector<T,n> const& x) {
return x.one_norm(); }
198 template <
class T,
int n,
int m>
199 typename Dune::FieldTraits<typename Dune::FieldTraits<T>::field_type>::real_type one_norm(
Dune::FieldMatrix<T,n,m> const& x) {
return x.one_norm(); }
201 double one_norm(
double x) {
return std::abs(x); }
202 float one_norm(
float x) {
return std::abs(x); }
204 template <
class T,
int n>
207 template <
class T,
int n,
int m>
210 void axpy(
double& y,
double a,
double x) { y += a*x; }
211 void axpy(
float& y,
float a,
float x) { y += a*x; }
216 template <
class Entry>
class VectorIterator;
225 template <
class Entry>
229 typedef std::vector<Entry,NumaAllocator<Entry>> Chunk;
230 typedef NumaDenseBase<Entry> Self;
233 typedef size_t size_type;
234 typedef Entry value_type;
239 typedef typename Dune::FieldTraits<Entry>::field_type field_type;
240 typedef typename Dune::FieldTraits<field_type>::real_type real_type;
245 typedef Entry block_type;
255 NumaDenseBase (): NumaDenseBase(0,0)
262 NumaDenseBase (NumaDenseBase
const& other)
263 : rows(other.rows), cols(other.cols), chunks(other.chunks.size())
267 chunks[i] = other.chunks[i];
274 NumaDenseBase(NumaDenseBase&& a) =
default;
279 NumaDenseBase (size_type r, size_type c, value_type v = value_type()) :
284 for (size_type i=0; i<n; ++i)
291 NumaDenseBase& operator=(NumaDenseBase
const& other)
293 assert(chunks.size() == other.chunks.size());
298 chunks[i] = other.chunks[i];
305 Self& operator=(field_type
const& a)
307 for_each([&](Entry& e) { e = a; });
318 void resize (size_type r, size_type c)
323 if (rows==r && cols==c)
328 for (
int i=0; i<chunks.size(); ++i)
336 size_type N()
const {
return rows; }
342 size_type M()
const {
return cols; }
348 field_type dot(Self
const& other)
const
350 assert(rows==other.rows && cols==other.cols);
351 std::vector<field_type> dots(chunks.size());
354 assert(n==chunks.size());
356 for (
size_t k=0; k<chunks[i].size(); ++k)
357 sum += NumaDenseMatrixDetail::frobenius_product(chunks[i][k],other.chunks[i][k]);
360 return std::accumulate(dots.begin(),dots.end(),0);
369 template <
class Result,
class F>
370 Result accumulate(F f, Result
const& init)
const
372 std::vector<Result> rs(chunks.size());
375 assert(n==this->chunks.size());
377 for (
auto const& e: chunks[i])
381 return std::accumulate(rs.begin()+1,rs.end(),rs[0],f);
388 void for_each(F
const& f)
392 assert(n==this->chunks.size());
393 std::for_each(begin(chunks[i]),end(chunks[i]),f);
401 void for_each2(F
const& f, Self
const& other)
403 assert(rows==other.rows && cols==other.cols);
406 assert(n==this->chunks.size());
407 for (
size_t j=0; j<chunks[i].size(); ++j)
408 f(chunks[i][j],other.chunks[i][j]);
413 friend class NumaDenseMatrixDetail::RowIterator<Entry>;
414 friend class NumaDenseMatrixDetail::RowIterator<Entry const>;
415 friend class NumaDenseMatrixDetail::VectorIterator<Entry>;
416 friend class NumaDenseMatrixDetail::VectorIterator<Entry const>;
418 size_type rows, cols;
419 std::vector<Chunk> chunks;
446 template <
class Entry>
450 typedef NumaDenseMatrixDetail::NumaDenseBase<Entry> Base;
459 typedef typename Dune::FieldTraits<Entry>::field_type
field_type;
460 typedef typename Dune::FieldTraits<field_type>::real_type
real_type;
589 this->for_each([&](Entry& e) { e *= a; });
598 return (*
this) *= (1/scalar);
607 this->
for_each2([](Entry& y, Entry
const& x) { y += x; },b);
616 this->
for_each2([](Entry& y, Entry
const& x) { y -= x; },b);
639 template <
class Entry>
642 for (
auto ri=mat.
begin(); ri!=mat.
end(); ++ri)
644 std::copy(ri->begin(),ri->end(),std::ostream_iterator<Entry>(out,
" "));
658 namespace NumaDenseMatrixDetail
661 template <
class Entry>
662 class VectorIterator:
public boost::iterator_facade<VectorIterator<Entry>,Entry,boost::random_access_traversal_tag>
664 typedef typename std::remove_const<Entry>::type NonConstEntry;
666 typedef typename std::conditional<std::is_const<Entry>::value,
670 VectorIterator() =
default;
672 VectorIterator(
Vector& vec_,
typename Vector::size_type idx_)
673 :
vec(&vec_), idx(idx_) {}
675 size_t index()
const {
return idx; }
678 friend class boost::iterator_core_access;
680 bool equal(VectorIterator<Entry>
const& p)
const
685 void increment() { ++idx; }
686 void decrement() { --idx; }
687 void advance(
typename VectorIterator<Entry>::difference_type n) { idx += n; }
689 typename VectorIterator<Entry>::difference_type distance_to(VectorIterator<E>
const& i)
const {
return i.idx - idx; }
690 Entry& dereference()
const
694 return vec->chunks[chunk][idx-chunkStart];
698 typename Vector::size_type idx;
715 template <
class Entry>
716 class NumaVector:
public NumaDenseMatrixDetail::NumaDenseBase<Entry>
719 typedef NumaDenseMatrixDetail::NumaDenseBase<Entry> Base;
722 typedef typename Dune::FieldTraits<Entry>::field_type
field_type;
723 typedef typename Dune::FieldTraits<field_type>::real_type
real_type;
726 typedef NumaDenseMatrixDetail::VectorIterator<Entry>
iterator;
819 this->for_each([&](Entry& e) { e *= a; });
828 return (*
this) *= (1/a);
838 this->
for_each2([](Entry& yi, Entry
const& xi) { yi += xi; },x);
849 this->
for_each2([](Entry& yi, Entry
const& xi) { yi -= xi; },x);
889 return this->accumulate([](
real_type s, Entry
const& e) {
return s+NumaDenseMatrixDetail::one_norm(e); },
real_type(0));
903 template <
class Entry>
907 copy(begin(
vec),end(
vec),ostream_iterator<Entry>(out,
" "));
A dense matrix class tailored towards NUMA architectures.
NumaDenseMatrix()=default
Creates a 0 x 0 matrix.
NumaDenseMatrix(NumaDenseMatrix const &a)=default
Copy constructor.
void resize(size_type r, size_type c)
Resizes the matrix to r x c, leaving the entries in an undefined state.
real_type frobenius_norm() const
frobenius norm: sqrt(sum over squared frobenius norms of entries)
Dune::FieldTraits< field_type >::real_type real_type
size_type M() const
Return the number of columns.
real_type frobenius_norm2() const
frobenius norm squared: sum over squared frobenius norms of entries
RowIterator::value_type row_type
Self & operator=(field_type const &a)
Assignment from scalar.
NumaDenseMatrixDetail::RowIterator< Entry > RowIterator
Self & operator-=(Self const &b)
Matrix subtraction.
ConstRowIterator end() const
Get const iterator to one beyond last row.
std::ostream & operator<<(std::ostream &out, NumaDenseMatrix< Entry > const &mat)
Self & operator/=(field_type const &scalar)
Division by a scalar.
Dune::FieldTraits< Entry >::field_type field_type
the type of the scalar field.
RowIterator end()
Get iterator to one beyond last row.
ConstRowIterator begin() const
Get const iterator to first row.
ConstRowIterator::value_type const_row_type
NumaDenseMatrixDetail::RowIterator< Entry const > ConstRowIterator
Self & operator+=(Self const &b)
Matrix addition. Both matrices have to have the same sizes.
NumaDenseMatrix(size_type r, size_type c, value_type v=value_type())
Creates an r x c matrix with given entries.
Self & operator*=(field_type const &a)
Multiplication with a scalar.
RowIterator begin()
Get iterator to first row.
row_type operator[](size_type row)
The subscript operator.
NumaDenseMatrix(NumaDenseMatrix &&a)=default
Move constructor.
void setSize(size_type r, size_type c)
Resizes the matrix to r x c, leaving the entries in an undefined state.
NumaDenseMatrix & operator=(NumaDenseMatrix const &a)=default
Copy assignment.
static NumaThreadPool & instance(int maxThreads=std::numeric_limits< int >::max())
Returns a globally unique thread pool instance.
int nodes() const
Reports the number of NUMA nodes (i.e., memory interfaces/CPU sockets)
A vector class tailored towards NUMA architectures. This vector distributes its entries in blocks of ...
Self & operator*=(field_type const &a)
iterator begin()
iterator to first entry.
const_iterator begin() const
const iterator to first row.
const_iterator end() const
const iterator to one beyond last entry.
NumaVector(size_type r, value_type v=value_type())
Creates an r vector with given entries.
NumaVector(NumaVector const &a)=default
Copy constructor.
real_type one_norm() const
one-norm: sum over absolute values of entries
real_type two_norm2() const
euclidean norm squared: sum over squared euclidean norms of entries
Self & operator/=(field_type const &a)
NumaVector & operator=(NumaVector const &a)=default
Copy assignment.
Base::value_type value_type
NumaDenseMatrixDetail::VectorIterator< Entry > iterator
field_type dot(Self const &other) const
NumaDenseMatrixDetail::VectorIterator< Entry const > const_iterator
iterator end()
iterator to one beyond last entry.
void resize(size_type r)
Resizes the vector to r entries, leaving the entries in an undefined state.
Entry & operator[](size_type row)
The subscript operator.
Self & axpy(field_type const &a, Self const &x)
NumaVector()
Creates a 0 vector.
Base::size_type size_type
NumaVector(NumaVector &&a)=default
Move constructor.
Dune::FieldTraits< Entry >::field_type field_type
std::ostream & operator<<(std::ostream &out, NumaVector< Entry > const &vec)
Dune::FieldTraits< field_type >::real_type real_type
Self & operator-=(Self const &x)
real_type two_norm() const
euclidean norm: sqrt(sum over squared euclidean norms of entries)
Self & operator+=(Self const &x)
void for_each2(Seq1 const &s1, Seq2 &s2, Func const &func)
Index uniformWeightRangeStart(BlockIndex i, BlockIndex n, Index m)
Computes partitioning points of ranges for uniform weight distributions.
void parallelForNodes(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different NUMA nodes.
Index uniformWeightRange(Index j, Index n, Index m)
Computes the range in which an index is to be found when partitioned for uniform weights.
void axpy(Dune::BCRSMatrix< Dune::FieldMatrix< Scalar, 1, 1 > > const &P, Dune::BlockVector< Dune::FieldVector< Scalar, n > > const &x, Dune::BlockVector< Dune::FieldVector< Scalar, n > > &y, Scalar alpha=1.0)
Compute . If resetSolution=true computes .
Dune::FieldVector< Scalar, dim > Vector
EntryTraits< Entry >::real_type frobenius_norm2(Entry const &x)