kaskade7/html/matrix_product_8hh_source.html

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                                                                           */

/*  This file is part of the library KASKADE 7                               */

/*  https://www.zib.de/research/projects/kaskade7-finite-element-toolbox     */

/*                                                                           */

/*  Copyright (C) 2017-2024 Zuse Institute Berlin                            */

/*                                                                           */

/*  KASKADE 7 is distributed under the terms of the ZIB Academic License.    */

/*    see $KASKADE/academic.txt                                              */

/*                                                                           */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


#ifndef MATRIXPRODUCT_HH

#define MATRIXPRODUCT_HH


#include "linalg/threadedMatrix.hh"

#include <algorithm>
#include <cassert>
#include <iterator>
#include <mutex>
#include <type_traits>
#include <vector>


namespace Kaskade

{

  /**
   * \brief Computes the matrix-matrix product \f$ C = AB \f$ of two sparse block matrices.
   *
   * The entry type of the result is chosen at compile time:
   * - if the entries of A are 1x1, the result has the entry type of B,
   * - otherwise, if the entries of B are 1x1, the result has the entry type of A,
   * - otherwise the result entries are Dune::FieldMatrix blocks with EntryA::rows rows and
   *   EntryB::cols columns over the field type of EntryA; in this case EntryA::cols has to
   *   equal EntryB::rows (checked by static_assert).
   *
   * The computation proceeds in two parallel passes: first the sparsity pattern of the product
   * is assembled row by row, then the entries are accumulated into the matrix created from
   * that pattern.
   *
   * \tparam EntryA entry type of A (has to provide rows, cols, and field_type)
   * \tparam EntryB entry type of B (has to provide rows and cols)
   * \tparam IndexA index type of A, also used as index type of the result
   * \tparam IndexB index type of B
   *
   * \param A the left factor
   * \param B the right factor; its number of rows has to match the number of columns of A
   *          (checked by assert only)
   * \return a NumaBCRSMatrix<Entry,IndexA> with A.N() rows and B.M() columns
   */
  template <class EntryA, class EntryB, class IndexA, class IndexB>
  auto operator*(NumaBCRSMatrix<EntryA,IndexA> const& A, NumaBCRSMatrix<EntryB,IndexB> const& B)
  {
    // Compute the resulting entry size, treating the case of scalar (1x1) entries.
    constexpr bool scalarA = EntryA::rows*EntryA::cols==1;
    constexpr bool scalarB = EntryB::rows*EntryB::cols==1;
    using Entry = std::conditional_t<scalarA,
                                     EntryB,
                                     std::conditional_t<scalarB,
                                                        EntryA,
                                                        Dune::FieldMatrix<typename EntryA::field_type,EntryA::rows,EntryB::cols>>>;

    // cols and rows are enums of different types; comparing them directly gives a warning,
    // hence the conversion to int.
    static_assert(scalarA || scalarB || static_cast<int>(EntryA::cols)==static_cast<int>(EntryB::rows),
                  "Entry size has to match");

    assert(A.M()==B.N());

    // Every row of A forms the corresponding row of AB by combining those rows of B
    // which correspond to column indices of entries in the row of A. Thus, for
    // each row of A, we extract the column index sets of the affected rows of B, and
    // form their union.
    NumaCRSPatternCreator<IndexA> creator(A.N(),B.M(),false,2);
    std::mutex creatorMutex;                                    // serializes all access to creator

    // The row count is kept in a wide unsigned type: evaluating k*A.N() in the common type
    // of int and IndexA can overflow for 32 bit index types long before A.N() itself
    // reaches the limits of IndexA.
    auto const nRows = static_cast<unsigned long long>(A.N());

    // We go for coarse granularity here (task k out of n covers a contiguous row range),
    // since then the allocation of the work vectors is amortized over many rows.
    parallelFor([&](int const k, int const n)
    {
      std::vector<IndexA> cidx, tmp, tmp2;

      // Row range [firstRow,lastRow) of this task. Both bounds are at most A.N() and hence
      // representable in IndexA. They are computed once instead of in every loop iteration.
      auto const task     = static_cast<unsigned long long>(k);
      auto const tasks    = static_cast<unsigned long long>(n);
      auto const firstRow = static_cast<IndexA>(task*nRows/tasks);
      auto const lastRow  = static_cast<IndexA>((task+1)*nRows/tasks);

      for (IndexA i=firstRow; i<lastRow; ++i)
      {
        cidx.clear();                                           // column indices of row i of AB

        auto rowA = A[i];
        for (auto cai=rowA.begin(); cai!=rowA.end(); ++cai)
        {
          // Gather the column indices of the B row selected by this entry of A.
          tmp.clear();
          auto rowB = B[cai.index()];
          for (auto cbi=rowB.begin(); cbi!=rowB.end(); ++cbi)
            tmp.push_back(static_cast<IndexA>(cbi.index()));

          // Merge them with the indices collected so far. std::set_union requires both
          // ranges to be sorted. NOTE(review): this relies on the rows of B being traversed
          // in ascending column order - confirm against NumaBCRSMatrix.
          tmp2.clear();
          std::set_union(begin(tmp),end(tmp),
                         begin(cidx),end(cidx),
                         std::back_inserter(tmp2));
          std::swap(tmp2,cidx);                                 // cidx holds the union again
        }

        // Enter all indices of this row at once, mentioning that they are sorted. The lock
        // is held only for this call.
        std::lock_guard<std::mutex> creatorLock(creatorMutex);
        creator.addElements(&i,&i+1,begin(cidx),end(cidx),true);
      }
    });

    // NOTE(review): the accumulation below presumably relies on the entries of C being
    // zero-initialized by this constructor - confirm against NumaBCRSMatrix.
    NumaBCRSMatrix<Entry,IndexA> C(creator);

    // TODO: parallelize in NUMA style. This requires moving it to the NumaBCRSMatrix class
    // As there are no allocations, we go for fine granularity with one task per row. Each
    // task writes to row i of C only.
    parallelFor(0,A.N(),[&](IndexA i)
    {
      auto rowA = A[i];
      auto rowC = C[i];
      for (auto cai=rowA.begin(); cai!=rowA.end(); ++cai)
      {
        auto rowB = B[cai.index()];
        for (auto cbi=rowB.begin(); cbi!=rowB.end(); ++cbi)           // use normalForm() here to
          rowC[cbi.index()] += normalForm(*cai) * normalForm(*cbi);   // interpret 1x1 blocks as scalars
      }
    });

    return C;
  }

}


#endif

Dune::FieldMatrix
Definition: errorDistribution.hh:30

Kaskade::NumaBCRSMatrix
A NUMA-aware compressed row storage matrix adhering mostly to the Dune ISTL interface (to complete....
Definition: threadedMatrix.hh:2115

Kaskade::NumaBCRSMatrix::M
Index M() const
The number of columns.
Definition: threadedMatrix.hh:2514

Kaskade::NumaBCRSMatrix::N
Index N() const
The number of rows.
Definition: threadedMatrix.hh:2509

Kaskade::NumaCRSPatternCreator
A NUMA-aware creator for matrix sparsity patterns.
Definition: threadedMatrix.hh:1617

Kaskade::NumaCRSPatternCreator::addElements
void addElements(IterRow const fromRow, IterRow const toRow, IterCol const fromCol, IterCol const toCol, bool colIsSorted=false)
Enters entries into the sparsity pattern.
Definition: threadedMatrix.hh:1726

Kaskade::NumaBCRSMatrix::operator*
auto operator*(NumaBCRSMatrix< EntryA, IndexA > const &A, NumaBCRSMatrix< EntryB, IndexB > const &B)
Computes the matrix-matrix product A*B.
Definition: matrixProduct.hh:37

Kaskade::parallelFor
void parallelFor(Func const &f, int maxTasks=std::numeric_limits< int >::max())
A parallel for loop that executes the given functor in parallel on different CPUs.
Definition: threading.hh:489

Kaskade
Definition: abstract_interface.hh:15

threadedMatrix.hh