kaskade7/html/superlu__solve_8hh_source.html

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                                                                           */

/*  This file is part of the library KASKADE 7                               */

/*  https://www.zib.de/research/projects/kaskade7-finite-element-toolbox     */

/*                                                                           */

/*  Copyright (C) 2002-2024 Zuse Institute Berlin                            */

/*                                                                           */

/*  KASKADE 7 is distributed under the terms of the ZIB Academic License.    */

/*    see $KASKADE/academic.txt                                              */

/*                                                                           */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


#ifndef SUPERLU_SOLVE_HH

#define SUPERLU_SOLVE_HH


#include <vector>

#include <iostream>

#include <memory>


#include "slu_ddefs.h"


#include "linalg/factorization.hh"


namespace Kaskade

{


template <class Scalar>

class SUPERLUFactorization: public Factorization<Scalar>

{

public:


  template <class Index>

  SUPERLUFactorization(Index n_,

                       std::vector<Index> const& ridx,

                       std::vector<Index> const& cidx,

                       std::vector<Scalar> const& values)

  : N(ridx.size()), n(n_)

        {

          assert(cidx.size()==N && values.size()==N);


    verbose = Factorization<Scalar>::getVerbose();


          if (this->getVerbose()>=2)

            std::cout << "SuperLU" << " solver, n=" << n << ", nnz=" << N << std::endl;


    set_default_options(&options);

    StatInit(&stat);


    work = 0;

    lwork = 0;

    u = 1.0;

    equil = YES;

    trans = NOTRANS;


    // SuperLU uses int indices. If the matrix is provided with long indices,

    // convert the indices first to the expected type int.

    if (std::is_same_v<int,Index>)

      tripletToCompressedColumn(n, n, N, ridx, cidx, values, Ap, Ai, Az);

    else

    {

      std::vector<int> iRidx(ridx.size()), iCidx(cidx.size());

      std::copy(begin(ridx),end(ridx),begin(iRidx));

      std::copy(begin(cidx),end(cidx),begin(iCidx));

      tripletToCompressedColumn(n, n, N, iRidx, iCidx, values, Ap, Ai, Az);

    }


          dCreate_CompCol_Matrix(&A, n, n, N, &Az[0], &Ai[0], &Ap[0], SLU_NC, SLU_D, SLU_GE);


      nrhs = 1;

      if ( !(rhsb = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsb[].");

      if ( !(rhsx = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsx[].");

      dCreate_Dense_Matrix(&B, n, nrhs, rhsb, n, SLU_DN, SLU_D, SLU_GE);

      dCreate_Dense_Matrix(&X, n, nrhs, rhsx, n, SLU_DN, SLU_D, SLU_GE);

      xact = doubleMalloc(n * nrhs);

      ldx = n;

      dGenXtrue(n, nrhs, xact, ldx);

      dFillRHS(trans, nrhs, xact, ldx, &A, &B);


      if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");

      if ( !(perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[].");

      if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");

      if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for R[].");

      if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for C[].");

      int nrhs = 1;

      if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for ferr[].");

      if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for berr[].");


      options.Equil = equil;

      options.DiagPivotThresh = u;

      options.Trans = trans;


      B.ncol = 0;  /* Indicate not to solve the system */

      dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,

             &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,

             &mem_usage, &stat, &info);

      B.ncol = nrhs;  /* Set the number of right-hand side */


      if ( (info == 0 || info == n+1) && (verbose>0) ) {


          if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);

          if ( options.ConditionNumber )

            printf("Recip. condition number = %e\n", rcond);

      Lstore = (SCformat *) L.Store;

      Ustore = (NCformat *) U.Store;

          printf("No of nonzeros in factor L = %d\n", Lstore->nnz);

      printf("No of nonzeros in factor U = %d\n", Ustore->nnz);

      printf("No of nonzeros in L+U = %ld\n", Lstore->nnz + Ustore->nnz - n);

      printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/N);


          printf("L\\U MB %.3f\ttotal MB needed %.3f\n",

          mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);

          fflush(stdout);


      } else if ( info > 0 && lwork == -1 ) {

        printf("** Estimated memory: %ld bytes\n", info - n);

      } else

      if (info!=0)

        printf("LU factorization: dgssvx() returns info %d\n", info);


      if ( verbose>0 ) StatPrint(&stat);

      StatFree(&stat);

   };


  template <class Index>

  SUPERLUFactorization(Index n_,

                   std::unique_ptr<std::vector<Index>> ridx,

                   std::unique_ptr<std::vector<Index>> cidx,

                   std::unique_ptr<std::vector<Scalar>> values)

        : N(ridx->size()), n(n_)

        {

          assert(cidx->size()==N && values->size()==N);


          if (this->getVerbose()>=2)

            {

              std::cout << "SuperLU" << " solver, n=" << n << ", nnz=" << N << std::endl;

            }


    verbose = Factorization<Scalar>::getVerbose();

    set_default_options(&options);

    StatInit(&stat);


    work = 0;

    lwork = 0;

    u = 1.0;

    equil = YES;

    trans = NOTRANS;


    // SuperLU uses int indices. If the matrix is provided with long indices,

    // convert the indices first to the expected type int.

    if (std::is_same_v<int,Index>)

      tripletToCompressedColumn(n, n, N, *ridx, *cidx, *values, Ap, Ai, Az);

    else

    {

      std::vector<int> iRidx(ridx->size()), iCidx(cidx->size());

      std::copy(begin(*ridx),end(*ridx),begin(iRidx));

      std::copy(begin(*cidx),end(*cidx),begin(iCidx));

      tripletToCompressedColumn(n, n, N, iRidx, iCidx, *values, Ap, Ai, Az);

    }

          tripletToCompressedColumn(n, n, N, ridx, cidx, values, Ap, Ai, Az);


          dCreate_CompCol_Matrix(&A, n, n, N, &Az[0], &Ap[0], &Ai[0], SLU_NC, SLU_D, SLU_GE);


      nrhs = 1;

      if ( !(rhsb = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsb[].");

      if ( !(rhsx = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsx[].");

      dCreate_Dense_Matrix(&B, n, nrhs, rhsb, n, SLU_DN, SLU_D, SLU_GE);

      dCreate_Dense_Matrix(&X, n, nrhs, rhsx, n, SLU_DN, SLU_D, SLU_GE);

      xact = doubleMalloc(n * nrhs);

      ldx = n;

      dGenXtrue(n, nrhs, xact, ldx);

      dFillRHS(trans, nrhs, xact, ldx, &A, &B);


      if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");

      if ( !(perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[].");

      if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");

      if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for R[].");

      if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for C[].");

      if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for ferr[].");

      if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )

        ABORT("SUPERLU_MALLOC fails for berr[].");


      options.Equil = equil;

      options.DiagPivotThresh = u;

      options.Trans = trans;


      SuperMatrix B, X;

      B.ncol = 0;  /* Indicate not to solve the system */

      dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,

             &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,

             &mem_usage, &stat, &info);

      B.ncol = nrhs;  /* Set the number of right-hand side */


      if ( (info == 0 || info == n+1) && (verbose>0) ) {

          if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);

          if ( options.ConditionNumber )

            printf("Recip. condition number = %e\n", rcond);

      Lstore = (SCformat *) L.Store;

      Ustore = (NCformat *) U.Store;

          printf("No of nonzeros in factor L = %d\n", Lstore->nnz);

      printf("No of nonzeros in factor U = %d\n", Ustore->nnz);

      printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);

      printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/N);


          printf("L\\U MB %.3f\ttotal MB needed %.3f\n",

          mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);

          fflush(stdout);


      } else if ( info > 0 && lwork == -1 ) {

        printf("** Estimated memory: %d bytes\n", info - n);

      }

      if (info!=0)

        printf("LU factorization: dgssvx() returns info %d\n", info);


      if ( verbose>0 ) StatPrint(&stat);

      StatFree(&stat);

        };


  /**
   * \brief Releases all memory obtained from SuperLU by the constructor.
   *
   * This covers the work arrays, the right hand side and solution buffers, the
   * factors L and U, and the storage headers of the matrix wrappers A, B and X.
   */
  ~SUPERLUFactorization()
  {
    // Work arrays handed to dgssvx.
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    SUPERLU_FREE (ferr);
    SUPERLU_FREE (berr);

    // Dense buffers behind B and X, and the generated reference solution.
    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);

    // dCreate_CompCol_Matrix / dCreate_Dense_Matrix allocate a Store header for
    // each wrapper. Only that header is released here: the arrays of A belong to
    // the vectors Ap, Ai, Az, and those of B and X have been freed just above.
    Destroy_SuperMatrix_Store(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);

    // The factors are owned entirely by SuperLU.
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
  }


  void solve(std::vector<Scalar> const& b, std::vector<Scalar>& x, bool transposed=false) const

  {

    assert(b.size()>=n);

    x.resize(n);

    solve(&b[0],&x[0],transposed);

  }


  /**
   * \brief Solves the system with the stored factorization for one right hand side.
   *
   * \param b          right hand side, size() entries are read
   * \param x          receives the solution, size() entries are written
   * \param transposed if true, SuperLU is asked to solve the transposed system
   *                   (options.Trans = TRANS), otherwise the original one
   *
   * A nonzero dgssvx() status is reported on stdout; x is filled from the
   * solution buffer in either case.
   */
  virtual void solve(Scalar const* b, Scalar* x, bool transposed=false) const
  {
    options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */

    // Select the system to solve. The setting chosen at construction is restored
    // right after the call, such that other solve() overloads are not affected.
    options.Trans = transposed ? TRANS : NOTRANS;

    /* Initialize the statistics variables. */
    StatInit(&stat);

    // B and X wrap rhsb and rhsx, so the data is passed through these buffers.
    std::copy(b,b+n,rhsb);
    dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);
    options.Trans = trans;
    std::copy(rhsx,rhsx+n,x);

    if (info!=0)
      printf("Triangular solve: dgssvx() returns info %d\n", info);

    if ( (info == 0 || info == n+1) && (verbose>0) )
    {
      if ( options.IterRefine )
      {
        printf("Iterative Refinement:\n");
        printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
        for (int i = 0; i < nrhs; ++i)
          printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
      }
      fflush(stdout);
    } else if ( info > 0 && lwork == -1 ) {
      printf("** Estimated memory: %ld bytes\n", static_cast<long>(info) - static_cast<long>(n));
    }

    if ( verbose>0 ) StatPrint(&stat);
    StatFree(&stat);
  }


  void solve(std::vector<Scalar>& b) const

        {

    assert(b.size()>=n);

    solve(&b[0]);

        }


        /**
         * \brief Solves the system in place with the stored factorization.
         *
         * \param b on entry the right hand side (size() entries are read), on
         *          exit overwritten by the content of the solution buffer
         *
         * A nonzero dgssvx() status is reported on stdout.
         */
        virtual void solve(Scalar* b) const
        {
          // Tell SuperLU to reuse the factors computed by the constructor.
          options.Fact = FACTORED;

          // Fresh statistics for this call.
          StatInit(&stat);

          // B and X wrap rhsb and rhsx, so the data is passed through these buffers.
          std::copy(b,b+n,rhsb);
          dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
                 &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
                 &mem_usage, &stat, &info);
          std::copy(rhsx,rhsx+n,b);

          if (info!=0)
            printf("Triangular solve: dgssvx() returns info %d\n", info);

          bool const solved = info == 0 || info == n+1;
          if (solved && verbose>0)
          {
            if (options.IterRefine)
            {
              printf("Iterative Refinement:\n");
              printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
              for (int rhs = 1; rhs <= nrhs; ++rhs)
                printf("%8d%8d%16e%16e\n", rhs, stat.RefineSteps, ferr[rhs-1], berr[rhs-1]);
            }
            fflush(stdout);
          }
          else if (info > 0 && lwork == -1)
            printf("** Estimated memory: %ld bytes\n", info - n);

          if (verbose>0)
            StatPrint(&stat);
          StatFree(&stat);
        }


  /// Reports the dimension of the system, i.e. the value n_ given at construction.
  virtual size_t size() const { return n; }


private:

  // N: number of triplet entries given to the constructor (printed as nnz).
  // n: dimension of the square system, returned by size().
  size_t const N, n;

  // Compressed column representation filled by tripletToCompressedColumn:
  // Ap and Ai hold the index arrays, Az the values. A refers to their storage.
  std::vector<int> Ap, Ai;

  std::vector<Scalar> Az;

  // Everything below is handed to dgssvx. It is mutable because the const
  // solve() methods modify it (e.g. options.Fact, stat, info, rhsb, rhsx).

  // Option set initialized by set_default_options() in the constructors.
  mutable superlu_options_t options;

  // nrhs: number of right hand sides, set to 1. ldx: set to n, used for xact.
  mutable int nrhs, ldx;

  // A: the system matrix wrapper; L, U: factors written by dgssvx.
  mutable SuperMatrix A, L, U;

  // Typed views of U.Store and L.Store, set only when statistics are printed.
  mutable NCformat       *Ustore;

  mutable SCformat       *Lstore;

  // Dense wrappers of the buffers rhsb (B) and rhsx (X).
  mutable SuperMatrix    B, X;

  mutable int *perm_c; /* column permutation vector */

  mutable int *perm_r; /* row permutations from partial pivoting */

  // Array of n ints passed to dgssvx.
  mutable int *etree;

  // Arrays of A.nrow and A.ncol doubles passed to dgssvx
  // (presumably the row and column scalings of the equilibration - TODO confirm).
  mutable double *R, *C;

  // One value per right hand side; printed as FERR/BERR if iterative refinement is on.
  mutable double *ferr, *berr;

  // rhsb: right hand side buffer, rhsx: solution buffer,
  // xact: generated reference solution used to fill B once in the constructor.
  mutable double *rhsb, *rhsx, *xact;

  // Memory statistics reported by dgssvx (for_lu and total_needed are printed).
  mutable mem_usage_t mem_usage;

  // Statistics, initialized by StatInit and released by StatFree around each dgssvx call.
  mutable SuperLUStat_t stat;

  // Passed to dgssvx in both the factorization and the solve calls.
  mutable char equed[1];

  // Work space arguments of dgssvx; set to a null pointer and length 0.
  mutable void *work;

  // info: status of the most recent dgssvx call.
  mutable int info, lwork;

  // u: value assigned to options.DiagPivotThresh (1.0).
  // rpg, rcond: printed as reciprocal pivot growth and reciprocal condition number.
  mutable double u, rpg, rcond;

  // Value assigned to options.Equil (YES).
  mutable yes_no_t equil;

  // Value assigned to options.Trans (NOTRANS).
  mutable trans_t trans;

  // Verbosity level obtained from Factorization<Scalar>::getVerbose() at construction.
  int verbose;

};

}  // namespace Kaskade

#endif

Kaskade::Factorization
Abstract base class for matrix factorizations.
Definition: factorization.hh:43

Kaskade::Factorization::getVerbose
int getVerbose() const
Definition: factorization.hh:115

Kaskade::SUPERLUFactorization
Factorization of sparse linear systems with SuperLU.
Definition: superlu_solve.hh:34

Kaskade::SUPERLUFactorization::~SUPERLUFactorization
~SUPERLUFactorization()
Definition: superlu_solve.hh:247

Kaskade::SUPERLUFactorization::solve
void solve(std::vector< Scalar > &b) const
Solves the system for the given right hand side.
Definition: superlu_solve.hh:314

Kaskade::SUPERLUFactorization::solve
virtual void solve(Scalar *b) const
Solves the system $Ax = b$ for the given right hand side $b$.
Definition: superlu_solve.hh:320

Kaskade::SUPERLUFactorization::size
virtual size_t size() const
reports the dimension of the system
Definition: superlu_solve.hh:357

Kaskade::SUPERLUFactorization::solve
void solve(std::vector< Scalar > const &b, std::vector< Scalar > &x, bool transposed=false) const
Definition: superlu_solve.hh:268

Kaskade::SUPERLUFactorization::solve
virtual void solve(Scalar const *b, Scalar *x, bool transposed=false) const
Solves the system $Ax = b$ for the given right hand side $b$.
Definition: superlu_solve.hh:275

Kaskade::SUPERLUFactorization::SUPERLUFactorization
SUPERLUFactorization(Index n_, std::vector< Index > const &ridx, std::vector< Index > const &cidx, std::vector< Scalar > const &values)
Version of constructor keeping input data in triplet format (aka coordinate format) constant.
Definition: superlu_solve.hh:46

Kaskade::SUPERLUFactorization::SUPERLUFactorization
SUPERLUFactorization(Index n_, std::unique_ptr< std::vector< Index > > ridx, std::unique_ptr< std::vector< Index > > cidx, std::unique_ptr< std::vector< Scalar > > values)
Version of constructor, that destroys input data before factorization: more memory efficient.
Definition: superlu_solve.hh:150

factorization.hh

Kaskade
Definition: abstract_interface.hh:15

Kaskade::tripletToCompressedColumn
void tripletToCompressedColumn(Index nRows, Index nCols, size_t nNonZeros, std::vector< Index > const &ridx, std::vector< Index > const &cidx, std::vector< Scalar > const &values, std::vector< Index > &Ap, std::vector< Index > &Ai, std::vector< Scalar > &Az)
Converts a matrix in triplet format to a compressed column format.