KASKADE 7 development version
superlu_solve.hh
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the library KASKADE 7 */
4/* https://www.zib.de/research/projects/kaskade7-finite-element-toolbox */
5/* */
6/* Copyright (C) 2002-2024 Zuse Institute Berlin */
7/* */
8/* KASKADE 7 is distributed under the terms of the ZIB Academic License. */
9/* see $KASKADE/academic.txt */
10/* */
11/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13#ifndef SUPERLU_SOLVE_HH
14#define SUPERLU_SOLVE_HH
15
16#include <vector>
17#include <iostream>
18#include <memory>
19
20#include "slu_ddefs.h"
21
23
24namespace Kaskade
25{
26
32template <class Scalar>
34{
35public:
36
38
45 template <class Index>
47 std::vector<Index> const& ridx,
48 std::vector<Index> const& cidx,
49 std::vector<Scalar> const& values)
50 : N(ridx.size()), n(n_)
51 {
52 assert(cidx.size()==N && values.size()==N);
53
55
56 if (this->getVerbose()>=2)
57 std::cout << "SuperLU" << " solver, n=" << n << ", nnz=" << N << std::endl;
58
59 set_default_options(&options);
60 StatInit(&stat);
61
62 work = 0;
63 lwork = 0;
64 u = 1.0;
65 equil = YES;
66 trans = NOTRANS;
67
68 // SuperLU uses int indices. If the matrix is provided with long indices,
69 // convert the indices first to the expected type int.
70 if (std::is_same_v<int,Index>)
71 tripletToCompressedColumn(n, n, N, ridx, cidx, values, Ap, Ai, Az);
72 else
73 {
74 std::vector<int> iRidx(ridx.size()), iCidx(cidx.size());
75 std::copy(begin(ridx),end(ridx),begin(iRidx));
76 std::copy(begin(cidx),end(cidx),begin(iCidx));
77 tripletToCompressedColumn(n, n, N, iRidx, iCidx, values, Ap, Ai, Az);
78 }
79
80 dCreate_CompCol_Matrix(&A, n, n, N, &Az[0], &Ai[0], &Ap[0], SLU_NC, SLU_D, SLU_GE);
81
82 nrhs = 1;
83 if ( !(rhsb = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsb[].");
84 if ( !(rhsx = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsx[].");
85 dCreate_Dense_Matrix(&B, n, nrhs, rhsb, n, SLU_DN, SLU_D, SLU_GE);
86 dCreate_Dense_Matrix(&X, n, nrhs, rhsx, n, SLU_DN, SLU_D, SLU_GE);
87 xact = doubleMalloc(n * nrhs);
88 ldx = n;
89 dGenXtrue(n, nrhs, xact, ldx);
90 dFillRHS(trans, nrhs, xact, ldx, &A, &B);
91
92 if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
93 if ( !(perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[].");
94 if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
95 if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) )
96 ABORT("SUPERLU_MALLOC fails for R[].");
97 if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
98 ABORT("SUPERLU_MALLOC fails for C[].");
99 int nrhs = 1;
100 if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
101 ABORT("SUPERLU_MALLOC fails for ferr[].");
102 if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
103 ABORT("SUPERLU_MALLOC fails for berr[].");
104
105 options.Equil = equil;
106 options.DiagPivotThresh = u;
107 options.Trans = trans;
108
109 B.ncol = 0; /* Indicate not to solve the system */
110 dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
111 &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
112 &mem_usage, &stat, &info);
113 B.ncol = nrhs; /* Set the number of right-hand side */
114
115 if ( (info == 0 || info == n+1) && (verbose>0) ) {
116
117 if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
118 if ( options.ConditionNumber )
119 printf("Recip. condition number = %e\n", rcond);
120 Lstore = (SCformat *) L.Store;
121 Ustore = (NCformat *) U.Store;
122 printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
123 printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
124 printf("No of nonzeros in L+U = %ld\n", Lstore->nnz + Ustore->nnz - n);
125 printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/N);
126
127 printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
128 mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
129 fflush(stdout);
130
131 } else if ( info > 0 && lwork == -1 ) {
132 printf("** Estimated memory: %ld bytes\n", info - n);
133 } else
134 if (info!=0)
135 printf("LU factorization: dgssvx() returns info %d\n", info);
136
137 if ( verbose>0 ) StatPrint(&stat);
138 StatFree(&stat);
139 };
140
142
149 template <class Index>
151 std::unique_ptr<std::vector<Index>> ridx,
152 std::unique_ptr<std::vector<Index>> cidx,
153 std::unique_ptr<std::vector<Scalar>> values)
154 : N(ridx->size()), n(n_)
155 {
156 assert(cidx->size()==N && values->size()==N);
157
158 if (this->getVerbose()>=2)
159 {
160 std::cout << "SuperLU" << " solver, n=" << n << ", nnz=" << N << std::endl;
161 }
162
164 set_default_options(&options);
165 StatInit(&stat);
166
167 work = 0;
168 lwork = 0;
169 u = 1.0;
170 equil = YES;
171 trans = NOTRANS;
172
173 // SuperLU uses int indices. If the matrix is provided with long indices,
174 // convert the indices first to the expected type int.
175 if (std::is_same_v<int,Index>)
176 tripletToCompressedColumn(n, n, N, *ridx, *cidx, *values, Ap, Ai, Az);
177 else
178 {
179 std::vector<int> iRidx(ridx->size()), iCidx(cidx->size());
180 std::copy(begin(*ridx),end(*ridx),begin(iRidx));
181 std::copy(begin(*cidx),end(*cidx),begin(iCidx));
182 tripletToCompressedColumn(n, n, N, iRidx, iCidx, *values, Ap, Ai, Az);
183 }
184 tripletToCompressedColumn(n, n, N, ridx, cidx, values, Ap, Ai, Az);
185
186 dCreate_CompCol_Matrix(&A, n, n, N, &Az[0], &Ap[0], &Ai[0], SLU_NC, SLU_D, SLU_GE);
187
188 nrhs = 1;
189 if ( !(rhsb = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsb[].");
190 if ( !(rhsx = doubleMalloc(n * nrhs)) ) ABORT("Malloc fails for rhsx[].");
191 dCreate_Dense_Matrix(&B, n, nrhs, rhsb, n, SLU_DN, SLU_D, SLU_GE);
192 dCreate_Dense_Matrix(&X, n, nrhs, rhsx, n, SLU_DN, SLU_D, SLU_GE);
193 xact = doubleMalloc(n * nrhs);
194 ldx = n;
195 dGenXtrue(n, nrhs, xact, ldx);
196 dFillRHS(trans, nrhs, xact, ldx, &A, &B);
197
198 if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
199 if ( !(perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[].");
200 if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
201 if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) )
202 ABORT("SUPERLU_MALLOC fails for R[].");
203 if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
204 ABORT("SUPERLU_MALLOC fails for C[].");
205 if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
206 ABORT("SUPERLU_MALLOC fails for ferr[].");
207 if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
208 ABORT("SUPERLU_MALLOC fails for berr[].");
209
210 options.Equil = equil;
211 options.DiagPivotThresh = u;
212 options.Trans = trans;
213
214 SuperMatrix B, X;
215 B.ncol = 0; /* Indicate not to solve the system */
216 dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
217 &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
218 &mem_usage, &stat, &info);
219 B.ncol = nrhs; /* Set the number of right-hand side */
220
221 if ( (info == 0 || info == n+1) && (verbose>0) ) {
222 if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
223 if ( options.ConditionNumber )
224 printf("Recip. condition number = %e\n", rcond);
225 Lstore = (SCformat *) L.Store;
226 Ustore = (NCformat *) U.Store;
227 printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
228 printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
229 printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
230 printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/N);
231
232 printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
233 mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
234 fflush(stdout);
235
236 } else if ( info > 0 && lwork == -1 ) {
237 printf("** Estimated memory: %d bytes\n", info - n);
238 }
239 if (info!=0)
240 printf("LU factorization: dgssvx() returns info %d\n", info);
241
242 if ( verbose>0 ) StatPrint(&stat);
243 StatFree(&stat);
244 };
245
246
248 {
249 SUPERLU_FREE (etree);
250 SUPERLU_FREE (perm_r);
251 SUPERLU_FREE (perm_c);
252 SUPERLU_FREE (R);
253 SUPERLU_FREE (C);
254 SUPERLU_FREE (ferr);
255 SUPERLU_FREE (berr);
256
257 SUPERLU_FREE (rhsb);
258 SUPERLU_FREE (rhsx);
259 SUPERLU_FREE (xact);
260
261 Destroy_SuperNode_Matrix(&L);
262 Destroy_CompCol_Matrix(&U); }
263
268 void solve(std::vector<Scalar> const& b, std::vector<Scalar>& x, bool transposed=false) const
269 {
270 assert(b.size()>=n);
271 x.resize(n);
272 solve(&b[0],&x[0],transposed);
273 }
274
275 virtual void solve(Scalar const* b, Scalar* x, bool transposed=false) const
276 {
277 options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
278
279 /* Initialize the statistics variables. */
280 StatInit(&stat);
281
282 std::copy(b,b+n,rhsb);
283 dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
284 &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
285 &mem_usage, &stat, &info);
286 std::copy(rhsx,rhsx+n,x);
287
288 if (info!=0)
289 printf("Triangular solve: dgssvx() returns info %d\n", info);
290
291 if ( (info == 0 || info == n+1) && (verbose>0) )
292 {
293 if ( options.IterRefine )
294 {
295 printf("Iterative Refinement:\n");
296 printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
297 for (int i = 0; i < nrhs; ++i)
298 printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
299 }
300 fflush(stdout);
301 } else if ( info > 0 && lwork == -1 ) {
302 printf("** Estimated memory: %ld bytes\n", info - n);
303 }
304
305 if ( verbose>0 ) StatPrint(&stat);
306 StatFree(&stat);
307 }
308
309
314 void solve(std::vector<Scalar>& b) const
315 {
316 assert(b.size()>=n);
317 solve(&b[0]);
318 }
319
320 virtual void solve(Scalar* b) const
321 {
322 options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
323
324 /* Initialize the statistics variables. */
325 StatInit(&stat);
326
327 std::copy(b,b+n,rhsb);
328 dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
329 &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
330 &mem_usage, &stat, &info);
331 std::copy(rhsx,rhsx+n,b);
332
333 if (info!=0)
334 printf("Triangular solve: dgssvx() returns info %d\n", info);
335
336 if ( (info == 0 || info == n+1) && (verbose>0) )
337 {
338 if ( options.IterRefine )
339 {
340 printf("Iterative Refinement:\n");
341 printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
342 for (int i = 0; i < nrhs; ++i)
343 printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
344 }
345 fflush(stdout);
346 }
347 else if ( info > 0 && lwork == -1 )
348 {
349 printf("** Estimated memory: %ld bytes\n", info - n);
350 }
351
352 if ( verbose>0 )
353 StatPrint(&stat);
354 StatFree(&stat);
355 }
356
357 virtual size_t size() const
358 {
359 return n;
360 }
361
362private:
363 size_t const N, n;
364 std::vector<int> Ap, Ai;
365 std::vector<Scalar> Az;
366 mutable superlu_options_t options;
367 mutable int nrhs, ldx;
368 mutable SuperMatrix A, L, U;
369 mutable NCformat *Ustore;
370 mutable SCformat *Lstore;
371 mutable SuperMatrix B, X;
372 mutable int *perm_c; /* column permutation vector */
373 mutable int *perm_r; /* row permutations from partial pivoting */
374 mutable int *etree;
375 mutable double *R, *C;
376 mutable double *ferr, *berr;
377 mutable double *rhsb, *rhsx, *xact;
378 mutable mem_usage_t mem_usage;
379 mutable SuperLUStat_t stat;
380 mutable char equed[1];
381 mutable void *work;
382 mutable int info, lwork;
383 mutable double u, rpg, rcond;
384 mutable yes_no_t equil;
385 mutable trans_t trans;
386 int verbose;
387};
388} // namespace Kaskade
389#endif
Abstract base class for matrix factorizations.
Factorization of sparse linear systems with mumps.
void solve(std::vector< Scalar > &b) const
Solves the system for the given right hand side.
virtual void solve(Scalar *b) const
Solves the system for the given right hand side .
virtual size_t size() const
reports the dimension of the system
void solve(std::vector< Scalar > const &b, std::vector< Scalar > &x, bool transposed=false) const
virtual void solve(Scalar const *b, Scalar *x, bool transposed=false) const
Solves the system for the given right hand side .
SUPERLUFactorization(Index n_, std::vector< Index > const &ridx, std::vector< Index > const &cidx, std::vector< Scalar > const &values)
Version of constructor keeping input data in triplet format (aka coordinate format) constant.
SUPERLUFactorization(Index n_, std::unique_ptr< std::vector< Index > > ridx, std::unique_ptr< std::vector< Index > > cidx, std::unique_ptr< std::vector< Scalar > > values)
Version of constructor, that destroys input data before factorization: more memory efficient.
void tripletToCompressedColumn(Index nRows, Index nCols, size_t nNonZeros, std::vector< Index > const &ridx, std::vector< Index > const &cidx, std::vector< Scalar > const &values, std::vector< Index > &Ap, std::vector< Index > &Ai, std::vector< Scalar > &Az)
Converts a matrix in triplet format to a compressed column format.