KASKADE 7 development version
additiveschwarz.hh
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the library KASKADE 7 */
4/* https://www.zib.de/research/projects/kaskade7-finite-element-toolbox */
5/* */
6/* Copyright (C) 2002-2011 Zuse Institute Berlin */
7/* */
8/* KASKADE 7 is distributed under the terms of the ZIB Academic License. */
9/* see $KASKADE/academic.txt */
10/* */
11/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13#ifndef ADDSCHWARZ_HH
14#define ADDSCHWARZ_HH
15
16#include <iostream>
17#include <vector>
18#include <boost/timer/timer.hpp>
19#include <dune/istl/preconditioners.hh>
20
21#include "linalg/triplet.hh"
22
23namespace Kaskade
24{
26
27 extern "C" void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);
28 extern "C" void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv,
29 double *b, int *ldb, int *info);
30
31 //---------------------------------------------------------------------
32
33 // TODO: document this
34 template <class Op>
35 class AdditiveSchwarzPreconditioner: public Dune::Preconditioner<typename Op::domain_type, typename Op::range_type>
36 {
37 typedef typename Op::domain_type domain_type;
38 typedef typename Op::domain_type range_type;
39 typedef typename domain_type::field_type field_type;
40
41 public:
42 static int const category = Dune::SolverCategory::sequential;
43
47 AdditiveSchwarzPreconditioner(Op const& op, schwarzType select, int verbosity=2)
48 {
49 MatrixAsTriplet<field_type> A = op.template get<MatrixAsTriplet<field_type> >();
50 size_t k, n = A.nrows();
51 switch (select)
52 {
53 case DIAGONAL:
54 iBlocks.resize(n+1);
55 iBlocksColIndices.resize(n);
56 for (k=0; k<n; k++)
57 {
58 iBlocks[k] = k;
59 iBlocksColIndices[k] = k;
60 }
61 iBlocks[n] = n;
62 break;
63 case COMPLETE:
64 iBlocks.resize(2);
65 iBlocksColIndices.resize(n);
66 iBlocks[0] = 0;
67 iBlocks[1] = n;
68 for (k=0; k<n; k++)
69 {
70 iBlocksColIndices[k] = k;
71 }
72 break;
73 }
74 init(A,verbosity);
75 }
76
77 AdditiveSchwarzPreconditioner(Op const& op, size_t first, size_t last, int verbosity=2)
78 {
79 MatrixAsTriplet<field_type> A = op.template get<MatrixAsTriplet<field_type> >();
80 size_t i, k, nBlocks = last-first, cnt, nnz = A.nnz();
81 iBlocks.resize(nBlocks+1);
82 // printf("AdditiveSchwarzPreconditioner: n=%ld, nBlocks=%ld\n", n, nBlocks);
83 std::vector<size_t> nCounts(nBlocks);
84 for (k=0; k<nBlocks; k++)
85 nCounts[k] = 1;
86 cnt = nBlocks;
87
88 for (i=0; i<nnz; i++)
89 {
90 size_t ri = A.ridx[i];
91 size_t ci = A.cidx[i];
92 if ((ri>=first)&&(ri<last))
93 {
94 if ((ci<first)||(ci>=last))
95 {
96 nCounts[ri-first]++;
97 cnt++;
98 }
99 }
100 // else
101 // {
102 // if ((ci>=first)||(ci<last))
103 // {
104 // nCounts[ci-first]++;
105 // cnt++;
106 // }
107 // }
108 }
109 // printf("nCounts: cnt=%ld ", cnt);
110 // for(k=0; k<nBlocks; k++) printf("%3ld ", nCounts[k]);
111 // printf("\n");
112
113 iBlocksColIndices.resize(cnt);
114 iBlocks[0] = 0;
115 for (k=1; k<=nBlocks; k++)
116 {
117 iBlocks[k] = iBlocks[k-1]+nCounts[k-1];
118 iBlocksColIndices[iBlocks[k-1]] = first+k-1;
119 nCounts[k-1] = 1;
120 }
121 assert(iBlocks[nBlocks]==cnt);
122 // printf("iBlocks: ");
123 // for(k=0; k<=nBlocks; k++) printf("%3ld ", iBlocks[k]);
124 // printf("\n");
125 for (i=0; i<nnz; i++)
126 {
127 size_t ri = A.ridx[i];
128 size_t ci = A.cidx[i];
129 if ((ri>=first)&&(ri<last))
130 {
131 if ((ci<first)||(ci>=last))
132 {
133 iBlocksColIndices[iBlocks[ri-first]+nCounts[ri-first]] = ci;
134 nCounts[ri-first]++;
135 }
136 }
137 // else
138 // {
139 // if ((ci>=first)||(ci<last))
140 // {
141 // iBlocksColIndices[iBlocks[ci-first]+nCounts[ci-first]] = ri;
142 // nCounts[ci-first]++;
143 // }
144 // }
145 }
146
147 init(A,verbosity);
148 }
149
150 void init(MatrixAsTriplet<field_type> &A, int verbosity=2)
151 {
152 size_t i, k, n = A.nrows(), nnz = A.nnz();
153 // computing the amount of storage needed vo all sub-matrices
154
155 size_t nBlocks = iBlocks.size()-1;
156 // printf("AdditiveSchwarzPreconditioner: n=%ld, nBlocks=%ld\n", n, nBlocks);
157 // for (k=0; k<nBlocks; k++) {
158 // printf("%3ld: ", k);
159 // for (i=iBlocks[k]; i<iBlocks[k+1]; i++) printf("%3ld ", iBlocksColIndices[i]);
160 // printf("\n");
161 // }
162 blocks.resize(nBlocks);
163 ipiv.resize(nBlocks);
164 size_t maxBlockSize = -1;
165 size_t memSize = 0, inverseIndexSize = 0;
166 for (k=0; k<nBlocks; k++)
167 {
168 int blockSize = iBlocks[k+1]-iBlocks[k];
169 if (blockSize>maxBlockSize)
170 maxBlockSize = blockSize;
171 memSize += blockSize*blockSize;
172 inverseIndexSize += blockSize;
173 }
174 // printf(" memSize=%ld, inverseIndexSize=%ld\n", memSize, inverseIndexSize);
175
176 // allocating the memory
177
178 blocks[0] = (field_type*)malloc(memSize*sizeof(field_type));
179 ipiv[0] = (int*)malloc(inverseIndexSize*sizeof(int));
180 for (k=1; k<nBlocks; k++)
181 {
182 int blockSize = iBlocks[k]-iBlocks[k-1];
183 blocks[k] = blocks[k-1]+blockSize*blockSize;
184 ipiv[k] = ipiv[k-1]+blockSize;
185 }
186 // printf("blocks allocated: \n");
187 // for (k=0; k<nBlocks; k++) {
188 // int blockSize = iBlocks[k+1]-iBlocks[k];
189 // printf("%8p %d ", blocks[k], blockSize*blockSize);
190 // }
191 // printf("\n");
192
193 // which index is used in which submatrix? (inverse)
194
195 std::vector<size_t> usedCnts(n);
196 std::vector<size_t> usedIn(n+1);
197 std::vector<size_t> usedInIndices(inverseIndexSize);
198
199 for (k=0; k<n; k++)
200 usedCnts[k] = 0;
201 for (k=0; k<nBlocks; k++)
202 {
203 for (i=iBlocks[k]; i<iBlocks[k+1]; i++)
204 {
205 usedCnts[iBlocksColIndices[i]]++;
206 }
207 }
208 // printf("usedCnts: ");
209 // for (k=0; k<n; k++) printf(" %3ld", usedCnts[k]);
210 // printf("\n");
211 size_t cnt = 0;
212 for (k=0; k<n; k++)
213 {
214 usedIn[k] = cnt;
215 cnt += usedCnts[k];
216 }
217 if (cnt!=inverseIndexSize) printf("Assert cnt=%ld!=%ld=inverseIndexSize\n", cnt, inverseIndexSize);
218 usedIn[n] = inverseIndexSize;
219
220 for (k=0; k<n; k++)
221 usedCnts[k] = 0;
222 for (k=0; k<nBlocks; k++)
223 {
224 for (i=iBlocks[k]; i<iBlocks[k+1]; i++)
225 {
226 size_t ii = iBlocksColIndices[i];
227 usedInIndices[usedIn[ii]+usedCnts[ii]] = k;
228 usedCnts[ii]++;
229 }
230 }
231 // printf("Inverse index: n=%ld, nBlocks=%ld\n", n, nBlocks);
232 // for (k=0; k<n; k++) {
233 // printf("%3ld: ", k);
234 // for (i=usedIn[k]; i<usedIn[k+1]; i++) printf("%3ld ", usedInIndices[i]);
235 // printf("\n");
236 // }
237
238 // filling the submatrices
239
240 for (i=0; i<nnz; i++)
241 {
242 size_t ri = A.ridx[i];
243 size_t ci = A.cidx[i];
244 field_type vali = A.data[i];
245 for (k=usedIn[ri]; k<usedIn[ri+1]; k++)
246 {
247 size_t kk = usedInIndices[k];
248 // printf("A(%ld,%ld)=%14.5e in Block %ld\n", ri, ci, vali, kk);
249 size_t subi, subk, bsize = iBlocks[kk+1]-iBlocks[kk];
250 // now we have found the blocknummer kk
251 for (subi=iBlocks[kk]; subi<iBlocks[kk+1]; subi++)
252 if (ri==iBlocksColIndices[subi]) break;
253 if (subi==iBlocks[kk+1]) continue;
254 subi -= iBlocks[kk];
255 for (subk=iBlocks[kk]; subk<iBlocks[kk+1]; subk++)
256 if (ci==iBlocksColIndices[subk]) break;
257 if (subk==iBlocks[kk+1]) continue;
258 subk -= iBlocks[kk];
259 // now we have the indices for the submatrix
260 // printf("subA(%ld,%ld) %8p set to%14.5e\n", subi, subk, &blocks[kk][subi*bsize+subk], vali);
261 blocks[kk][subi*bsize+subk] = vali; // or adding? preset blocks with zero!
262 }
263 }
264
265 // calling lapacks to compute the inverses inverseIndexSize
266
267 for (k=0; k<nBlocks; k++)
268 {
269 size_t bsize = iBlocks[k+1]-iBlocks[k];
270 // printf("block=%ld %8p, bsize=%ld, ipiv=%8p\n", k, blocks[k], bsize, ipiv[k]);
271 // for (int j=0; j<bsize; j++)
272 // {
273 // printf("%8p:", &blocks[k][j*bsize]);
274 // for (int l=0; l<bsize; l++)
275 // printf("%14.5e ", blocks[k][j*bsize+l]);
276 // printf("\n");
277 // }
278 if (bsize==1)
279 {
280 if (fabs(blocks[k][0])<1.0e-20)
281 {
282 printf("error diagonal element to small %e\n", blocks[k][0]);
283 exit(1);
284 }
285 blocks[k][0] = 1.0/blocks[k][0];
286 }
287 else
288 {
289 int info, f_n = bsize;
290 dgetrf_(&f_n, &f_n, blocks[k], &f_n, ipiv[k], &info);
291 if (info!=0)
292 {
293 printf("error from dgetrf_: info = %d, block=%ld %8p, bsize=%ld\n", info,
294 k, blocks[k], bsize);
295 exit(1);
296 }
297 }
298 }
299
300 if ( verbosity>=2 )
301 {
302 std::cout << "PrecondType::ADDITIVESCHWARZ: n=" << n << ", nnz=" << nnz << ", time=" << boost::timer::format(timer.elapsed()) << "\n";
303 }
304 }
305
307 {
308 free(blocks[0]);
309 free(ipiv[0]);
310 }
311
312 virtual void pre(domain_type&, range_type&) {}
313 virtual void post (domain_type&) {}
314
315 virtual void apply (domain_type& x, range_type const& y)
316 {
317 size_t i, k;
318 int n = y.dim();
319 std::vector<field_type> b(n), bb(n), bbb(n);
320 y.write(b.begin());
321 int nBlocks = iBlocks.size()-1;
322 // printf("apply: n=%d, nBlock=%d\n", n, nBlocks);
323 for (k=0; k<nBlocks; k++)
324 {
325 int nn, nrhs = 1, lda, ldb, info, cnt;
326 char trans[1] = {'N'};
327 cnt = 0;
328 for (i=iBlocks[k]; i<iBlocks[k+1]; i++)
329 {
330 size_t ii = iBlocksColIndices[i];
331 // printf("bb[%d] %14.5e = b[%ld] %14.5e\n", cnt, bb[cnt], ii, b[ii]);
332 bb[cnt] = b[ii];
333 cnt++;
334 }
335 nn = lda = ldb = iBlocks[k+1]-iBlocks[k];
336 // printf("apply%3ld: cnt=%d %8p, lng=%d\n", k, cnt, ipiv[k], lda);
337 if (nn==1)
338 {
339 bb[0] = bb[0]*blocks[k][0];
340 }
341 else
342 {
343 dgetrs_(trans, &nn, &nrhs, blocks[k], &lda, ipiv[k], &bb[0], &ldb, &info);
344 if (info!=0)
345 {
346 printf("error from dgetrs_: info =%d\n", info);
347 exit(1);
348 }
349 }
350 cnt = 0;
351 for (i=iBlocks[k]; i<iBlocks[k+1]; i++)
352 {
353 size_t ii = iBlocksColIndices[i];
354 // printf("b[%ld] %14.5e = bb[%d] %14.5e\n", ii, b[ii], cnt, b[cnt]);
355 bbb[ii] += bb[cnt];
356 cnt++;
357 }
358 }
359 x.read(bbb.begin());
360 }
361
362 private:
363 std::vector<size_t> iBlocks, iBlocksColIndices;
364 std::vector<field_type*> blocks;
365 std::vector<int*> ipiv;
366 boost::timer::cpu_timer timer;
367 };
368} // namespace Kaskade
369#endif
virtual void apply(domain_type &x, range_type const &y)
virtual void pre(domain_type &, range_type &)
AdditiveSchwarzPreconditioner(Op const &op, size_t first, size_t last, int verbosity=2)
void init(MatrixAsTriplet< field_type > &A, int verbosity=2)
AdditiveSchwarzPreconditioner(Op const &op, schwarzType select, int verbosity=2)
virtual void post(domain_type &)
size_t nnz() const
Definition: triplet.hh:254
SparseIndexInt nrows() const
Returns number of rows (computes them, if not known)
Definition: triplet.hh:202
std::vector< SparseIndexInt > ridx
row indices
Definition: triplet.hh:669
std::vector< SparseIndexInt > cidx
column indices
Definition: triplet.hh:671
std::vector< Scalar > data
data
Definition: triplet.hh:673
void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info)
void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info)