suanPan
Loading...
Searching...
No Matches
SparseMatSuperLU.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * Copyright (C) 2017-2023 Theodore Chang
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 ******************************************************************************/
29// ReSharper disable CppCStyleCast
30#ifndef SPARSEMATSUPERLU_HPP
31#define SPARSEMATSUPERLU_HPP
32
33#include <superlu-mt/superlu-mt.h>
34#include "SparseMat.hpp"
35#include "csc_form.hpp"
36
37template<sp_d T> class SparseMatSuperLU final : public SparseMat<T> {
38 SuperMatrix A{}, L{}, U{}, B{};
39
40#ifndef SUANPAN_SUPERLUMT
41 superlu_options_t options{};
42
43 SuperLUStat_t stat{};
44#else
45 const int ordering_num = 1;
46
47 Gstat_t stat{};
48#endif
49
50 void* t_val = nullptr;
51 int* t_row = nullptr;
52 int* t_col = nullptr;
53
54 int* perm_r = nullptr;
55 int* perm_c = nullptr;
56
57 bool allocated = false;
58
59 template<sp_d ET> void alloc_supermatrix(csc_form<ET, int>&&);
60 void dealloc_supermatrix();
61
62 template<sp_d ET> void wrap_b(const Mat<ET>&);
63 template<sp_d ET> void tri_solve(int&);
64 template<sp_d ET> void full_solve(int&);
65
66 int solve_trs(Mat<T>&, Mat<T>&&);
67 int solve_trs(Mat<T>&, const Mat<T>&);
68
69public:
70 SparseMatSuperLU(uword, uword, uword = 0);
72 SparseMatSuperLU(SparseMatSuperLU&&) noexcept = delete;
73 SparseMatSuperLU& operator=(const SparseMatSuperLU&) = delete;
74 SparseMatSuperLU& operator=(SparseMatSuperLU&&) noexcept = delete;
75 ~SparseMatSuperLU() override;
76
77 void zeros() override;
78
79 unique_ptr<MetaMat<T>> make_copy() override;
80
81 int direct_solve(Mat<T>&, Mat<T>&&) override;
82 int direct_solve(Mat<T>&, const Mat<T>&) override;
83};
84
85template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::alloc_supermatrix(csc_form<ET, int>&& in) {
86 dealloc_supermatrix();
87
88 auto t_size = sizeof(ET) * in.n_elem;
89 t_val = superlu_malloc(t_size);
90 memcpy(t_val, (void*)in.val_mem(), t_size);
91
92 t_size = sizeof(int) * in.n_elem;
93 t_row = (int*)superlu_malloc(t_size);
94 memcpy(t_row, (void*)in.row_mem(), t_size);
95
96 t_size = sizeof(int) * (in.n_cols + 1llu);
97 t_col = (int*)superlu_malloc(t_size);
98 memcpy(t_col, (void*)in.col_mem(), t_size);
99
100 if(std::is_same_v<ET, double>) {
101 using E = double;
102 dCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, (E*)t_val, t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_D, Mtype_t::SLU_GE);
103 }
104 else {
105 using E = float;
106 sCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, (E*)t_val, t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_S, Mtype_t::SLU_GE);
107 }
108
109 perm_r = (int*)superlu_malloc(sizeof(int) * (this->n_rows + 1));
110 perm_c = (int*)superlu_malloc(sizeof(int) * (this->n_cols + 1));
111
112 allocated = true;
113}
114
115template<sp_d T> void SparseMatSuperLU<T>::dealloc_supermatrix() {
116 if(!allocated) return;
117
118 Destroy_SuperMatrix_Store(&A);
119#ifdef SUANPAN_SUPERLUMT
120 Destroy_SuperNode_SCP(&L);
121 Destroy_CompCol_NCP(&U);
122#else
123 Destroy_SuperNode_Matrix(&L);
124 Destroy_CompCol_Matrix(&U);
125#endif
126
127 if(t_val) superlu_free(t_val);
128 if(t_row) superlu_free(t_row);
129 if(t_col) superlu_free(t_col);
130 if(perm_r) superlu_free(perm_r);
131 if(perm_c) superlu_free(perm_c);
132
133 allocated = false;
134}
135
136template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::wrap_b(const Mat<ET>& in_mat) {
137 if(std::is_same_v<ET, float>) {
138 using E = float;
139 sCreate_Dense_Matrix(&B, (int)in_mat.n_rows, (int)in_mat.n_cols, (E*)in_mat.memptr(), (int)in_mat.n_rows, Stype_t::SLU_DN, Dtype_t::SLU_S, Mtype_t::SLU_GE);
140 }
141 else {
142 using E = double;
143 dCreate_Dense_Matrix(&B, (int)in_mat.n_rows, (int)in_mat.n_cols, (E*)in_mat.memptr(), (int)in_mat.n_rows, Stype_t::SLU_DN, Dtype_t::SLU_D, Mtype_t::SLU_GE);
144 }
145}
146
147template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::tri_solve(int& flag) {
148#ifdef SUANPAN_SUPERLUMT
149 if(std::is_same_v<ET, float>) sgstrs(NOTRANS, &L, &U, perm_c, perm_r, &B, &stat, &flag);
150 else dgstrs(NOTRANS, &L, &U, perm_c, perm_r, &B, &stat, &flag);
151#else
152 superlu::gstrs<ET>(options.Trans, &L, &U, perm_c, perm_r, &B, &stat, &flag);
153#endif
154
155 Destroy_SuperMatrix_Store(&B);
156}
157
158template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::full_solve(int& flag) {
159#ifdef SUANPAN_SUPERLUMT
160 get_perm_c(ordering_num, &A, perm_c);
161 if(std::is_same_v<ET, float>) psgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
162 else pdgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
163#else
164 superlu::gssv<ET>(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &flag);
165#endif
166
167 Destroy_SuperMatrix_Store(&B);
168}
169
170template<sp_d T> SparseMatSuperLU<T>::SparseMatSuperLU(const uword in_row, const uword in_col, const uword in_elem)
171 : SparseMat<T>(in_row, in_col, in_elem) {
172#ifndef SUANPAN_SUPERLUMT
173 set_default_options(&options);
174 options.IterRefine = std::is_same_v<T, float> ? superlu::IterRefine_t::SLU_SINGLE : superlu::IterRefine_t::SLU_DOUBLE;
175 options.Equil = superlu::yes_no_t::NO;
176
177 arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));
178
179 StatInit(&stat);
180#else
181 StatAlloc(static_cast<int>(in_col), SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
182 StatInit(static_cast<int>(in_col), SUANPAN_NUM_THREADS, &stat);
183#endif
184}
185
187 : SparseMat<T>(other) {
188#ifndef SUANPAN_SUPERLUMT
189 set_default_options(&options);
190 options.IterRefine = std::is_same_v<T, float> ? superlu::IterRefine_t::SLU_SINGLE : superlu::IterRefine_t::SLU_DOUBLE;
191 options.Equil = superlu::yes_no_t::NO;
192
193 arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));
194
195 StatInit(&stat);
196#else
197 StatAlloc(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
198 StatInit(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, &stat);
199#endif
200}
201
203 dealloc_supermatrix();
204 StatFree(&stat);
205}
206
207template<sp_d T> void SparseMatSuperLU<T>::zeros() {
209 dealloc_supermatrix();
210}
211
212template<sp_d T> unique_ptr<MetaMat<T>> SparseMatSuperLU<T>::make_copy() { return std::make_unique<SparseMatSuperLU<T>>(*this); }
213
214template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, const Mat<T>& in_mat) {
215 if(this->factored) return solve_trs(out_mat, in_mat);
216
217 this->factored = true;
218
219 auto flag = 0;
220
221 if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
222 alloc_supermatrix(csc_form<T, int>(this->triplet_mat));
223
224 out_mat = in_mat;
225
226 wrap_b(out_mat);
227
228 full_solve<T>(flag);
229
230 return flag;
231 }
232
233 alloc_supermatrix(csc_form<float, int>(this->triplet_mat));
234
235 const fmat f_mat(arma::size(in_mat), fill::none);
236
237 wrap_b(f_mat);
238
239 full_solve<float>(flag);
240
241 return 0 == flag ? solve_trs(out_mat, in_mat) : flag;
242}
243
244template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, const Mat<T>& in_mat) {
245 auto flag = 0;
246
247 if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
248 out_mat = in_mat;
249
250 wrap_b(out_mat);
251
252 tri_solve<T>(flag);
253
254 return flag;
255 }
256
257 out_mat.zeros(arma::size(in_mat));
258
259 mat full_residual = in_mat;
260
261 auto multiplier = norm(full_residual);
262
263 auto counter = 0u;
264 while(counter++ < this->setting.iterative_refinement) {
265 if(multiplier < this->setting.tolerance) break;
266
267 auto residual = conv_to<fmat>::from(full_residual / multiplier);
268
269 wrap_b(residual);
270
271 tri_solve<float>(flag);
272
273 if(0 != flag) break;
274
275 const mat incre = multiplier * conv_to<mat>::from(residual);
276
277 out_mat += incre;
278
279 suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier = norm(full_residual -= this->operator*(incre)));
280 }
281
282 return flag;
283}
284
285template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, Mat<T>&& in_mat) {
286 if(this->factored) return solve_trs(out_mat, std::forward<Mat<T>>(in_mat));
287
288 this->factored = true;
289
290 auto flag = 0;
291
292 if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
293 alloc_supermatrix(csc_form<T, int>(this->triplet_mat));
294
295 wrap_b(in_mat);
296
297 full_solve<T>(flag);
298
299 out_mat = std::move(in_mat);
300
301 return flag;
302 }
303
304 alloc_supermatrix(csc_form<float, int>(this->triplet_mat));
305
306 const fmat f_mat(arma::size(in_mat), fill::none);
307
308 wrap_b(f_mat);
309
310 full_solve<float>(flag);
311
312 return 0 == flag ? solve_trs(out_mat, std::forward<Mat<T>>(in_mat)) : flag;
313}
314
315template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, Mat<T>&& in_mat) {
316 auto flag = 0;
317
318 if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
319 wrap_b(in_mat);
320
321 tri_solve<T>(flag);
322
323 out_mat = std::move(in_mat);
324
325 return flag;
326 }
327
328 out_mat.zeros(arma::size(in_mat));
329
330 auto multiplier = arma::norm(in_mat);
331
332 auto counter = 0u;
333 while(counter++ < this->setting.iterative_refinement) {
334 if(multiplier < this->setting.tolerance) break;
335
336 auto residual = conv_to<fmat>::from(in_mat / multiplier);
337
338 wrap_b(residual);
339
340 tri_solve<float>(flag);
341
342 if(0 != flag) break;
343
344 const mat incre = multiplier * conv_to<mat>::from(residual);
345
346 out_mat += incre;
347
348 suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier = norm(in_mat -= this->operator*(incre)));
349 }
350
351 return flag;
352}
353#endif
354
A MetaMat class that holds matrices.
Definition: MetaMat.hpp:39
const uword n_cols
Definition: MetaMat.hpp:49
const uword n_rows
Definition: MetaMat.hpp:48
A SparseMat class that holds matrices.
Definition: SparseMat.hpp:34
A SparseMatSuperLU class that holds matrices.
Definition: SparseMatSuperLU.hpp:37
SparseMatSuperLU(SparseMatSuperLU &&) noexcept=delete
Definition: csc_form.hpp:25
int SUANPAN_NUM_THREADS
Definition: command.cpp:67
Definition: suanPan.h:318
void zeros() override
Definition: SparseMat.hpp:79
~SparseMatSuperLU() override
Definition: SparseMatSuperLU.hpp:202
unique_ptr< MetaMat< T > > make_copy() override
Definition: SparseMatSuperLU.hpp:212
int direct_solve(Mat< T > &, Mat< T > &&) override
Definition: SparseMatSuperLU.hpp:285
void zeros() override
Definition: SparseMatSuperLU.hpp:207
SparseMatSuperLU(uword, uword, uword=0)
Definition: SparseMatSuperLU.hpp:170
double norm(const vec &)
Definition: tensor.cpp:302
#define suanpan_debug(...)
Definition: suanPan.h:295