74 IT n{-1}, kl{-1}, ku{-1}, lead{-1}, block{-1}, lines{-1};
76 std::vector<DT> a, b, work;
82 auto init_storage(
const IT n,
const IT kl,
const IT ku) {
86 loc.lead = 2 * (loc.kl + loc.ku) + 1;
87 loc.block = std::max(loc.n / std::max(IT{1},
this->ctx.n_rows - 1) + 1, std::max(loc.kl + loc.ku + 1,
this->ctx.row_block(loc.n)));
88 loc.block = std::min(loc.block, loc.n);
89 loc.lines = this->ctx.rows(loc.n, loc.block);
90 loc.desc1d_a = {501, this->trans_ctx.context, loc.n, loc.block, 0, loc.lead, 0, 0, 0};
93 loc.a.resize(loc.lead * loc.lines + loc.ku);
94 loc.ipiv.resize(std::min(loc.n, loc.lines + loc.kl + loc.ku), -987654);
97 using base_t::to_band;
98 using base_t::to_full;
118 auto operator()(
const IT
i,
const IT
j)
const {
120 if(
i -
j > kl ||
j -
i > ku)
return IT{-1};
121 return 2 * ku + kl +
i + 2 *
j * (kl + ku);
125 template<band_container_t AT, full_container_t BT> IT solve(
AT&& A,
BT&&
B) {
return solve(to_band(std::forward<AT>(A)), to_full(std::forward<BT>(
B))); }
126 template<band_container_t AT> IT solve(
AT&& A,
full_mat<DT, IT>&&
B) {
return solve(to_band(std::forward<AT>(A)), to_full(std::move(
B))); }
129 if(!this->ctx.is_valid() || !
this->trans_ctx.is_valid())
return 0;
131 if(A.n_rows != A.n_cols || A.n_cols !=
B.n_rows)
return -1;
133 init_storage(A.n_cols, A.kl, A.ku);
137 this->trans_ctx.scatter(
139 this->trans_ctx.desc_g(loc.lead, loc.n),
141 this->trans_ctx.desc_l(loc.lead, loc.n, loc.lead, loc.block, loc.lead)
144 const IT
laf = (loc.block + 6 * loc.kl + 13 * loc.ku) * (loc.kl + loc.ku);
145 const IT lwork = std::max(
B.n_cols * (loc.block + 2 * loc.kl + 4 * loc.ku), IT{1});
146 loc.work.resize(
laf + lwork);
150 if constexpr(std::is_same_v<DT, double>) {
152 pdgbtrf(&loc.n, &loc.kl, &loc.ku, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
154 else if constexpr(std::is_same_v<DT, float>) {
156 psgbtrf(&loc.n, &loc.kl, &loc.ku, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
158 else if constexpr(std::is_same_v<DT, complex16>) {
160 pzgbtrf(&loc.n, &loc.kl, &loc.ku, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
162 else if constexpr(std::is_same_v<DT, complex8>) {
164 pcgbtrf(&loc.n, &loc.kl, &loc.ku, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
168 if((info = this->trans_ctx.amx(info)) != 0)
return info;
170 return solve(std::move(
B));
174 static constexpr char TRANS =
'N';
176 if(
B.n_rows != loc.n)
return -1;
178 if(!this->ctx.is_valid() || !
this->trans_ctx.is_valid())
return 0;
180 const auto lead_b = std::max(loc.block, loc.lines);
182 loc.b.resize(
lead_b *
B.n_cols);
184 const auto full_desc_b = this->ctx.desc_g(
B.n_rows,
B.n_cols);
189 const IT
laf = (loc.block + 6 * loc.kl + 13 * loc.ku) * (loc.kl + loc.ku);
190 const IT lwork = std::max(
B.n_cols * (loc.block + 2 * loc.kl + 4 * loc.ku), IT{1});
191 loc.work.resize(
laf + lwork);
197 if constexpr(std::is_same_v<DT, double>) {
199 pdgbtrs(&TRANS, &loc.n, &loc.kl, &loc.ku, &
B.n_cols, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.b.data(), &
this->ONE,
desc1d_b.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
201 else if constexpr(std::is_same_v<DT, float>) {
203 psgbtrs(&TRANS, &loc.n, &loc.kl, &loc.ku, &
B.n_cols, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.b.data(), &
this->ONE,
desc1d_b.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
205 else if constexpr(std::is_same_v<DT, complex16>) {
207 pzgbtrs(&TRANS, &loc.n, &loc.kl, &loc.ku, &
B.n_cols, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.b.data(), &
this->ONE,
desc1d_b.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);
209 else if constexpr(std::is_same_v<DT, complex8>) {
211 pcgbtrs(&TRANS, &loc.n, &loc.kl, &loc.ku, &
B.n_cols, (
E*)loc.a.data(), &
this->ONE, loc.desc1d_a.data(), loc.ipiv.data(), (
E*)loc.b.data(), &
this->ONE,
desc1d_b.data(), (
E*)loc.work.data(), &
laf, (
E*)(loc.work.data() +
laf), &lwork, &info);