d5/da0/_sparse_mat_super_l_u_8hpp_source.html

/*******************************************************************************

 * Copyright (C) 2017-2023 Theodore Chang

 *

 * This program is free software: you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation, either version 3 of the License, or

 * (at your option) any later version.

 *

 * This program is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program.  If not, see <http://www.gnu.org/licenses/>.

 ******************************************************************************/

// ReSharper disable CppCStyleCast

#ifndef SPARSEMATSUPERLU_HPP

#define SPARSEMATSUPERLU_HPP


#include <superlu-mt/superlu-mt.h>

#include "SparseMat.hpp"

#include "csc_form.hpp"


/**
 * @brief Sparse matrix whose linear systems are solved by SuperLU.
 *
 * The sequential SuperLU interface is used by default; defining
 * SUANPAN_SUPERLUMT switches to the multithreaded SuperLU_MT interface.
 * The triplet storage inherited from SparseMat is converted to CSC form and
 * copied into buffers obtained from superlu_malloc before factorisation.
 */
template<sp_d T> class SparseMatSuperLU final : public SparseMat<T> {

    // SuperLU handles: A wraps the CSC copy of the matrix, L and U are handed to
    // the solver routines as the factors, B wraps the current right-hand side.
    SuperMatrix A{}, L{}, U{}, B{};


#ifndef SUANPAN_SUPERLUMT

    // solver options, initialised in the constructors (sequential build only)
    superlu_options_t options{};


    // statistics object passed to the sequential solver routines
    SuperLUStat_t stat{};

#else

    // ordering selector forwarded to get_perm_c() in full_solve()
    const int ordering_num = 1;


    // statistics object of the multithreaded build
    Gstat_t stat{};

#endif


    // buffers from superlu_malloc holding copies of the CSC values,
    // row indices and column pointers referenced by A
    void* t_val = nullptr;

    int* t_row = nullptr;

    int* t_col = nullptr;


    // row and column permutation arrays handed to the solver routines
    int* perm_r = nullptr;

    int* perm_c = nullptr;


    // true between alloc_supermatrix() and dealloc_supermatrix()
    bool allocated = false;


    // copy a CSC matrix into SuperLU buffers and build A
    template<sp_d ET> void alloc_supermatrix(csc_form<ET, int>&&);

    // release A, L, U and every buffer created by alloc_supermatrix()
    void dealloc_supermatrix();


    // wrap the memory of a dense matrix as the right-hand side B
    template<sp_d ET> void wrap_b(const Mat<ET>&);

    // solve with the existing factors; the int receives the solver status
    template<sp_d ET> void tri_solve(int&);

    // factorise A and solve; the int receives the solver status
    template<sp_d ET> void full_solve(int&);


    // solve with the existing factors (full or mixed precision)
    int solve_trs(Mat<T>&, Mat<T>&&);

    int solve_trs(Mat<T>&, const Mat<T>&);


public:

    SparseMatSuperLU(uword, uword, uword = 0);

    SparseMatSuperLU(const SparseMatSuperLU&);

    SparseMatSuperLU(SparseMatSuperLU&&) noexcept = delete;

    SparseMatSuperLU& operator=(const SparseMatSuperLU&) = delete;

    SparseMatSuperLU& operator=(SparseMatSuperLU&&) noexcept = delete;

    ~SparseMatSuperLU() override;


    // reset the stored matrix and drop all SuperLU data
    void zeros() override;


    // copy-construct a new instance on the heap
    unique_ptr<MetaMat<T>> make_copy() override;


    // solve the system; the first argument receives the solution and the
    // return value is the solver status flag
    int direct_solve(Mat<T>&, Mat<T>&&) override;

    int direct_solve(Mat<T>&, const Mat<T>&) override;

};


/**
 * @brief Copy a CSC matrix into SuperLU buffers and wrap it as the system matrix A.
 *
 * Any previously allocated SuperLU data is released first. The value, row
 * index and column pointer arrays of @p in are duplicated into blocks obtained
 * from superlu_malloc, A is created on top of them, and the two permutation
 * arrays are allocated.
 *
 * @tparam ET element type of the CSC matrix; double selects the double
 *            precision creator, anything else the single precision one
 * @param in  CSC representation of the matrix to be factorised
 */
template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::alloc_supermatrix(csc_form<ET, int>&& in) {
    dealloc_supermatrix();

    // duplicate a raw array into a block owned by SuperLU's allocator
    const auto clone = [](const void* const source, const auto bytes) {
        const auto target = superlu_malloc(bytes);
        memcpy(target, source, bytes);
        return target;
    };

    t_val = clone(in.val_mem(), sizeof(ET) * in.n_elem);
    t_row = static_cast<int*>(clone(in.row_mem(), sizeof(int) * in.n_elem));
    // a CSC matrix carries one more column pointer than it has columns
    t_col = static_cast<int*>(clone(in.col_mem(), sizeof(int) * (in.n_cols + 1llu)));

    if constexpr(std::is_same_v<ET, double>) dCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, static_cast<double*>(t_val), t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_D, Mtype_t::SLU_GE);
    else sCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, static_cast<float*>(t_val), t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_S, Mtype_t::SLU_GE);

    perm_r = static_cast<int*>(superlu_malloc(sizeof(int) * (this->n_rows + 1)));
    perm_c = static_cast<int*>(superlu_malloc(sizeof(int) * (this->n_cols + 1)));

    allocated = true;
}


/**
 * @brief Release everything created by alloc_supermatrix() and the factors L and U.
 *
 * Does nothing unless a matrix is currently allocated. After the call every
 * owned pointer is reset to nullptr so that no dangling pointer is left behind
 * and the null guards below stay meaningful.
 */
template<sp_d T> void SparseMatSuperLU<T>::dealloc_supermatrix() {
    if(!allocated) return;

    // only the store of A is destroyed here; its arrays are the t_* buffers freed below
    Destroy_SuperMatrix_Store(&A);
#ifdef SUANPAN_SUPERLUMT
    Destroy_SuperNode_SCP(&L);
    Destroy_CompCol_NCP(&U);
#else
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
#endif

    if(t_val) superlu_free(t_val);
    if(t_row) superlu_free(t_row);
    if(t_col) superlu_free(t_col);
    if(perm_r) superlu_free(perm_r);
    if(perm_c) superlu_free(perm_c);

    // forget the released blocks
    t_val = nullptr;
    t_row = nullptr;
    t_col = nullptr;
    perm_r = nullptr;
    perm_c = nullptr;

    allocated = false;
}


/**
 * @brief Wrap the memory of a dense matrix as the SuperLU right-hand side B.
 *
 * No data is copied: B refers to the storage of @p in_mat, with the number of
 * rows used as the leading dimension. Constness of @p in_mat is cast away
 * because the SuperLU creators take a mutable pointer.
 *
 * @tparam ET element type; float selects the single precision creator,
 *            anything else the double precision one
 * @param in_mat dense matrix whose memory is wrapped
 */
template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::wrap_b(const Mat<ET>& in_mat) {
    const auto rows = static_cast<int>(in_mat.n_rows);
    const auto cols = static_cast<int>(in_mat.n_cols);

    if constexpr(!std::is_same_v<ET, float>) dCreate_Dense_Matrix(&B, rows, cols, (double*)in_mat.memptr(), rows, Stype_t::SLU_DN, Dtype_t::SLU_D, Mtype_t::SLU_GE);
    else sCreate_Dense_Matrix(&B, rows, cols, (float*)in_mat.memptr(), rows, Stype_t::SLU_DN, Dtype_t::SLU_S, Mtype_t::SLU_GE);
}


/**
 * @brief Solve with the existing factors L and U for the right-hand side wrapped in B.
 *
 * The callers read the solution back from the memory that B wraps. The store
 * of B is destroyed afterwards; the wrapped memory itself is left untouched.
 *
 * @tparam ET element type of the factors and of B
 * @param flag receives the status reported by the solver routine
 */
template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::tri_solve(int& flag) {
#ifdef SUANPAN_SUPERLUMT
    // SuperLU_MT's ?gstrs expects the ROW permutation before the COLUMN
    // permutation, which is the reverse of sequential SuperLU. Both are int*,
    // so a swapped pair compiles silently but permutes the solution wrongly.
    // NOTE(review): confirm against the bundled SuperLU_MT prototypes.
    if constexpr(std::is_same_v<ET, float>) sgstrs(NOTRANS, &L, &U, perm_r, perm_c, &B, &stat, &flag);
    else dgstrs(NOTRANS, &L, &U, perm_r, perm_c, &B, &stat, &flag);
#else
    superlu::gstrs<ET>(options.Trans, &L, &U, perm_c, perm_r, &B, &stat, &flag);
#endif

    Destroy_SuperMatrix_Store(&B);
}


/**
 * @brief Factorise A and solve for the right-hand side wrapped in B in one call.
 *
 * In the multithreaded build the column permutation is computed explicitly
 * with get_perm_c() before the driver is invoked. The store of B is destroyed
 * afterwards; the wrapped memory itself is left untouched.
 *
 * @tparam ET element type of A and B
 * @param flag receives the status reported by the driver routine
 */
template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::full_solve(int& flag) {
#ifndef SUANPAN_SUPERLUMT
    superlu::gssv<ET>(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &flag);
#else
    get_perm_c(ordering_num, &A, perm_c);

    if constexpr(std::is_same_v<ET, float>) {
        psgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
    }
    else {
        pdgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
    }
#endif

    Destroy_SuperMatrix_Store(&B);
}


/**
 * @brief Create an empty matrix and initialise the solver configuration.
 *
 * Sequential build: default options are loaded, iterative refinement is set
 * to match the precision of T, equilibration is switched off and the
 * statistics object is zeroed and initialised.
 * Multithreaded build: the statistics object is allocated and initialised for
 * SUANPAN_NUM_THREADS threads.
 *
 * @param in_row  number of rows
 * @param in_col  number of columns
 * @param in_elem forwarded to the SparseMat constructor
 */
template<sp_d T> SparseMatSuperLU<T>::SparseMatSuperLU(const uword in_row, const uword in_col, const uword in_elem)
    : SparseMat<T>(in_row, in_col, in_elem) {
#ifdef SUANPAN_SUPERLUMT
    const auto column_count = static_cast<int>(in_col);

    StatAlloc(column_count, SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
    StatInit(column_count, SUANPAN_NUM_THREADS, &stat);
#else
    set_default_options(&options);

    if constexpr(std::is_same_v<T, float>) options.IterRefine = superlu::IterRefine_t::SLU_SINGLE;
    else options.IterRefine = superlu::IterRefine_t::SLU_DOUBLE;

    options.Equil = superlu::yes_no_t::NO;

    // start from an all-zero statistics object before letting SuperLU set it up
    arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));
    StatInit(&stat);
#endif
}


/**
 * @brief Copy the stored matrix of @p other and set up a fresh solver configuration.
 *
 * Only the SparseMat part is copied. The SuperLU handles, buffers and
 * permutation arrays of the new object keep their default (empty) state, so
 * the copy holds no factorisation of its own.
 *
 * @param other matrix to copy from
 */
template<sp_d T> SparseMatSuperLU<T>::SparseMatSuperLU(const SparseMatSuperLU& other)
    : SparseMat<T>(other) {
    // The copy owns no L/U factors or permutations. If the base-class copy
    // carried over a set `factored` flag (presumably it does — verify in
    // MetaMat), direct_solve() would go straight to a triangular solve on
    // empty data, so force a fresh factorisation on first use.
    this->factored = false;

#ifndef SUANPAN_SUPERLUMT
    set_default_options(&options);
    options.IterRefine = std::is_same_v<T, float> ? superlu::IterRefine_t::SLU_SINGLE : superlu::IterRefine_t::SLU_DOUBLE;
    options.Equil = superlu::yes_no_t::NO;

    // start from an all-zero statistics object before letting SuperLU set it up
    arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));

    StatInit(&stat);
#else
    StatAlloc(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
    StatInit(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, &stat);
#endif
}


/**
 * @brief Release all SuperLU data and the statistics object.
 */
template<sp_d T> SparseMatSuperLU<T>::~SparseMatSuperLU() {

    // frees A, L, U and the buffers if a matrix is currently allocated
    dealloc_supermatrix();

    // counterpart of the statistics setup done in the constructors
    StatFree(&stat);

}


/**
 * @brief Reset the stored matrix and discard any SuperLU data built from it.
 */
template<sp_d T> void SparseMatSuperLU<T>::zeros() {

    SparseMat<T>::zeros();

    // the SuperLU copy and factors no longer match the stored matrix
    dealloc_supermatrix();

}


/**
 * @brief Create a heap-allocated copy of this matrix via the copy constructor.
 *
 * @return owning pointer to the new object, typed as the MetaMat base
 */
template<sp_d T> unique_ptr<MetaMat<T>> SparseMatSuperLU<T>::make_copy() {
    using self_t = SparseMatSuperLU<T>;

    return std::make_unique<self_t>(*this);
}


/**
 * @brief Solve the linear system for @p in_mat, factorising the matrix on first use.
 *
 * If the matrix is already marked as factored, the existing factors are
 * reused through solve_trs(). Otherwise the matrix is converted to CSC form,
 * factorised and marked as factored.
 *
 * With T == float or full precision requested, factorisation and solution
 * happen in one driver call on a copy of @p in_mat. In mixed precision mode
 * the matrix is factorised in single precision with a dummy right-hand side,
 * and the actual solution is obtained by solve_trs().
 *
 * @param out_mat receives the solution
 * @param in_mat  right-hand side
 * @return status flag of the solver, zero on success
 */
template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, const Mat<T>& in_mat) {
    if(this->factored) return solve_trs(out_mat, in_mat);

    this->factored = true;

    auto flag = 0;

    if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
        alloc_supermatrix(csc_form<T, int>(this->triplet_mat));

        // the driver overwrites the right-hand side, so work on a copy
        out_mat = in_mat;

        wrap_b(out_mat);

        full_solve<T>(flag);

        return flag;
    }

    alloc_supermatrix(csc_form<float, int>(this->triplet_mat));

    // Dummy right-hand side: only the factorisation is of interest here.
    // It must be mutable (the driver writes the solution into it) and
    // initialised (the driver reads it), hence neither const nor fill::none.
    fmat f_mat(arma::size(in_mat), fill::zeros);

    wrap_b(f_mat);

    full_solve<float>(flag);

    return 0 == flag ? solve_trs(out_mat, in_mat) : flag;
}


/**
 * @brief Solve with the existing factors, optionally using mixed precision refinement.
 *
 * With T == float or full precision requested, a single triangular solve is
 * performed on a copy of @p in_mat. Otherwise the single precision factors
 * are applied repeatedly to the normalised residual. Each correction is
 * accumulated into @p out_mat until the residual norm drops below the
 * configured tolerance, the configured number of refinement steps is reached,
 * or the solver reports an error.
 *
 * @param out_mat receives the solution
 * @param in_mat  right-hand side
 * @return status flag of the last solver call, zero on success
 */
template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, const Mat<T>& in_mat) {
    auto flag = 0;

    if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
        out_mat = in_mat;

        wrap_b(out_mat);

        tri_solve<T>(flag);

        return flag;
    }

    out_mat.zeros(arma::size(in_mat));

    mat full_residual = in_mat;

    auto multiplier = norm(full_residual);

    auto counter = 0u;
    while(counter++ < this->setting.iterative_refinement) {
        if(multiplier < this->setting.tolerance) break;

        // normalise so that the single precision solve works on values of order one
        auto residual = conv_to<fmat>::from(full_residual / multiplier);

        wrap_b(residual);

        tri_solve<float>(flag);

        if(0 != flag) break;

        const mat incre = multiplier * conv_to<mat>::from(residual);

        out_mat += incre;

        // Update the residual and its norm outside of the logging call:
        // suanpan_debug is a macro and the algorithm must not depend on
        // whether its arguments are evaluated.
        full_residual -= this->operator*(incre);
        multiplier = norm(full_residual);

        suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier);
    }

    return flag;
}


/**
 * @brief Solve the linear system for @p in_mat, reusing its storage where possible.
 *
 * If the matrix is already marked as factored, the existing factors are
 * reused through solve_trs(). Otherwise the matrix is converted to CSC form,
 * factorised and marked as factored.
 *
 * With T == float or full precision requested, the driver solves in place in
 * @p in_mat, which is then moved into @p out_mat. In mixed precision mode the
 * matrix is factorised in single precision with a dummy right-hand side, and
 * the actual solution is obtained by solve_trs().
 *
 * @param out_mat receives the solution
 * @param in_mat  right-hand side, consumed by the call
 * @return status flag of the solver, zero on success
 */
template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, Mat<T>&& in_mat) {
    // in_mat is a plain rvalue reference (T is not deduced here), so std::move is the right cast
    if(this->factored) return solve_trs(out_mat, std::move(in_mat));

    this->factored = true;

    auto flag = 0;

    if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
        alloc_supermatrix(csc_form<T, int>(this->triplet_mat));

        wrap_b(in_mat);

        full_solve<T>(flag);

        out_mat = std::move(in_mat);

        return flag;
    }

    alloc_supermatrix(csc_form<float, int>(this->triplet_mat));

    // Dummy right-hand side: only the factorisation is of interest here.
    // It must be mutable (the driver writes the solution into it) and
    // initialised (the driver reads it), hence neither const nor fill::none.
    fmat f_mat(arma::size(in_mat), fill::zeros);

    wrap_b(f_mat);

    full_solve<float>(flag);

    return 0 == flag ? solve_trs(out_mat, std::move(in_mat)) : flag;
}


/**
 * @brief Solve with the existing factors, reusing the storage of @p in_mat.
 *
 * With T == float or full precision requested, a single triangular solve is
 * performed in place in @p in_mat, which is then moved into @p out_mat.
 * Otherwise @p in_mat serves as the running residual of a mixed precision
 * refinement. The single precision factors are applied to the normalised
 * residual and each correction is accumulated into @p out_mat until the
 * residual norm drops below the configured tolerance, the configured number
 * of refinement steps is reached, or the solver reports an error.
 *
 * @param out_mat receives the solution
 * @param in_mat  right-hand side, consumed by the call
 * @return status flag of the last solver call, zero on success
 */
template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, Mat<T>&& in_mat) {
    auto flag = 0;

    if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
        wrap_b(in_mat);

        tri_solve<T>(flag);

        out_mat = std::move(in_mat);

        return flag;
    }

    out_mat.zeros(arma::size(in_mat));

    auto multiplier = arma::norm(in_mat);

    auto counter = 0u;
    while(counter++ < this->setting.iterative_refinement) {
        if(multiplier < this->setting.tolerance) break;

        // normalise so that the single precision solve works on values of order one
        auto residual = conv_to<fmat>::from(in_mat / multiplier);

        wrap_b(residual);

        tri_solve<float>(flag);

        if(0 != flag) break;

        const mat incre = multiplier * conv_to<mat>::from(residual);

        out_mat += incre;

        // Update the residual and its norm outside of the logging call:
        // suanpan_debug is a macro and the algorithm must not depend on
        // whether its arguments are evaluated.
        in_mat -= this->operator*(incre);
        multiplier = norm(in_mat);

        suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier);
    }

    return flag;
}

#endif


Precision::FULL
@ FULL

SparseMat.hpp

MetaMat
A MetaMat class that holds matrices.
Definition: MetaMat.hpp:39

MetaMat::n_cols
const uword n_cols
Definition: MetaMat.hpp:49

MetaMat::n_rows
const uword n_rows
Definition: MetaMat.hpp:48

SparseMat
A SparseMat class that holds matrices.
Definition: SparseMat.hpp:34

SparseMatSuperLU
A SparseMatSuperLU class that holds matrices.
Definition: SparseMatSuperLU.hpp:37

SparseMatSuperLU::SparseMatSuperLU
SparseMatSuperLU(SparseMatSuperLU &&) noexcept=delete

csc_form
Definition: csc_form.hpp:25

SUANPAN_NUM_THREADS
int SUANPAN_NUM_THREADS
Definition: command.cpp:67

sp_d
Definition: suanPan.h:318

csc_form.hpp

PlaneType::E
@ E

SparseMat::zeros
void zeros() override
Definition: SparseMat.hpp:79

SparseMatSuperLU::~SparseMatSuperLU
~SparseMatSuperLU() override
Definition: SparseMatSuperLU.hpp:202

SparseMatSuperLU::make_copy
unique_ptr< MetaMat< T > > make_copy() override
Definition: SparseMatSuperLU.hpp:212

SparseMatSuperLU::direct_solve
int direct_solve(Mat< T > &, Mat< T > &&) override
Definition: SparseMatSuperLU.hpp:285

SparseMatSuperLU::zeros
void zeros() override
Definition: SparseMatSuperLU.hpp:207

SparseMatSuperLU::SparseMatSuperLU
SparseMatSuperLU(uword, uword, uword=0)
Definition: SparseMatSuperLU.hpp:170

DOF::T
@ T

tensor::strain::norm
double norm(const vec &)
Definition: tensor.cpp:302

suanpan_debug
#define suanpan_debug(...)
Definition: suanPan.h:295