This graph shows which files directly or indirectly include this file:

Macros
#define	REAL

Functions
int	CORE_slarfx2 (int side, int N, float V, float TAU, float C1, int LDC1, float C2, int LDC2)
int	CORE_slarfx2c (int uplo, float V, float TAU, float C1, float C2, float *C3)
int	CORE_slarfx2ce (int uplo, float V, float TAU, float C1, float C2, float *C3)
int	CORE_shbelr (int uplo, int N, PLASMA_desc A, float V, float *TAU, int st, int ed, int eltsize)
int	CORE_shbrce (int uplo, int N, PLASMA_desc A, float V, float *TAU, int st, int ed, int eltsize)
int	CORE_shblrx (int uplo, int N, PLASMA_desc A, float V, float *TAU, int st, int ed, int eltsize)
int	CORE_sgbelr (int uplo, int N, PLASMA_desc *A, float *V, float *TAU, int st, int ed, int eltsize)
int	CORE_sgbrce (int uplo, int N, PLASMA_desc A, float V, float *TAU, int st, int ed, int eltsize)
int	CORE_sgblrx (int uplo, int N, PLASMA_desc *A, float *V, float *TAU, int st, int ed, int eltsize)
void	CORE_sasum (int storev, int uplo, int M, int N, float *A, int lda, float *work)
void	CORE_sgeadd (int M, int N, float alpha, float A, int LDA, float B, int LDB)
void	CORE_sbrdalg (PLASMA_enum uplo, int N, int NB, PLASMA_desc *pA, float *C, float *S, int i, int j, int m, int grsiz)
int	CORE_sgelqt (int M, int N, int IB, float A, int LDA, float T, int LDT, float TAU, float WORK)
void	CORE_sgemm (int transA, int transB, int M, int N, int K, float alpha, float A, int LDA, float B, int LDB, float beta, float *C, int LDC)
int	CORE_sgeqrt (int M, int N, int IB, float A, int LDA, float T, int LDT, float TAU, float WORK)
int	CORE_sgessm (int M, int N, int K, int IB, int IPIV, float L, int LDL, float *A, int LDA)
int	CORE_sgetrf (int M, int N, float A, int LDA, int IPIV, int *INFO)
int	CORE_sgetrf_incpiv (int M, int N, int IB, float A, int LDA, int IPIV, int *INFO)
int	CORE_sgetrf_reclap (const int M, const int N, float A, const int LDA, int IPIV, int *info)
int	CORE_sgetrf_rectil (const PLASMA_desc A, int IPIV, int info)
void	CORE_sgetrip (int m, int n, float A, float work)
void	CORE_slacpy (PLASMA_enum uplo, int M, int N, float A, int LDA, float B, int LDB)
void	CORE_slange (int norm, int M, int N, float A, int LDA, float work, float *normA)
void	CORE_slansy (int norm, int uplo, int N, float A, int LDA, float work, float *normA)
void	CORE_slaset (PLASMA_enum uplo, int n1, int n2, float alpha, float beta, float *tileA, int ldtilea)
void	CORE_slaset2 (PLASMA_enum uplo, int n1, int n2, float alpha, float *tileA, int ldtilea)
void	CORE_slaswp (int N, float A, int LDA, int I1, int I2, int IPIV, int INC)
int	CORE_slaswp_ontile (PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
int	CORE_slaswpc_ontile (PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
void	CORE_slauum (int uplo, int N, float *A, int LDA)
int	CORE_spamm (int op, int side, int storev, int M, int N, int K, int L, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float W, int LDW)
int	CORE_sparfb (int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, int L, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float T, int LDT, float *WORK, int LDWORK)
int	CORE_spemv (int trans, int storev, int M, int N, int L, float ALPHA, float A, int LDA, float X, int INCX, float BETA, float Y, int INCY, float WORK)
void	CORE_splgsy (float bump, int m, int n, float *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void	CORE_splrnt (int m, int n, float *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void	CORE_spotrf (int uplo, int N, float A, int LDA, int INFO)
void	CORE_sshift (int s, int m, int n, int L, float *A)
void	CORE_sshiftw (int s, int cl, int m, int n, int L, float A, float W)
int	CORE_sssssm (int M1, int N1, int M2, int N2, int K, int IB, float A1, int LDA1, float A2, int LDA2, float L1, int LDL1, float L2, int LDL2, int *IPIV)
void	CORE_ssymm (int side, int uplo, int M, int N, float alpha, float A, int LDA, float B, int LDB, float beta, float *C, int LDC)
void	CORE_ssyrk (int uplo, int trans, int N, int K, float alpha, float A, int LDA, float beta, float C, int LDC)
void	CORE_ssyr2k (int uplo, int trans, int N, int K, float alpha, float A, int LDA, float B, int LDB, float beta, float *C, int LDC)
void	CORE_sswpab (int i, int n1, int n2, float A, float work)
int	CORE_sswptr_ontile (PLASMA_desc descA, int i1, int i2, int ipiv, int inc, float Akk, int ldak)
void	CORE_strdalg (PLASMA_enum uplo, int N, int NB, PLASMA_desc pA, float C, float *S, int i, int j, int m, int grsiz)
void	CORE_strmm (int side, int uplo, int transA, int diag, int M, int N, float alpha, float A, int LDA, float B, int LDB)
void	CORE_strsm (int side, int uplo, int transA, int diag, int M, int N, float alpha, float A, int LDA, float B, int LDB)
void	CORE_strtri (int uplo, int diag, int N, float A, int LDA, int info)
int	CORE_stslqt (int M, int N, int IB, float A1, int LDA1, float A2, int LDA2, float T, int LDT, float TAU, float *WORK)
int	CORE_stsmlq (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float T, int LDT, float *WORK, int LDWORK)
int	CORE_stsmlq_corner (int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float A3, int lda3, float V, int ldv, float T, int ldt, float WORK, int ldwork)
int	CORE_stsmlq_sytra1 (int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt, float *WORK, int ldwork)
int	CORE_stsmqr (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float T, int LDT, float *WORK, int LDWORK)
int	CORE_stsmqr_corner (int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float A3, int lda3, float V, int ldv, float T, int ldt, float WORK, int ldwork)
int	CORE_stsmqr_sytra1 (int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt, float *WORK, int ldwork)
int	CORE_stsqrt (int M, int N, int IB, float A1, int LDA1, float A2, int LDA2, float T, int LDT, float TAU, float *WORK)
int	CORE_ststrf (int M, int N, int IB, int NB, float U, int LDU, float A, int LDA, float L, int LDL, int IPIV, float WORK, int LDWORK, int INFO)
int	CORE_sttmqr (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float T, int LDT, float *WORK, int LDWORK)
int	CORE_sttqrt (int M, int N, int IB, float A1, int LDA1, float A2, int LDA2, float T, int LDT, float TAU, float *WORK)
int	CORE_sttmlq (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, float A1, int LDA1, float A2, int LDA2, float V, int LDV, float T, int LDT, float *WORK, int LDWORK)
int	CORE_sttlqt (int M, int N, int IB, float A1, int LDA1, float A2, int LDA2, float T, int LDT, float TAU, float *WORK)
int	CORE_sormlq (int side, int trans, int M, int N, int IB, int K, float V, int LDV, float T, int LDT, float C, int LDC, float WORK, int LDWORK)
int	CORE_sormqr (int side, int trans, int M, int N, int K, int IB, float V, int LDV, float T, int LDT, float C, int LDC, float WORK, int LDWORK)
void	QUARK_CORE_sasum (Quark quark, Quark_Task_Flags task_flags, PLASMA_enum storev, PLASMA_enum uplo, int m, int n, float A, int lda, int szeA, float work, int szeW)
void	QUARK_CORE_sasum_f1 (Quark quark, Quark_Task_Flags task_flags, PLASMA_enum storev, PLASMA_enum uplo, int m, int n, float A, int lda, int szeA, float work, int szeW, float *fake, int szeF)
void	QUARK_CORE_sgeadd (Quark quark, Quark_Task_Flags task_flags, int m, int n, int nb, float alpha, float A, int lda, float B, int ldb)
void	QUARK_CORE_sbrdalg (Quark quark, Quark_Task_Flags task_flags, int uplo, int N, int NB, PLASMA_desc A, float C, float S, int i, int j, int m, int grsiz, int BAND, int PCOL, int ACOL, int MCOL)
void	QUARK_CORE_sgelqt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A, int lda, float T, int ldt)
void	QUARK_CORE_sgemm (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float *C, int ldc)
void	QUARK_CORE_sgemm2 (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float *C, int ldc)
void	QUARK_CORE_sgemm_f2 (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float C, int ldc, float fake1, int szefake1, int flag1, float *fake2, int szefake2, int flag2)
void	QUARK_CORE_sgemm_p2 (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float C, int ldc)
void	QUARK_CORE_sgemm_p2f1 (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float C, int ldc, float *fake1, int szefake1, int flag1)
void	QUARK_CORE_sgemm_p3 (Quark quark, Quark_Task_Flags task_flags, int transA, int transB, int m, int n, int k, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float **C, int ldc)
void	QUARK_CORE_sgeqrt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A, int lda, float T, int ldt)
void	QUARK_CORE_sgessm (Quark quark, Quark_Task_Flags task_flags, int m, int n, int k, int ib, int nb, int IPIV, float L, int ldl, float *A, int lda)
void	QUARK_CORE_sgetrf (Quark quark, Quark_Task_Flags task_flags, int m, int n, int nb, float A, int lda, int IPIV, PLASMA_sequence sequence, PLASMA_request request, PLASMA_bool check_info, int iinfo)
void	QUARK_CORE_sgetrf_incpiv (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A, int lda, int IPIV, PLASMA_sequence sequence, PLASMA_request request, PLASMA_bool check_info, int iinfo)
void	QUARK_CORE_sgetrf_reclap (Quark quark, Quark_Task_Flags task_flags, int m, int n, int nb, float A, int lda, int IPIV, PLASMA_sequence sequence, PLASMA_request request, PLASMA_bool check_info, int iinfo, int nbthread)
void	QUARK_CORE_sgetrf_rectil (Quark quark, Quark_Task_Flags task_flags, PLASMA_desc A, float Amn, int size, int IPIV, PLASMA_sequence sequence, PLASMA_request request, PLASMA_bool check_info, int iinfo, int nbthread)
void	QUARK_CORE_sgetrip (Quark quark, Quark_Task_Flags task_flags, int m, int n, float *A, int szeA)
void	QUARK_CORE_sgetrip_f1 (Quark quark, Quark_Task_Flags task_flags, int m, int n, float A, int szeA, float fake, int szeF, int paramF)
void	QUARK_CORE_sgetrip_f2 (Quark quark, Quark_Task_Flags task_flags, int m, int n, float A, int szeA, float fake1, int szeF1, int paramF1, float *fake2, int szeF2, int paramF2)
void	QUARK_CORE_ssymm (Quark quark, Quark_Task_Flags task_flags, int side, int uplo, int m, int n, int nb, float alpha, float A, int lda, float B, int ldb, float beta, float *C, int ldc)
void	QUARK_CORE_ssygst (Quark quark, Quark_Task_Flags task_flags, int itype, int uplo, int N, float A, int LDA, float B, int LDB, PLASMA_sequence sequence, PLASMA_request request, int iinfo)
void	QUARK_CORE_ssyrk (Quark quark, Quark_Task_Flags task_flags, int uplo, int trans, int n, int k, int nb, float alpha, float A, int lda, float beta, float C, int ldc)
void	QUARK_CORE_ssyr2k (Quark quark, Quark_Task_Flags task_flags, int uplo, int trans, int n, int k, int nb, float alpha, float A, int lda, float B, int LDB, float beta, float *C, int ldc)
void	QUARK_CORE_ssyrfb (Quark quark, Quark_Task_Flags task_flags, int uplo, int n, int k, int ib, int nb, float A, int lda, float T, int ldt, float *C, int ldc)
void	QUARK_CORE_slacpy (Quark quark, Quark_Task_Flags task_flags, PLASMA_enum uplo, int m, int n, int mb, float A, int lda, float B, int ldb)
void	QUARK_CORE_slange (Quark quark, Quark_Task_Flags task_flags, int norm, int M, int N, float A, int LDA, int szeA, int szeW, float result)
void	QUARK_CORE_slange_f1 (Quark quark, Quark_Task_Flags task_flags, int norm, int M, int N, float A, int LDA, int szeA, int szeW, float result, float *fake, int szeF)
void	QUARK_CORE_slansy (Quark quark, Quark_Task_Flags task_flags, int norm, int uplo, int N, float A, int LDA, int szeA, int szeW, float result)
void	QUARK_CORE_slansy_f1 (Quark quark, Quark_Task_Flags task_flags, int norm, int uplo, int N, float A, int LDA, int szeA, int szeW, float result, float *fake, int szeF)
void	QUARK_CORE_slaset (Quark quark, Quark_Task_Flags task_flags, PLASMA_enum uplo, int n1, int n2, float alpha, float beta, float *tileA, int ldtilea)
void	QUARK_CORE_slaset2 (Quark quark, Quark_Task_Flags task_flags, PLASMA_enum uplo, int n1, int n2, float alpha, float *tileA, int ldtilea)
void	QUARK_CORE_slaswp (Quark quark, Quark_Task_Flags task_flags, int n, float A, int lda, int i1, int i2, int ipiv, int inc)
void	QUARK_CORE_slaswp_f2 (Quark quark, Quark_Task_Flags task_flags, int n, float A, int lda, int i1, int i2, int ipiv, int inc, float fake1, int szefake1, int flag1, float fake2, int szefake2, int flag2)
void	QUARK_CORE_slaswp_ontile (Quark quark, Quark_Task_Flags task_flags, PLASMA_desc descA, float A, int i1, int i2, int ipiv, int inc, float *fakepanel)
void	QUARK_CORE_slaswp_ontile_f2 (Quark quark, Quark_Task_Flags task_flags, PLASMA_desc descA, float A, int i1, int i2, int ipiv, int inc, float fake1, int szefake1, int flag1, float fake2, int szefake2, int flag2)
void	QUARK_CORE_slaswpc_ontile (Quark quark, Quark_Task_Flags task_flags, PLASMA_desc descA, float A, int i1, int i2, int ipiv, int inc, float *fakepanel)
void	QUARK_CORE_slauum (Quark quark, Quark_Task_Flags task_flags, int uplo, int n, int nb, float *A, int lda)
void	QUARK_CORE_splgsy (Quark quark, Quark_Task_Flags task_flags, float bump, int m, int n, float *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void	QUARK_CORE_splrnt (Quark quark, Quark_Task_Flags task_flags, int m, int n, float *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void	QUARK_CORE_spotrf (Quark quark, Quark_Task_Flags task_flags, int uplo, int n, int nb, float A, int lda, PLASMA_sequence sequence, PLASMA_request *request, int iinfo)
void	QUARK_CORE_sshift (Quark quark, Quark_Task_Flags task_flags, int s, int m, int n, int L, float *A)
void	QUARK_CORE_sshiftw (Quark quark, Quark_Task_Flags task_flags, int s, int cl, int m, int n, int L, float A, float W)
void	QUARK_CORE_sssssm (Quark quark, Quark_Task_Flags task_flags, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float L1, int ldl1, float L2, int ldl2, int *IPIV)
void	QUARK_CORE_sswpab (Quark quark, Quark_Task_Flags task_flags, int i, int n1, int n2, float *A, int szeA)
void	QUARK_CORE_sswptr_ontile (Quark quark, Quark_Task_Flags task_flags, PLASMA_desc descA, float Aij, int i1, int i2, int ipiv, int inc, float *Akk, int ldak)
void	QUARK_CORE_strdalg (Quark quark, Quark_Task_Flags task_flags, int uplo, int N, int NB, PLASMA_desc A, float C, float S, int i, int j, int m, int grsiz, int BAND, int PCOL, int ACOL, int MCOL)
void	QUARK_CORE_strmm (Quark quark, Quark_Task_Flags task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, float alpha, float A, int lda, float B, int ldb)
void	QUARK_CORE_strmm_p2 (Quark quark, Quark_Task_Flags task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, float alpha, float A, int lda, float *B, int ldb)
void	QUARK_CORE_strsm (Quark quark, Quark_Task_Flags task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, float alpha, float A, int lda, float B, int ldb)
void	QUARK_CORE_strtri (Quark quark, Quark_Task_Flags task_flags, int uplo, int diag, int n, int nb, float A, int lda, PLASMA_sequence sequence, PLASMA_request *request, int iinfo)
void	QUARK_CORE_stslqt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A1, int lda1, float A2, int lda2, float *T, int ldt)
void	QUARK_CORE_stsmlq (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_stsmlq_sytra1 (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_stsmlq_corner (Quark quark, Quark_Task_Flags task_flags, int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float A3, int lda3, float V, int ldv, float *T, int ldt)
void	QUARK_CORE_stsmqr (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_stsmqr_sytra1 (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_stsmqr_corner (Quark quark, Quark_Task_Flags task_flags, int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float A3, int lda3, float V, int ldv, float *T, int ldt)
void	QUARK_CORE_stsqrt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A1, int lda1, float A2, int lda2, float *T, int ldt)
void	QUARK_CORE_ststrf (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float U, int ldu, float A, int lda, float L, int ldl, int IPIV, PLASMA_sequence sequence, PLASMA_request request, PLASMA_bool check_info, int iinfo)
void	QUARK_CORE_sttmqr (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_sttqrt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A1, int lda1, float A2, int lda2, float *T, int ldt)
void	QUARK_CORE_sttmlq (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, float A1, int lda1, float A2, int lda2, float V, int ldv, float T, int ldt)
void	QUARK_CORE_sttlqt (Quark quark, Quark_Task_Flags task_flags, int m, int n, int ib, int nb, float A1, int lda1, float A2, int lda2, float *T, int ldt)
void	QUARK_CORE_spamm (Quark quark, Quark_Task_Flags task_flags, int op, int side, int storev, int m, int n, int k, int l, float A1, int lda1, float A2, int lda2, float V, int ldv, float W, int ldw)
void	QUARK_CORE_sormlq (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m, int n, int ib, int nb, int k, float A, int lda, float T, int ldt, float *C, int ldc)
void	QUARK_CORE_sormqr (Quark quark, Quark_Task_Flags task_flags, int side, int trans, int m, int n, int k, int ib, int nb, float A, int lda, float T, int ldt, float *C, int ldc)
void	CORE_sasum_quark (Quark *quark)
void	CORE_sasum_f1_quark (Quark *quark)
void	CORE_sgeadd_quark (Quark *quark)
void	CORE_sbrdalg_quark (Quark *quark)
void	CORE_sgelqt_quark (Quark *quark)
void	CORE_sgemm_quark (Quark *quark)
void	CORE_sgeqrt_quark (Quark *quark)
void	CORE_sgessm_quark (Quark *quark)
void	CORE_sgetrf_quark (Quark *quark)
void	CORE_sgetrf_incpiv_quark (Quark *quark)
void	CORE_sgetrf_reclap_quark (Quark *quark)
void	CORE_sgetrf_rectil_quark (Quark *quark)
void	CORE_sgetrip_quark (Quark *quark)
void	CORE_sgetrip_f1_quark (Quark *quark)
void	CORE_sgetrip_f2_quark (Quark *quark)
void	CORE_ssygst_quark (Quark *quark)
void	CORE_ssyrfb_quark (Quark *quark)
void	CORE_slacpy_quark (Quark *quark)
void	CORE_slange_quark (Quark *quark)
void	CORE_slange_f1_quark (Quark *quark)
void	CORE_slansy_quark (Quark *quark)
void	CORE_slansy_f1_quark (Quark *quark)
void	CORE_slaset_quark (Quark *quark)
void	CORE_slaset2_quark (Quark *quark)
void	CORE_slauum_quark (Quark *quark)
void	CORE_spamm_quark (Quark *quark)
void	CORE_splgsy_quark (Quark *quark)
void	CORE_splrnt_quark (Quark *quark)
void	CORE_spotrf_quark (Quark *quark)
void	CORE_sshift_quark (Quark *quark)
void	CORE_sshiftw_quark (Quark *quark)
void	CORE_sssssm_quark (Quark *quark)
void	CORE_ssymm_quark (Quark *quark)
void	CORE_ssyrk_quark (Quark *quark)
void	CORE_ssyr2k_quark (Quark *quark)
void	CORE_sswpab_quark (Quark *quark)
void	CORE_sswptr_ontile_quark (Quark *quark)
void	CORE_strdalg_quark (Quark *quark)
void	CORE_strmm_quark (Quark *quark)
void	CORE_strsm_quark (Quark *quark)
void	CORE_strtri_quark (Quark *quark)
void	CORE_stslqt_quark (Quark *quark)
void	CORE_stsmlq_quark (Quark *quark)
void	CORE_stsmlq_sytra1_quark (Quark *quark)
void	CORE_stsmlq_corner_quark (Quark *quark)
void	CORE_stsmqr_quark (Quark *quark)
void	CORE_stsmqr_sytra1_quark (Quark *quark)
void	CORE_stsmqr_corner_quark (Quark *quark)
void	CORE_stsqrt_quark (Quark *quark)
void	CORE_ststrf_quark (Quark *quark)
void	CORE_sttmqr_quark (Quark *quark)
void	CORE_sttqrt_quark (Quark *quark)
void	CORE_sttmlq_quark (Quark *quark)
void	CORE_sttlqt_quark (Quark *quark)
void	CORE_sormlq_quark (Quark *quark)
void	CORE_sormqr_quark (Quark *quark)
void	CORE_slaswp_quark (Quark *quark)
void	CORE_slaswp_f2_quark (Quark *quark)
void	CORE_slaswp_ontile_quark (Quark *quark)
void	CORE_slaswp_ontile_f2_quark (Quark *quark)
void	CORE_slaswpc_ontile_quark (Quark *quark)
void	CORE_strmm_p2_quark (Quark *quark)
void	CORE_sgemm_f2_quark (Quark *quark)
void	CORE_sgemm_p2_quark (Quark *quark)
void	CORE_sgemm_p2f1_quark (Quark *quark)
void	CORE_sgemm_p3_quark (Quark *quark)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:: 2.4.5

Author:: Jakub Kurzak; Hatem Ltaief; Mathieu Faverge; Azzam Haidar

Date:: 2010-11-15 s Tue Nov 22 14:35:11 2011

Definition in file core_sblas.h.

Macro Definition Documentation

#define REAL

Definition at line 21 of file core_sblas.h.

Function Documentation

void CORE_sasum	(	int	storev,
		int	uplo,
		int	M,
		int	N,
		float *	A,
		int	lda,
		float *	work
	)

Definition at line 28 of file core_sasum.c.

References PlasmaColumnwise, PlasmaLower, PlasmaUpper, PlasmaUpperLower, and sum().

{
    /*
     * Adds absolute values of entries of A into work (work is only ever
     * incremented, never reset here).  Column c of A starts at A + c*lda.
     *
     *  - PlasmaUpper: for each column c, rows 0..c are read.  Each entry
     *    above the diagonal contributes to both work[row] and work[c];
     *    the diagonal entry contributes to work[c] only.
     *  - PlasmaLower: for each column c, rows c..M-1 are read.  Each entry
     *    below the diagonal contributes to both work[row] and work[c];
     *    the diagonal entry contributes to work[c] only.
     *  - any other uplo: all M rows of every column are read; the result is
     *    accumulated per column (work[c]) when storev == PlasmaColumnwise,
     *    and per row (work[row]) otherwise.
     */
    const float *col;      /* first element of the column being scanned */
    float colsum, mag;
    int row, c;

    if (uplo == PlasmaUpper) {
        for (c = 0; c < N; c++) {
            col    = A + (c*lda);
            colsum = 0.0;
            for (row = 0; row < c; row++) {
                mag        = fabsf(col[row]);
                colsum    += mag;
                work[row] += mag;
            }
            /* off-diagonal total plus the diagonal entry of this column */
            work[c] += colsum + fabsf(col[c]);
        }
    }
    else if (uplo == PlasmaLower) {
        for (c = 0; c < N; c++) {
            col    = A + (c*lda);
            colsum = 0.0;
            /* diagonal entry first, then everything below it */
            work[c] += fabsf(col[c]);
            for (row = c+1; row < M; row++) {
                mag        = fabsf(col[row]);
                colsum    += mag;
                work[row] += mag;
            }
            work[c] += colsum;
        }
    }
    else if (storev == PlasmaColumnwise) {
        /* full matrix, one accumulator per column */
        for (c = 0; c < N; c++) {
            col = A + (c*lda);
            for (row = 0; row < M; row++) {
                work[c] += fabsf(col[row]);
            }
        }
    }
    else {
        /* full matrix, one accumulator per row */
        for (c = 0; c < N; c++) {
            col = A + (c*lda);
            for (row = 0; row < M; row++) {
                work[row] += fabsf(col[row]);
            }
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sasum_f1_quark ( Quark * quark )

Definition at line 162 of file core_sasum.c.

References A, CORE_sasum(), quark_unpack_args_8, storev, and uplo.

{
    /* Task arguments, filled in by quark_unpack_args_8 in the order they
     * appear in the call below. */
    int storev, uplo;
    int M, N, lda;
    float *A, *work;
    float *fake;    /* unpacked with the others but not passed to CORE_sasum */

    quark_unpack_args_8(quark, storev, uplo, M, N, A, lda, work, fake);

    /* Forward everything except `fake` to the kernel. */
    CORE_sasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sasum_quark ( Quark * quark )

Declarations of QUARK wrappers (called by QUARK) - alphabetical order

Definition at line 119 of file core_sasum.c.

References A, CORE_sasum(), quark_unpack_args_7, storev, and uplo.

{
    /* Task arguments, filled in by quark_unpack_args_7 in the order they
     * appear in the call below. */
    int storev, uplo;
    int M, N, lda;
    float *A, *work;

    quark_unpack_args_7(quark, storev, uplo, M, N, A, lda, work);

    /* Forward the unpacked arguments unchanged to the kernel. */
    CORE_sasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sbrdalg	(	PLASMA_enum	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	pA,
		float *	V,
		float *	TAU,
		int	i,
		int	j,
		int	m,
		int	grsiz
	)

CORE_sbrdalg is a part of the bidiagonal reduction algorithm (bulge chasing). It corresponds to a local driver of the kernels that should be executed on a single core.

Parameters:

[in]	uplo	PlasmaLower: PlasmaUpper:
[in]	N	The order of the matrix A. N >= 0.
[in]	NB	The size of the bandwidth of the matrix A, which corresponds to the tile size. NB >= 0.
[in]	pA	A pointer to the descriptor of the matrix A.
[out]	V	float array, dimension (N). The scalar elementary reflectors are written in this array. So it is used as a workspace for V at each step of the bulge chasing algorithm.
[out]	TAU	float array, dimension (N). The scalar factors of the elementary reflectors are written in this array. So it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in]	i	Integer that refers to the current sweep (outer loop).
[in]	j	Integer that refers to the sweep to chase (inner loop).
[in]	m	Integer that refers to a sweep step, to ensure order dependencies.
[in]	grsiz	Integer that refers to the size of a group. A group is the number of kernels that should be executed sequentially on the same core. The group size is a trade-off between locality (cache reuse) and parallelism: a small group size increases parallelism while a large group size increases cache reuse.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 83 of file core_sbrdalg.c.

References A, CORE_sgbelr(), CORE_sgblrx(), CORE_sgbrce(), plasma_desc_t::dtyp, min, and plasma_element_size().

{
    /*
     * Runs at most grsiz consecutive kernels of one group.  Each kernel has
     * an id (myid) derived from i, j, m and its position in the group:
     *   - id 1            -> CORE_sgbelr
     *   - even ids        -> CORE_sgbrce
     *   - other odd ids   -> CORE_sgblrx
     * The loop stops early once blklastind reaches N-1 or beyond.
     */
    const int   shift = 3;
    PLASMA_desc A = *pA;                              /* local copy of the descriptor */
    size_t      eltsize = plasma_element_size(A.dtyp);
    int         stepercol, idbase, step, is_even;
    int         myid, colpt, stind, edind, blklastind;

    /* shift / grsiz, rounded up when the division is not exact */
    stepercol = shift / grsiz;
    if (stepercol*grsiz != shift)
        stepercol++;

    /* part of the kernel id that does not change inside the group */
    idbase = (i-j)*(stepercol*grsiz) + (m-1)*grsiz;

    for (step = 0; step < grsiz; step++) {
        myid    = idbase + step + 1;
        is_even = (myid%2 == 0);

        /* last column index handled by this kernel, then its start/end range */
        if (is_even)
            colpt = (myid/2) * NB + 1 + j - 1;
        else
            colpt = ((myid+1)/2)*NB + 1 + j - 1;
        stind = colpt - NB + 1;
        edind = min(colpt, N);

        if (is_even) {
            blklastind = colpt;
            CORE_sgbrce(uplo, N, &A, V, TAU, stind, edind, eltsize);
        } else {
            /* N only when the range has shrunk to the end of the matrix */
            blklastind = ((stind >= edind-1) && (edind == N)) ? N : 0;
            if (myid == 1)
                CORE_sgbelr(uplo, N, &A, V, TAU, stind, edind, eltsize);
            else
                CORE_sgblrx(uplo, N, &A, V, TAU, stind, edind, eltsize);
        }

        if (blklastind >= (N-1))
            break;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sbrdalg_quark ( Quark * quark )

Definition at line 161 of file core_sbrdalg.c.

References CORE_sbrdalg(), quark_unpack_args_10, TAU, uplo, and V.

{
    /* Task arguments, filled in by quark_unpack_args_10 in the order they
     * appear in the call below. */
    int          uplo;
    int          N, NB;
    PLASMA_desc *pA;
    float       *V, *TAU;
    int          i, j, m, grsiz;

    quark_unpack_args_10(quark, uplo, N, NB, pA, V, TAU, i, j, m, grsiz);

    /* Forward the unpacked arguments unchanged to the kernel driver. */
    CORE_sbrdalg(uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgbelr	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 78 of file core_sgbelr.c.

References A, CORE_slarfx2(), CORE_slarfx2ce(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * For each index i from ed down to st+1, builds a 2-element Householder
     * reflector (LAPACKE_slarfg_work with n = 2) that zeroes one band entry,
     * stores it in V(i)/TAU(i), and applies it to neighbouring rows/columns
     * through CORE_slarfx2 and CORE_slarfx2ce.  A second pass over the same
     * indices applies the stored reflectors from the other side.
     *
     * Returns -2 if N < 0, -6 if ed <= st, and PLASMA_SUCCESS otherwise
     * (including the N == 0 quick return).  Return codes of the called
     * kernels are not inspected.
     *
     * NOTE(review): A(m,n), V(m), TAU(m) and ELTLDD(desc,k) are macros defined
     * outside this snippet; they presumably map global indices to element
     * addresses / leading dimensions inside the tile descriptor -- confirm
     * against core_sgbelr.c.
     */
    int    NB, J1, J2;
    int    len1, len2, t1ed, t2st;
    int    i;
    static float zzero = 0.0;
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is the mb field of the descriptor; used below to split index ranges */
    NB = A->mb;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            /* move entry (i, st-1) into V(i) and zero it in A before calling slarfg */
            *V(i)          = *A(i, (st-1));
            *A(i, (st-1))  = zzero;
            LAPACKE_slarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            /*
             * The index range [J1, J2] is cut at t1ed, which is J2 rounded
             * down to a multiple of NB: [J1, t1ed] has len1 entries and
             * [t2st, J2] has len2 entries.  Each non-empty piece gets its
             * own CORE_slarfx2 call (presumably so that one call does not
             * cross a tile boundary -- TODO confirm).
             */
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
            /* update the entries (i-1,i-1), (i,i-1) and (i,i) */
            CORE_slarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i)); 
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            /* rows i+1 .. min(ed,N), split at a multiple of NB as above */
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        /* Same structure as the lower case with row/column roles swapped:
         * the reflector is built from row st-1 and applied with PlasmaRight
         * first, then with PlasmaLeft in the second pass. */
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            /* move entry (st-1, i) into V(i) and zero it in A before calling slarfg */
            *V(i)          = *A((st-1),  i);
            *A((st-1),  i) = zzero;
            LAPACKE_slarfg_work( 2, A((st-1), (i-1)), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            /* range [st, i-2], split at a multiple of NB into len1 + len2 entries */
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            /* update the entries (i-1,i-1), (i-1,i) and (i,i) */
            CORE_slarfx2ce(PlasmaUpper, V(i), TAU(i), A((i-1),(i-1)), A((i-1), i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1*/
        for (i = ed; i >= st+1 ; i--){
            /* columns i+1 .. min(ed,N), split at a multiple of NB as above */
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
        }
    }  /* end of else for the upper case*/
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgblrx	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 78 of file core_sgblrx.c.

References A, CORE_slarfx2(), CORE_slarfx2ce(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * Applies the 2-element reflectors held in V(i)/TAU(i), for i = ed down to
     * st+1, to the matrix accessed through the A() macro.  No reflector is
     * generated in this routine (there is no LAPACKE_slarfg_work call).
     *
     * Returns -2 if N < 0, -6 if ed <= st, and PLASMA_SUCCESS otherwise
     * (including the N == 0 quick return).
     *
     * NOTE(review): A(), V(), TAU() and ELTLDD() are macros defined outside this
     * view; they presumably rely on vA and eltsize for element addressing --
     * confirm before renaming or removing either of them.
     */
    int    NB, J1, J2;
    int    len1, len2, t1ed, t2st;
    int    i;
    /* Local copy of the descriptor, handed by value to ELTLDD(). */
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is used below to split every index range [J1, J2] at a multiple of NB. */
    NB = A->mb;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1    = st;
            J2    = i-2;
            /*
             * Split [J1, J2] at t1ed = (J2/NB)*NB: len1 entries J1..t1ed and
             * len2 entries t2st..J2; either length may be <= 0, in which case the
             * corresponding call is skipped.  Presumably this keeps each
             * CORE_slarfx2 call on one side of a tile boundary -- TODO confirm.
             */
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            /* Left application to rows i-1 and i, columns J1..J2. */
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
            /* Update of the three entries A(i-1,i-1), A(i,i-1), A(i,i). */
            CORE_slarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i)); 
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1    = i+1;
            J2    = min(ed,N);
            /* Same split of [J1, J2] at a multiple of NB as in the first loop. */
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            /* Right application to columns i-1 and i, rows J1..J2. */
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1    = st;
            J2    = i-2;
            /* Split [J1, J2] at a multiple of NB (see the lower case above). */
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            /* Right application to columns i-1 and i, rows J1..J2. */
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            /* Update of the three entries A(i-1,i-1), A(i-1,i), A(i,i). */
            CORE_slarfx2ce(PlasmaUpper, V(i), TAU(i), A(i-1,i-1), A(i-1, i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1    = i+1;
            J2    = min(ed,N);
            /* Split [J1, J2] at a multiple of NB (see the lower case above). */
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            /* Left application to rows i-1 and i, columns J1..J2. */
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
        }
    }  /* end of else for the upper case */
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgbrce	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 76 of file core_sgbrce.c.

References A, CORE_slarfx2(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, TAU, and V.

{
    /*
     * For the lower or the upper case, runs two passes over i = ed .. st+1
     * (descending):
     *   pass 1: applies the reflector (V(i), TAU(i)) over the index range
     *           J1..J3 and, when J3 > J2, generates a new 2-element reflector
     *           with LAPACKE_slarfg_work, stored in V(J3)/TAU(J3);
     *   pass 2: for every i where J3 > J2, applies (V(J3), TAU(J3)) from the
     *           opposite side over the range i..min(ed,N).
     *
     * Returns -2 if N < 0, -6 if ed <= st, and PLASMA_SUCCESS otherwise
     * (including the N == 0 quick return).
     *
     * NOTE(review): A(), V(), TAU() and ELTLDD() are macros defined outside this
     * view; they presumably rely on vA and eltsize for element addressing --
     * confirm before renaming or removing either of them.
     */
    int    NB, J1, J2, J3, KDM2, len, pt;
    int    len1, len2, t1ed, t2st;
    int    i;
    static float zzero = 0.0;
    /* Local copy of the descriptor, handed by value to ELTLDD(). */
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is used to split index ranges at a multiple of NB; KDM2 = NB - 2. */
    NB = A->mb;
    KDM2 = A->mb-2;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            /* NOTE: this value of len is overwritten below (len = J3-J2) before being read. */
            len = J3-J1+1;
            if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/
            /*
             * Split [J1, J3] at t1ed = (J3/NB)*NB: len1 entries J1..t1ed and
             * len2 entries t2st..J3; a call is skipped when its length is <= 0.
             * Presumably this keeps each CORE_slarfx2 call on one side of a tile
             * boundary -- TODO confirm.
             */
            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J3-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)  );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
            /* len > 0 exactly when J3 == J2+1, i.e. when J2 < N. */
            len    = J3-J2; 
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(J3,(i-1));
                *A(J3,(i-1))   = 0.0;
                LAPACKE_slarfg_work( 2, A(J2,(i-1)), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* Recompute J2/J3 as in pass 1 to find out whether a reflector was generated for this i. */
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                /* pt keeps the pass-1 value of J2 (rows pt and pt+1 are updated); J2 is then reused as a column bound. */
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                /* Split the column range [J1, J2] at a multiple of NB. */
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1; 
                len2  = J2-t2st+1;
                if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i   ), ELTLDD(vA, pt),  A((pt+1),  i  ), ELTLDD(vA, pt+1) );
                if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt),  A((pt+1), t2st), ELTLDD(vA, pt+1) );
            }
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the left. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            /* NOTE: this value of len is overwritten below (len = J3-J2) before being read. */
            len = J3-J1+1;
            if(J3>J2)*A((i-1), J3)=zzero;
            /* Split [J1, J3] at a multiple of NB (see the lower case above). */
            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J3-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
            len    = J3-J2; 
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(i-1, J3);
                *A(i-1, J3)  = 0.0;
                LAPACKE_slarfg_work( 2, A(i-1, J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* find if there was a nnz created. if yes apply right else nothing to be done. */
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                /* pt keeps the pass-1 value of J2 (columns pt and pt+1 are updated); J2 is then reused as a row bound. */
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                /* Split the row range [J1, J2] at a multiple of NB. */
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1; 
                len2  = J2-t2st+1;
                if(len1>0)CORE_slarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i   , pt), ELTLDD(vA, i),     A(i,    pt+1), ELTLDD(vA, i) );
                if(len2>0)CORE_slarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st),  A(t2st, pt+1), ELTLDD(vA, t2st) );
            }
        }
    } /* end of else for the upper case */
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgeadd	(	int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB
	)

Definition at line 26 of file core_sgeadd.c.

References cblas_saxpy().

{
    /* Adds alpha * A to B in place (SAXPY) on an M-by-N column-major block. */
    int col = 0;

    /* Both leading dimensions equal M: one SAXPY over all M*N entries. */
    if ((LDA == M) && (LDB == M)) {
        cblas_saxpy(M*N, (alpha), A, 1, B, 1);
        return;
    }

    /* Otherwise issue one SAXPY of length M per column. */
    while (col < N) {
        cblas_saxpy(M, (alpha), A + col*LDA, 1, B + col*LDB, 1);
        col++;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgeadd_quark ( Quark * quark )

Definition at line 67 of file core_sgeadd.c.

References A, B, cblas_saxpy(), and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpacks seven arguments and adds alpha * src to dst. */
    int rows;
    int cols;
    float scale;
    float *src;
    int ld_src;
    float *dst;
    int ld_dst;
    int c = 0;

    quark_unpack_args_7(quark, rows, cols, scale, src, ld_src, dst, ld_dst);

    /* Both leading dimensions equal the row count: one SAXPY over everything. */
    if ((ld_src == rows) && (ld_dst == rows)) {
        cblas_saxpy(rows*cols, (scale), src, 1, dst, 1);
        return;
    }

    /* Otherwise one SAXPY of length rows per column. */
    while (c < cols) {
        cblas_saxpy(rows, (scale), src + c*ld_src, 1, dst + c*ld_dst, 1);
        c++;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgelqt	(	int	M,
		int	N,
		int	IB,
		float *	A,
		int	LDA,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_sgelqt - computes an LQ factorization of a real M-by-N tile A: A = L * Q.

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), and tau in TAU(i).

Parameters:

[in]	M	The number of rows of the tile A. M >= 0.
[in]	N	The number of columns of the tile A. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A	On entry, the M-by-N tile A. On exit, the elements on and below the diagonal of the array contain the M-by-min(M,N) lower trapezoidal tile L (L is lower triangular if M <= N); the elements above the diagonal, with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 85 of file core_sgelqt.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaNoTrans, PlasmaRight, and PlasmaRowwise.

{
    /*
     * Blocked LQ factorization of the tile: for each block of at most IB rows,
     * factor it with sgelq2, build the triangular factor with slarft and apply
     * the block reflector from the right to the rows below with slarfb.
     */
    int row, kmin, width;
    float *panel;
    float *tfac;

    /* Argument checks: the error code is minus the position of the bad argument. */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    /* IB == 0 is accepted only when the tile is empty. */
    if ((IB < 0) || ((IB == 0) && (M > 0) && (N > 0))) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((M > 0) && (LDA < max(1,M))) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }
    if ((IB > 0) && (LDT < max(1,IB))) {
        coreblas_error(7, "Illegal value of LDT");
        return -7;
    }

    /* Nothing to do for an empty problem. */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    kmin = min(M, N);
    row  = 0;
    while (row < kmin) {
        width = min(IB, kmin-row);
        panel = &A[LDA*row+row];   /* diagonal position of the current block */
        tfac  = &T[LDT*row];       /* matching column block of T */

        /* Unblocked LQ of the width-by-(N-row) panel. */
        LAPACKE_sgelq2_work(LAPACK_COL_MAJOR, width, N-row,
                            panel, LDA, &TAU[row], WORK);

        /* Triangular factor of the block reflector (forward, row-wise). */
        LAPACKE_slarft_work(LAPACK_COL_MAJOR,
            lapack_const(PlasmaForward),
            lapack_const(PlasmaRowwise),
            N-row, width,
            panel, LDA, &TAU[row],
            tfac, LDT);

        /* Update the rows below the panel, if any. */
        if (M > row+width) {
            LAPACKE_slarfb_work(
                LAPACK_COL_MAJOR,
                lapack_const(PlasmaRight),
                lapack_const(PlasmaNoTrans),
                lapack_const(PlasmaForward),
                lapack_const(PlasmaRowwise),
                M-row-width, N-row, width,
                panel,                    LDA,
                tfac,                     LDT,
                &A[LDA*row+(row+width)],  LDA,
                WORK, M-row-width);
        }
        row += IB;
    }
    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_sgelqt_quark ( Quark * quark )

Definition at line 180 of file core_sgelqt.c.

References A, CORE_sgelqt(), quark_unpack_args_9, T, and TAU.

{
    /* QUARK task wrapper: unpacks nine arguments and forwards them to CORE_sgelqt. */
    int rows, cols, inner_blk;
    int ld_a, ld_t;
    float *tile;
    float *tfac;
    float *tau;
    float *work;

    quark_unpack_args_9(quark, rows, cols, inner_blk, tile, ld_a, tfac, ld_t, tau, work);
    /* The return code of CORE_sgelqt is not propagated. */
    CORE_sgelqt(rows, cols, inner_blk, tile, ld_a, tfac, ld_t, tau, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm	(	int	transA,
		int	transB,
		int	M,
		int	N,
		int	K,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB,
		float	beta,
		float *	C,
		int	LDC
	)

Definition at line 28 of file core_sgemm.c.

References cblas_sgemm(), and CblasColMajor.

{
    /* Thin wrapper over cblas_sgemm with column-major storage. */
    const CBLAS_TRANSPOSE op_a = (CBLAS_TRANSPOSE)transA;
    const CBLAS_TRANSPOSE op_b = (CBLAS_TRANSPOSE)transB;

    cblas_sgemm(CblasColMajor, op_a, op_b,
                M, N, K,
                alpha, A, LDA,
                B, LDB,
                beta, C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm_f2_quark ( Quark * quark )

Definition at line 171 of file core_sgemm.c.

References A, B, C, cblas_sgemm(), CblasColMajor, and quark_unpack_args_15.

{
    /*
     * QUARK task wrapper for SGEMM with two extra packed arguments.
     * The two trailing arguments are unpacked but never used in this body.
     */
    int op_a, op_b;
    int rows, cols, inner;
    float scale_ab;
    float *mat_a;
    int ld_a;
    float *mat_b;
    int ld_b;
    float scale_c;
    float *mat_c;
    int ld_c;
    void *unused1, *unused2;

    quark_unpack_args_15(quark, op_a, op_b, rows, cols, inner, scale_ab,
                         mat_a, ld_a, mat_b, ld_b, scale_c, mat_c, ld_c,
                         unused1, unused2);

    cblas_sgemm(CblasColMajor,
                (CBLAS_TRANSPOSE)op_a, (CBLAS_TRANSPOSE)op_b,
                rows, cols, inner,
                scale_ab, mat_a, ld_a,
                mat_b, ld_b,
                scale_c, mat_c, ld_c);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm_p2_quark ( Quark * quark )

Definition at line 234 of file core_sgemm.c.

References A, B, C, cblas_sgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /*
     * QUARK task wrapper for SGEMM where B is passed as a pointer to a pointer;
     * it is dereferenced only when the product is computed.
     */
    int op_a, op_b;
    int rows, cols, inner;
    float scale_ab;
    float *mat_a;
    int ld_a;
    float **mat_b_ref;
    int ld_b;
    float scale_c;
    float *mat_c;
    int ld_c;

    quark_unpack_args_13(quark, op_a, op_b, rows, cols, inner, scale_ab,
                         mat_a, ld_a, mat_b_ref, ld_b, scale_c, mat_c, ld_c);

    cblas_sgemm(CblasColMajor,
                (CBLAS_TRANSPOSE)op_a, (CBLAS_TRANSPOSE)op_b,
                rows, cols, inner,
                scale_ab, mat_a, ld_a,
                *mat_b_ref, ld_b,
                scale_c, mat_c, ld_c);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm_p2f1_quark ( Quark * quark )

Definition at line 360 of file core_sgemm.c.

References A, B, C, cblas_sgemm(), CblasColMajor, and quark_unpack_args_14.

{
    /*
     * QUARK task wrapper for SGEMM where B is passed as a pointer to a pointer
     * and one extra trailing argument is unpacked but never used in this body.
     */
    int op_a, op_b;
    int rows, cols, inner;
    float scale_ab;
    float *mat_a;
    int ld_a;
    float **mat_b_ref;
    int ld_b;
    float scale_c;
    float *mat_c;
    int ld_c;
    void *unused1;

    quark_unpack_args_14(quark, op_a, op_b, rows, cols, inner, scale_ab,
                         mat_a, ld_a, mat_b_ref, ld_b, scale_c, mat_c, ld_c,
                         unused1);

    cblas_sgemm(CblasColMajor,
                (CBLAS_TRANSPOSE)op_a, (CBLAS_TRANSPOSE)op_b,
                rows, cols, inner,
                scale_ab, mat_a, ld_a,
                *mat_b_ref, ld_b,
                scale_c, mat_c, ld_c);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm_p3_quark ( Quark * quark )

Definition at line 296 of file core_sgemm.c.

References A, B, C, cblas_sgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /*
     * QUARK task wrapper for SGEMM where C is passed as a pointer to a pointer;
     * it is dereferenced only when the product is computed.
     */
    int op_a, op_b;
    int rows, cols, inner;
    float scale_ab;
    float *mat_a;
    int ld_a;
    float *mat_b;
    int ld_b;
    float scale_c;
    float **mat_c_ref;
    int ld_c;

    quark_unpack_args_13(quark, op_a, op_b, rows, cols, inner, scale_ab,
                         mat_a, ld_a, mat_b, ld_b, scale_c, mat_c_ref, ld_c);

    cblas_sgemm(CblasColMajor,
                (CBLAS_TRANSPOSE)op_a, (CBLAS_TRANSPOSE)op_b,
                rows, cols, inner,
                scale_ab, mat_a, ld_a,
                mat_b, ld_b,
                scale_c, *mat_c_ref, ld_c);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgemm_quark ( Quark * quark )

Definition at line 106 of file core_sgemm.c.

References A, B, C, cblas_sgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /* QUARK task wrapper: unpacks thirteen arguments and calls cblas_sgemm (column-major). */
    int op_a, op_b;
    int rows, cols, inner;
    float scale_ab;
    float *mat_a;
    int ld_a;
    float *mat_b;
    int ld_b;
    float scale_c;
    float *mat_c;
    int ld_c;

    quark_unpack_args_13(quark, op_a, op_b, rows, cols, inner, scale_ab,
                         mat_a, ld_a, mat_b, ld_b, scale_c, mat_c, ld_c);

    cblas_sgemm(CblasColMajor,
                (CBLAS_TRANSPOSE)op_a, (CBLAS_TRANSPOSE)op_b,
                rows, cols, inner,
                scale_ab, mat_a, ld_a,
                mat_b, ld_b,
                scale_c, mat_c, ld_c);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgeqrt	(	int	M,
		int	N,
		int	IB,
		float *	A,
		int	LDA,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_sgeqrt computes a QR factorization of a real M-by-N tile A: A = Q * R.

The tile Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

Parameters:

[in]	M	The number of rows of the tile A. M >= 0.
[in]	N	The number of columns of the tile A. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A	On entry, the M-by-N tile A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal tile R (R is upper triangular if M >= N); the elements below the diagonal, with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 86 of file core_sgeqrt.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, and PlasmaTrans.

{
    /*
     * Blocked QR factorization of the tile: for each block of at most IB
     * columns, factor it with sgeqr2, build the triangular factor with slarft
     * and apply the transposed block reflector from the left to the columns on
     * the right with slarfb.
     */
    int col, kmin, width;
    float *panel;
    float *tfac;

    /* Argument checks: the error code is minus the position of the bad argument. */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    /* IB == 0 is accepted only when the tile is empty. */
    if ((IB < 0) || ((IB == 0) && (M > 0) && (N > 0))) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((M > 0) && (LDA < max(1,M))) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }
    if ((IB > 0) && (LDT < max(1,IB))) {
        coreblas_error(7, "Illegal value of LDT");
        return -7;
    }

    /* Nothing to do for an empty problem. */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    kmin = min(M, N);
    col  = 0;
    while (col < kmin) {
        width = min(IB, kmin-col);
        panel = &A[LDA*col+col];   /* diagonal position of the current block */
        tfac  = &T[LDT*col];       /* matching column block of T */

        /* Unblocked QR of the (M-col)-by-width panel. */
        LAPACKE_sgeqr2_work(LAPACK_COL_MAJOR, M-col, width,
                            panel, LDA, &TAU[col], WORK);

        /* Triangular factor of the block reflector (forward, column-wise). */
        LAPACKE_slarft_work(LAPACK_COL_MAJOR,
            lapack_const(PlasmaForward),
            lapack_const(PlasmaColumnwise),
            M-col, width,
            panel, LDA, &TAU[col],
            tfac, LDT);

        /* Update the columns to the right of the panel, if any. */
        if (N > col+width) {
            LAPACKE_slarfb_work(
                LAPACK_COL_MAJOR,
                lapack_const(PlasmaLeft),
                lapack_const(PlasmaTrans),
                lapack_const(PlasmaForward),
                lapack_const(PlasmaColumnwise),
                M-col, N-col-width, width,
                panel,                     LDA,
                tfac,                      LDT,
                &A[LDA*(col+width)+col],   LDA,
                WORK, N-col-width);
        }
        col += IB;
    }
    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_sgeqrt_quark ( Quark * quark )

Definition at line 181 of file core_sgeqrt.c.

References A, CORE_sgeqrt(), quark_unpack_args_9, T, and TAU.

{
    /* QUARK task wrapper: unpacks nine arguments and forwards them to CORE_sgeqrt. */
    int rows, cols, inner_blk;
    int ld_a, ld_t;
    float *tile;
    float *tfac;
    float *tau;
    float *work;

    quark_unpack_args_9(quark, rows, cols, inner_blk, tile, ld_a, tfac, ld_t, tau, work);
    /* The return code of CORE_sgeqrt is not propagated. */
    CORE_sgeqrt(rows, cols, inner_blk, tile, ld_a, tfac, ld_t, tau, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgessm	(	int	M,
		int	N,
		int	K,
		int	IB,
		int *	IPIV,
		float *	L,
		int	LDL,
		float *	A,
		int	LDA
	)

CORE_sgessm applies the factor L computed by CORE_sgetrf_incpiv to a real M-by-N tile A.

Parameters:

[in]	M	The number of rows of the tile A. M >= 0.
[in]	N	The number of columns of the tile A. N >= 0.
[in]	K
[in]	IB	The inner-blocking size. IB >= 0.
[in]	IPIV	as returned by CORE_sgetrf_incpiv.
[in]	L	The NB-by-NB lower triangular tile.
[in]	LDL	The leading dimension of the array L. LDL >= max(1,NB).
[in,out]	A	On entry, the M-by-N tile A. On exit, updated by the application of L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if INFO = -k, the k-th argument had an illegal value

Definition at line 68 of file core_sgessm.c.

References cblas_sgemm(), cblas_strsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * For each block of at most IB of the K pivots: swap rows of A according to
     * IPIV, solve with the unit lower triangular block of L, then update the
     * remaining rows of A with a GEMM.
     */
    const float one     =  1.0;
    const float neg_one = -1.0;
    int blk, width;

    /* Argument checks: the error code is minus the position of the bad argument. */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (K < 0) {
        coreblas_error(3, "Illegal value of K");
        return -3;
    }
    if (IB < 0) {
        coreblas_error(4, "Illegal value of IB");
        return -4;
    }
    if ((M > 0) && (LDL < max(1,M))) {
        coreblas_error(7, "Illegal value of LDL");
        return -7;
    }
    if ((M > 0) && (LDA < max(1,M))) {
        coreblas_error(9, "Illegal value of LDA");
        return -9;
    }

    /* Nothing to do for an empty problem. */
    if ((M == 0) || (N == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for (blk = 0; blk < K; blk += IB) {
        width = min(IB, K-blk);

        /* Row interchanges for pivots blk+1 .. blk+width (1-based), increment 1. */
        LAPACKE_slaswp_work(LAPACK_COL_MAJOR, N, A, LDA,
                            blk+1, blk+width, IPIV, 1);

        /* Triangular solve with the unit lower diagonal block of L. */
        cblas_strsm(
            CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
            width, N, one,
            &L[LDL*blk+blk], LDL,
            &A[blk], LDA );

        /* No rows left below this block: skip the trailing update. */
        if (blk+width >= M)
            continue;

        /* Trailing update: A(below) -= L(below, block) * A(block). */
        cblas_sgemm(
            CblasColMajor, CblasNoTrans, CblasNoTrans,
            M-(blk+width), N, width,
            neg_one, &L[LDL*blk+(blk+width)], LDL,
            &A[blk], LDA,
            one, &A[blk+width], LDA );
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgessm_quark ( Quark * quark )

Definition at line 172 of file core_sgessm.c.

References A, CORE_sgessm(), IPIV, L, and quark_unpack_args_9.

{
    /* QUARK task wrapper: unpacks nine arguments and forwards them to CORE_sgessm. */
    int rows, cols, npiv, inner_blk;
    int ld_l, ld_a;
    int *pivots;
    float *lower;
    float *tile;

    quark_unpack_args_9(quark, rows, cols, npiv, inner_blk, pivots, lower, ld_l, tile, ld_a);
    /* The return code of CORE_sgessm is not propagated. */
    CORE_sgessm(rows, cols, npiv, inner_blk, pivots, lower, ld_l, tile, ld_a);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgetrf	(	int	M,
		int	N,
		float *	A,
		int	LDA,
		int *	IPIV,
		int *	INFO
	)

Definition at line 22 of file core_sgetrf.c.

References PLASMA_SUCCESS.

{
    /*
     * LU factorization of the M-by-N column-major tile A via LAPACKE_sgetrf_work.
     * The LAPACK status is stored in *INFO; the function itself always returns
     * PLASMA_SUCCESS.
     *
     * The identifiers follow the parameter names of the signature shown for
     * this function (M, N, LDA, INFO).
     */
    *INFO = LAPACKE_sgetrf_work(LAPACK_COL_MAJOR, M, N, A, LDA, IPIV );
    return PLASMA_SUCCESS;
}

int CORE_sgetrf_incpiv	(	int	M,
		int	N,
		int	IB,
		float *	A,
		int	LDA,
		int *	IPIV,
		int *	INFO
	)

CORE_sgetrf_incpiv computes an LU factorization of a general M-by-N tile A using partial pivoting with row interchanges.

The factorization has the form

A = P * L * U

where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:

[in]	M	The number of rows of the tile A. M >= 0.
[in]	N	The number of columns of the tile A. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A	On entry, the M-by-N tile to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	IPIV	The pivot indices; for 1 <= i <= min(M,N), row i of the tile was interchanged with row IPIV(i).
[out]	INFO	See returned value.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if INFO = -k, the k-th argument had an illegal value
>0	if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Definition at line 83 of file core_sgetrf_incpiv.c.

References CORE_sgessm(), coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Blocked LU with partial pivoting: each panel of at most IB columns is
     * factored with sgetf2, the columns to its right are updated through
     * CORE_sgessm, and the panel's pivot indices are shifted by the panel offset.
     * The first positive sgetf2 status (shifted by the panel offset) is kept in
     * *INFO.
     */
    int blk, p, kmin, width;
    int panel_info;

    *INFO = 0;

    /* Argument checks: the error code is minus the position of the bad argument. */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((M > 0) && (LDA < max(1,M))) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }

    /* Nothing to do for an empty problem. */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    kmin = min(M, N);
    for (blk = 0; blk < kmin; blk += IB) {
        width = min(IB, kmin-blk);

        /* Factor the current panel (diagonal and sub-diagonal part). */
        panel_info = LAPACKE_sgetf2_work(LAPACK_COL_MAJOR, M-blk, width,
                                         &A[LDA*blk+blk], LDA, &IPIV[blk]);

        /* Record only the first failure, shifted by the panel offset. */
        if ((panel_info > 0) && (*INFO == 0))
            *INFO = panel_info + blk;

        /* Apply the panel to the columns on its right, if any. */
        if (blk+width < N) {
            CORE_sgessm(
                M-blk, N-(blk+width), width, width,
                &IPIV[blk],
                &A[LDA*blk+blk], LDA,
                &A[LDA*(blk+width)+blk], LDA);
        }

        /* Shift the panel's pivot indices by the panel offset. */
        p = blk;
        while (p < blk+width) {
            IPIV[p] += blk;
            p++;
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgetrf_incpiv_quark ( Quark * quark )

Definition at line 174 of file core_sgetrf_incpiv.c.

References A, CORE_sgetrf_incpiv(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_10.

{
    /*
     * QUARK task wrapper for CORE_sgetrf_incpiv.  When the factorization
     * reports a non-zero info and check_info is set, the sequence is flushed
     * with the status iinfo + info.
     */
    int rows, cols, inner_blk;
    float *tile;
    int ld_a;
    int *pivots;
    PLASMA_sequence *seq;
    PLASMA_request *req;
    PLASMA_bool check_info;
    int info_offset;
    int status;

    quark_unpack_args_10(quark, rows, cols, inner_blk, tile, ld_a, pivots,
                         seq, req, check_info, info_offset);

    CORE_sgetrf_incpiv(rows, cols, inner_blk, tile, ld_a, pivots, &status);

    if (check_info && (status != PLASMA_SUCCESS)) {
        plasma_sequence_flush(quark, seq, req, info_offset+status);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgetrf_quark ( Quark * quark )

Definition at line 61 of file core_sgetrf.c.

References A, IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_9.

{
    /*
     * QUARK task wrapper: runs LAPACKE_sgetrf_work on the unpacked tile and,
     * when it reports a non-zero status and check_info is set, flushes the
     * sequence with the status iinfo + info.
     */
    int rows, cols;
    float *tile;
    int ld_a;
    int *pivots;
    PLASMA_sequence *seq;
    PLASMA_request *req;
    PLASMA_bool check_info;
    int info_offset;
    int status;

    quark_unpack_args_9(quark, rows, cols, tile, ld_a, pivots,
                        seq, req, check_info, info_offset);

    status = LAPACKE_sgetrf_work(LAPACK_COL_MAJOR, rows, cols, tile, ld_a, pivots );

    if (check_info && (status != PLASMA_SUCCESS)) {
        plasma_sequence_flush(quark, seq, req, info_offset+status);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgetrf_reclap	(	const int	M,
		const int	N,
		float *	A,
		const int	LDA,
		int *	IPIV,
		int *	info
	)

Definition at line 307 of file core_sgetrf_reclap.c.

References coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Entry point of the recursive LU kernel.
     *
     * info is a 3-element array: info[1] is read as this caller's thread index,
     * info[2] as the thread budget, and info[0] receives the result, which is
     * also the return value on the normal path.
     *
     * Returns -1/-2/-5 for an illegal M/N/LDA, PLASMA_SUCCESS on the quick
     * returns (info[0] is left untouched in those cases), info[0] otherwise.
     */
    int thidx, thcnt, minMN;

    if( M < 0 ) {
        coreblas_error(1, "illegal value of M");
        return -1;
    }
    if( N < 0 ) {
        coreblas_error(2, "illegal value of N");
        return -2;
    }
    if( LDA < max(1, M) ) {
        coreblas_error(5, "illegal value of LDA");
        return -5;
    }

    /*
     * Quick return for an empty problem.  This must come before M / N below:
     * evaluating it with N == 0 is a division by zero.
     */
    if ( (M == 0) || (N == 0) ){
      return PLASMA_SUCCESS;
    }

    thidx = info[1];
    /* At most M / N threads take part. */
    thcnt = min( info[2], M / N );
    minMN = min(M, N);

    /* Threads beyond the effective count have nothing to do. */
    if ( thidx >= thcnt ){
      return PLASMA_SUCCESS;
    }

    *info = 0;
    CORE_sgetrf_reclap_rec( M, minMN, A, LDA, IPIV, info,
                            thidx, thcnt, 0 );
    /* Update the columns beyond the square part, if any. */
    if ( N > minMN ) {
        CORE_sgetrf_reclap_update(M, 0, minMN, N-minMN,
                                  A, LDA, IPIV,
                                  thidx, thcnt);
    }
    return info[0];
}

Here is the caller graph for this function:

void CORE_sgetrf_reclap_quark ( Quark * quark )

Definition at line 381 of file core_sgetrf_reclap.c.

References A, CORE_sgetrf_reclap(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, QUARK_Get_RankInTask(), and quark_unpack_args_10.

{
    /*
     * QUARK task wrapper for CORE_sgetrf_reclap.  Only the thread of rank 0 in
     * the task reports a non-zero info[0], by flushing the sequence with the
     * status iinfo + info[0] when check_info is set.
     */
    int M;
    int N;
    float *A;
    int LDA;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo;
    int info[3];
    int maxthreads;
    quark_unpack_args_10(quark, M, N, A, LDA, IPIV, sequence, request,
                         check_info, iinfo, maxthreads );
    /*
     * CORE_sgetrf_reclap can return early (argument error or quick return)
     * without writing info[0]; start from 0 so the test below never reads an
     * indeterminate value.
     */
    info[0] = 0;
    info[1] = QUARK_Get_RankInTask(quark);
    info[2] = maxthreads;
    CORE_sgetrf_reclap( M, N, A, LDA, IPIV, info );
    if (info[1] == 0 && info[0] != PLASMA_SUCCESS && check_info)
        plasma_sequence_flush(quark, sequence, request, iinfo + info[0] );
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sgetrf_rectil	(	const PLASMA_desc	A,
		int *	IPIV,
		int *	info
	)

Definition at line 653 of file core_sgetrf_rectil.c.

References coreblas_error, plasma_desc_t::m, min, plasma_desc_t::mt, plasma_desc_t::n, and plasma_desc_t::nt.

{
    /*
     * Tile-layout counterpart of the recursive panel kernel.  The A.mt row
     * tiles are shared between the participating threads; each thread works
     * on the half-open tile range [first_tile, last_tile).
     *
     * info[0] is the status slot, info[1] the calling thread's index and
     * info[2] the upper bound on the thread count.
     * Returns -1 when A.nt > 1, 0 when this thread has no tiles, and info[0]
     * otherwise.
     */
    const int rank     = info[1];
    const int nthreads = min( info[2], A.mt );
    const int kmax     = min( A.m, A.n );
    int first_tile, last_tile;
    int share, leftover;
    float pivot;

    /* Only a single column of tiles is supported */
    if ( A.nt > 1 ) {
        coreblas_error(1, "Illegal value of A.nt");
        return -1;
    }

    /* More threads than tiles: the surplus threads have nothing to do */
    if ( rank >= nthreads )
        return 0;

    /*
     * Even split of A.mt tiles: the first `leftover` threads take one tile
     * more than the others.
     */
    share    = A.mt / nthreads;
    leftover = A.mt % nthreads;
    if ( rank < leftover ) {
        share      = share + 1;
        first_tile = rank * share;
        last_tile  = first_tile + share;
    }
    else {
        first_tile = leftover * (share + 1) + (rank - leftover) * share;
        last_tile  = first_tile + share;
        if ( last_tile > A.mt )
            last_tile = A.mt;
    }

    info[0] = 0;
    CORE_sgetrf_rectil_rec( A, IPIV, info, &pivot,
                            rank, nthreads, 0, kmax, first_tile, last_tile );

    /* Wide panel: update the columns to the right of the factored block */
    if ( A.n > kmax ) {
        CORE_sgetrf_rectil_update( A, IPIV,
                                   0, kmax, A.n - kmax,
                                   rank, nthreads,
                                   first_tile, last_tile );
    }

    return info[0];
}

Here is the caller graph for this function:

void CORE_sgetrf_rectil_quark ( Quark * quark )

Definition at line 726 of file core_sgetrf_rectil.c.

References A, CORE_sgetrf_rectil(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, QUARK_Get_RankInTask(), and quark_unpack_args_8.

{
    /*
     * QUARK task wrapper around CORE_sgetrf_rectil: unpacks the task
     * arguments, fills the thread bookkeeping slots of info[], runs the
     * kernel and, on thread 0 only, flushes the sequence when the kernel
     * reported a non-success status and check_info is set.
     */
    PLASMA_desc A;
    float *Amn;   /* unpacked to match the task signature; not used below */
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo;
    int info[3];
    int maxthreads;
    quark_unpack_args_8(quark, A, Amn, IPIV, sequence, request, 
                        check_info, iinfo, maxthreads );
    /*
     * CORE_sgetrf_rectil returns early (A.nt > 1, or no tiles for this
     * thread) without writing info[0]; give it a defined value so the test
     * below never reads an indeterminate int.
     */
    info[0] = PLASMA_SUCCESS;
    info[1] = QUARK_Get_RankInTask(quark);
    info[2] = maxthreads;
    CORE_sgetrf_rectil( A, IPIV, info );
    if (info[1] == 0 && info[0] != PLASMA_SUCCESS && check_info)
        plasma_sequence_flush(quark, sequence, request, iinfo + info[0] );
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgetrip	(	int	m,
		int	n,
		float *	A,
		float *	W
	)

CORE_sgetrip transposes an m-by-n matrix in place, using an extra workspace of m*n elements when the matrix is rectangular. Note: for a square tile, the workspace is not used.

Parameters:

[in]	m	Number of lines of tile A
[in]	n	Number of columns of tile A
[in,out]	A	Tile of size m-by-n On exit, A = trans(A)
[out]	W	Workspace of size n-by-m if n != m, NULL otherwise.

Definition at line 54 of file core_sgetrip.c.

                                                    {
    int row, col;

    if( m == n ) {
        /* Square tile: exchange each pair of entries mirrored across the
         * diagonal; no workspace is touched. */
        for (row = 0; row < m; row++) {
            for (col = row + 1; col < n; col++) {
                const float saved = A[col + row*n];
                A[col + row*n] = A[row + col*m];
                A[row + col*m] = saved;
            }
        }
        return;
    }

    /* Rectangular tile: write the transpose into W, then copy it back
     * over A in one go. */
    for (row = 0; row < m; row++) {
        for (col = 0; col < n; col++) {
            W[col + row*n] = A[row + col*m];
        }
    }
    memcpy(A, W, m*n*sizeof(float));
}

Here is the caller graph for this function:

void CORE_sgetrip_f1_quark ( Quark * quark )

Definition at line 138 of file core_sgetrip.c.

References A, CORE_sgetrip(), quark_unpack_args_5, and W.

{
    /* QUARK task wrapper: unpack the five task arguments and run the
     * in-place transposition on (m, n, A, W). */
    int m, n;
    float *A, *W;
    float *fake;   /* unpacked but never read here; presumably only carries
                    * a task dependency - confirm against the inserter */

    quark_unpack_args_5(quark, m, n, A, W, fake);

    CORE_sgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgetrip_f2_quark ( Quark * quark )

Definition at line 178 of file core_sgetrip.c.

References A, CORE_sgetrip(), quark_unpack_args_6, and W.

{
    /* QUARK task wrapper: unpack the six task arguments and run the
     * in-place transposition on (m, n, A, W). */
    int m, n;
    float *A, *W;
    float *fake1, *fake2;   /* unpacked but never read here; presumably only
                             * carry task dependencies - confirm against the
                             * inserter */

    quark_unpack_args_6(quark, m, n, A, W, fake1, fake2);

    CORE_sgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sgetrip_quark ( Quark * quark )

Definition at line 101 of file core_sgetrip.c.

References A, CORE_sgetrip(), quark_unpack_args_4, and W.

{
    /* QUARK task wrapper: unpack the four task arguments and run the
     * in-place transposition. */
    int m, n;
    float *A, *W;

    quark_unpack_args_4(quark, m, n, A, W);

    CORE_sgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_shbelr	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 78 of file core_shbelr.c.

References A, CORE_slarfx2(), CORE_slarfx2c(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * Summary (as read from the code below): for i = ed down to st+1 the
     * first loop of each branch moves one entry of A into V(i), zeroes it in
     * A, builds a 2-element Householder reflector with LAPACKE_slarfg_work
     * (tau stored in TAU(i)) and applies it with CORE_slarfx2 and
     * CORE_slarfx2c; the second loop applies the same reflectors from the
     * other side to the remaining entries.
     *
     * A(), V(), TAU() and ELTLDD() are accessors defined outside this
     * function.  NOTE(review): eltsize is never read directly here; it is
     * presumably consumed by one of those accessors - confirm.
     *
     * Returns -2 if N < 0, -23 if ed <= st, PLASMA_SUCCESS otherwise.
     * NOTE(review): the sibling kernels CORE_shblrx and CORE_shbrce report
     * the same st/ed failure as 6 / -6 - confirm that 23 is intended.
     */
    int    NB, J1, J2;
    int    len1, len2, t1ed, t2st;
    int    i;
    static float zzero = 0.0;
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(23, "Illegal value of st and ed (internal)");
        return -23;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is the blocking size used below to split every index range at a
     * multiple of NB (presumably the tile boundary - confirm). */
    NB = A->mb;
    if( uplo == PlasmaLower ) {
       /* ========================
        *     LOWER CASE
        * ========================*/
       for (i = ed; i >= st+1 ; i--){
          /* generate Householder to annihilate a(i+k-1,i) within the band */
          *V(i)          = *A(i, (st-1));
          *A(i, (st-1))  = zzero;
          LAPACKE_slarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));
          /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
          /* The index range [J1, J2] is processed as [J1, t1ed] followed by
           * [t2st, J2], where t1ed is J2 rounded down to a multiple of NB. */
          J1    = st;
          J2    = i-2;
          t1ed  = (J2/NB)*NB;
          t2st  = max(t1ed+1,J1);
          len1  = t1ed-J1+1; /* can be negative */
          len2  = J2-t2st+1;
          if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
          if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
          CORE_slarfx2c(PlasmaLower, *V(i), *TAU(i), A(i-1, i-1), A(i, i-1), A(i, i)); 
       }
       /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
       for (i = ed; i >= st+1 ; i--){
          /* Same two-piece split, now over [i+1, min(ed,N)] */
          J1    = i+1;
          J2    = min(ed,N);
          t1ed  = (J2/NB)*NB;
          t2st  = max(t1ed+1,J1);
          len1  = t1ed-J1+1; /* can be negative */
          len2  = J2-t2st+1;
          if(len1>0)CORE_slarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
          if(len2>0)CORE_slarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
       }
    }else{
       /* ========================
        *       UPPER CASE
        * ========================*/
       /* Mirror image of the lower case: row/column indices of A and the
        * Left/Right sides are exchanged. */
       for (i = ed; i >= st+1 ; i--){
          /* generate Householder to annihilate a(i+k-1,i) within the band*/
          *V(i)          = *A((st-1),  i);
          *A((st-1),  i) = zzero;
          LAPACKE_slarfg_work( 2, A(st-1, i-1), V(i), 1, TAU(i));
          /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
          J1    = st;
          J2    = i-2;
          t1ed  = (J2/NB)*NB;
          t2st  = max(t1ed+1,J1);
          len1  = t1ed-J1+1; /* can be negative */
          len2  = J2-t2st+1;
          if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
          if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
          CORE_slarfx2c(PlasmaUpper, *V(i), *TAU(i), A(i-1, i-1), A(i-1, i), A(i,i));
       }
       /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
       for (i = ed; i >= st+1 ; i--){
          J1    = i+1;
          J2    = min(ed,N);
          t1ed  = (J2/NB)*NB;
          t2st  = max(t1ed+1,J1);
          len1  = t1ed-J1+1; /* can be negative */
          len2  = J2-t2st+1;
          if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , (*V(i)), *TAU(i), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
          if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , (*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
       }
    }  /* end of else for the upper case */
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_shblrx	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 76 of file core_shblrx.c.

References A, CORE_slarfx2(), CORE_slarfx2c(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * Summary (as read from the code below): for i = ed down to st+1 this
     * kernel applies the already-stored 2-element reflectors (V(i), TAU(i))
     * to A with CORE_slarfx2 and CORE_slarfx2c.  Unlike CORE_shbelr, no
     * reflector is generated here: V and TAU are only read.
     *
     * A(), V(), TAU() and ELTLDD() are accessors defined outside this
     * function.  NOTE(review): eltsize is never read directly here; it is
     * presumably consumed by one of those accessors - confirm.
     *
     * Returns -2 if N < 0, -6 if ed <= st, PLASMA_SUCCESS otherwise.
     */
    int    NB, J1, J2;
    int    len1, len2, t1ed, t2st;
    int    i;
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is the blocking size used below to split every index range at a
     * multiple of NB (presumably the tile boundary - confirm). */
    NB = A->mb;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            /* The index range [J1, J2] is processed as [J1, t1ed] followed
             * by [t2st, J2], where t1ed is J2 rounded down to a multiple of
             * NB; len1 can be negative, in which case that piece is
             * skipped. */
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
            CORE_slarfx2c(PlasmaLower, *V(i), *TAU(i), A(i-1,i-1), A(i,i-1), A(i,i)); 
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            /* Same two-piece split, now over [i+1, min(ed,N)] */
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        /* Mirror image of the lower case: row/column indices of A and the
         * Left/Right sides are exchanged. */
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            CORE_slarfx2c(PlasmaUpper, *V(i), *TAU(i), A(i-1,i-1), A(i-1, i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , (*V(i)), *TAU(i), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , (*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
        }
    }  /* end of else for the upper case */
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_shbrce	(	int	uplo,
		int	N,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	st,
		int	ed,
		int	eltsize
	)

Definition at line 76 of file core_shbrce.c.

References A, CORE_slarfx2(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, TAU, and V.

{
    /*
     * Summary (as read from the code below): for i = ed down to st+1 the
     * first loop of each branch applies the stored reflector (V(i), TAU(i))
     * to the index range [ed+1, J3]; when J3 > J2 it then moves the entry at
     * index J3 into V(J3), zeroes it in A and generates a new 2-element
     * reflector with LAPACKE_slarfg_work (tau stored in TAU(J3)).  The
     * second loop applies those new reflectors from the other side.
     *
     * A(), V(), TAU() and ELTLDD() are accessors defined outside this
     * function.  NOTE(review): eltsize is never read directly here; it is
     * presumably consumed by one of those accessors - confirm.
     *
     * Returns -2 if N < 0, -6 if ed <= st, PLASMA_SUCCESS otherwise.
     */
    int    NB, J1, J2, J3, KDM2, len, pt;
    int    len1, len2, t1ed, t2st;
    int    i;
    static float zzero = 0.0;
    PLASMA_desc vA=*A;
    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* NB is the blocking size used to split index ranges at a multiple of
     * NB; KDM2 = NB - 2 enters the computation of J2 below. */
    NB = A->mb;
    KDM2 = A->mb-2;
    if( uplo == PlasmaLower ) {
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            /* this value of len is overwritten below before it is read */
            len = J3-J1+1;
            if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/
            /* [J1, J3] is processed as [J1, t1ed] followed by [t2st, J3] */
            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; /* can be negative*/
            len2  = J3-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1,  i-1), ELTLDD(vA, J1),   A(J1  , i), ELTLDD(vA, J1)  );
            if(len2>0)CORE_slarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it.*/
            len    = J3-J2; /* either 1 or 0 */
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(J3,i-1);
                *A(J3,i-1)   = 0.0;
                LAPACKE_slarfg_work( 2, A(J2,i-1), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* find if there was a nnz created. if yes apply left else nothing to be done.*/
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                /* pt keeps the J2 computed above before J2 is reused as the
                 * upper end of the range [i, min(ed,N)] */
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1;  /* can be negative*/
                len2  = J2-t2st+1;
                if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i   ), ELTLDD(vA, pt),  A((pt+1),  i  ), ELTLDD(vA, pt+1) );
                if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt),  A((pt+1), t2st), ELTLDD(vA, pt+1) );
            }
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        /* Mirror image of the lower case: row/column indices of A and the
         * Left/Right sides are exchanged. */
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            /* this value of len is overwritten below before it is read */
            len = J3-J1+1;
            if(J3>J2)*A((i-1), J3)=zzero;/* could be removed because A is supposed to be band.*/
            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1;  /* can be negative*/
            len2  = J3-t2st+1;
            if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , (*V(i)), *TAU(i), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , (*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it.*/
            len    = J3-J2; /* either 1 or 0 */
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band*/
                *V(J3)         = *A((i-1), J3);
                *A((i-1), J3)  = 0.0;
                LAPACKE_slarfg_work( 2, A((i-1), J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2*/
        for (i = ed; i >= st+1 ; i--){
            /* find if there was a nnz created. if yes apply right else nothing to be done.*/
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                /* pt keeps the J2 computed above before J2 is reused as the
                 * upper end of the range [i, min(ed,N)] */
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1;  /* can be negative*/
                len2  = J2-t2st+1;
                if(len1>0)CORE_slarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i   , pt), ELTLDD(vA, i),     A(i,    pt+1), ELTLDD(vA, i) );
                if(len2>0)CORE_slarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st),  A(t2st, pt+1), ELTLDD(vA, t2st) );
            }
        }
    } /* end of else for the upper case */
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slacpy	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB
	)

Definition at line 29 of file core_slacpy.c.

References lapack_const.

{
    /* Forward to LAPACKE in column-major layout; uplo is translated from
     * the PLASMA enum by lapack_const(). */
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                        M, N,
                        A, LDA,
                        B, LDB);
}

Here is the caller graph for this function:

void CORE_slacpy_quark ( Quark * quark )

Definition at line 66 of file core_slacpy.c.

References A, B, lapack_const, quark_unpack_args_7, and uplo.

{
    /* QUARK task wrapper: unpack the seven task arguments, then perform the
     * same column-major LAPACKE copy as CORE_slacpy. */
    PLASMA_enum uplo;
    int M, N;
    int LDA, LDB;
    float *A;
    float *B;

    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);

    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                        M, N,
                        A, LDA,
                        B, LDB);
}

Here is the caller graph for this function:

void CORE_slange	(	int	norm,
		int	M,
		int	N,
		float *	A,
		int	LDA,
		float *	work,
		float *	normA
	)

Definition at line 29 of file core_slange.c.

References lapack_const.

{
    /* Compute the requested norm through LAPACKE (column-major layout) and
     * store it through normA. */
    const float value = LAPACKE_slange_work(LAPACK_COL_MAJOR,
                                            lapack_const(norm),
                                            M, N, A, LDA, work);
    *normA = value;
}

Here is the caller graph for this function:

void CORE_slange_f1_quark ( Quark * quark )

Definition at line 114 of file core_slange.c.

References A, lapack_const, norm, and quark_unpack_args_8.

{
    /* QUARK task wrapper: unpack the eight task arguments, compute the norm
     * through LAPACKE and store it through normA. */
    int norm;
    int M, N, LDA;
    float *A;
    float *work;
    float *normA;
    float *fake;   /* unpacked but never read here; presumably only carries
                    * a task dependency - confirm against the inserter */

    quark_unpack_args_8(quark, norm, M, N, A, LDA, work, normA, fake);

    *normA = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(norm),
                                 M, N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_slange_quark ( Quark * quark )

Definition at line 67 of file core_slange.c.

References A, lapack_const, norm, and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpack the seven task arguments, compute the norm
     * through LAPACKE and store it through normA. */
    int norm;
    int M, N, LDA;
    float *A;
    float *work;
    float *normA;

    quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA);

    *normA = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(norm),
                                 M, N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_slansy	(	int	norm,
		int	uplo,
		int	N,
		float *	A,
		int	LDA,
		float *	work,
		float *	normA
	)

Definition at line 29 of file core_slansy.c.

References lapack_const.

{
    /* Compute the requested norm of the symmetric matrix through LAPACKE
     * (column-major layout) and store it through normA. */
    const float value = LAPACKE_slansy_work(LAPACK_COL_MAJOR,
                                            lapack_const(norm),
                                            lapack_const(uplo),
                                            N, A, LDA, work);
    *normA = value;
}

Here is the caller graph for this function:

void CORE_slansy_f1_quark ( Quark * quark )

Definition at line 114 of file core_slansy.c.

References A, lapack_const, norm, quark_unpack_args_8, and uplo.

{
    /* QUARK task wrapper: unpack the eight task arguments, compute the norm
     * through LAPACKE and store it through normA. */
    int norm, uplo;
    int N, LDA;
    float *A;
    float *work;
    float *normA;
    float *fake;   /* unpacked but never read here; presumably only carries
                    * a task dependency - confirm against the inserter */

    quark_unpack_args_8(quark, norm, uplo, N, A, LDA, work, normA, fake);

    *normA = LAPACKE_slansy_work(LAPACK_COL_MAJOR,
                                 lapack_const(norm), lapack_const(uplo),
                                 N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_slansy_quark ( Quark * quark )

Definition at line 67 of file core_slansy.c.

References A, lapack_const, norm, quark_unpack_args_7, and uplo.

{
    /* QUARK task wrapper: unpack the seven task arguments, compute the norm
     * through LAPACKE and store it through normA. */
    int norm, uplo;
    int N, LDA;
    float *A;
    float *work;
    float *normA;

    /*
     * NOTE(review): normA is unpacked FIRST here, whereas
     * CORE_slansy_f1_quark unpacks it after work.  The order must match the
     * order used when the task was inserted - confirm against the inserter
     * before changing anything.
     */
    quark_unpack_args_7(quark, normA, norm, uplo, N, A, LDA, work);

    *normA = LAPACKE_slansy_work(LAPACK_COL_MAJOR,
                                 lapack_const(norm), lapack_const(uplo),
                                 N, A, LDA, work);
}

Here is the caller graph for this function:

int CORE_slarfx2	(	PLASMA_enum	side,
		int	N,
		float	V,
		float	TAU,
		float *	C1,
		int	LDC1,
		float *	C2,
		int	LDC2
	)

Declarations of serial kernels - alphabetical order

Purpose

CORE_slarfx2 applies a complex elementary reflector H to a complex m by n matrix C, from either the left or the right. H is represented in the form

  H = I - tau * v * v'

where tau is a complex scalar and v is a complex vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:

[in]	side	PlasmaLeft : form H * C PlasmaRight: form C * H
[in]	N	The number of columns of C1 and C2 if side = PlasmaLeft. The number of rows of C1 and C2 if side = PlasmaRight.
[in]	V	The float complex V in the representation of H.
[in]	TAU	The value tau in the representation of H.
[in,out]	C1	dimension (LDC1,N), if side = PlasmaLeft dimension (LDC1,1), if side = PlasmaRight On entry, the m by n matrix C1. On exit, C1 is overwritten by the matrix H * C1 if SIDE = PlasmaLeft, or C1 * H if SIDE = PlasmaRight.
[in]	LDC1	The leading dimension of the array C1. LDC1 >= max(1,N), if side == PlasmaRight. LDC1 >= 1, otherwise.
[in,out]	C2	dimension (LDC2,N), if side = PlasmaLeft dimension (LDC2,1), if side = PlasmaRight On entry, the m by n matrix C2. On exit, C2 is overwritten by the matrix H * C2 if SIDE = PlasmaLeft, or C2 * H if SIDE = PlasmaRight.
[in]	LDC2	The leading dimension of the array C2. LDC2 >= max(1,N), if side == PlasmaRight. LDC2 >= 1, otherwise.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 86 of file core_slarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLeft, T2, TAU, and V.

{
    float v2, tau_v2, dot;
    int step1, step2;
    int k;

    /* tau == 0 means H is the identity: nothing to apply */
    if (TAU == (float)0.0)
        return PLASMA_SUCCESS;

    /*
     * Special code for a 2 x 2 Householder reflector whose first component
     * is 1.  The arithmetic is the same for both sides; only the distance
     * between consecutive elements differs: the leading dimensions when
     * applying from the left, 1 when applying from the right.
     */
    v2     = V;
    tau_v2 = TAU * v2;

    if (side == PlasmaLeft) {
        step1 = LDC1;
        step2 = LDC2;
    }
    else {
        step1 = 1;
        step2 = 1;
    }

    for (k = 0; k < N; k++) {
        dot = *C1 + v2 * (*C2);
        *C1 = *C1 - dot * TAU;
        *C2 = *C2 - dot * tau_v2;
        C1 += step1;
        C2 += step2;
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

int CORE_slarfx2c	(	PLASMA_enum	uplo,
		float	V,
		float	TAU,
		float *	C1,
		float *	C2,
		float *	C3
	)

Purpose

CORE_slarfx2c applies a complex elementary reflector H to a diagonal corner C=[C1, C2, C3], from both the left and the right side: C = H * C * H. It is used in the Hermitian case. If PlasmaLower, a left apply is followed by a right apply. If PlasmaUpper, a right apply is followed by a left apply. H is represented in the form

This routine is a special code for a corner C diagonal block C1 C2 C3

H = I - tau * v * v'

where tau is a complex scalar and v is a complex vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	V	The float complex V in the representation of H.
[in]	TAU	The value tau in the representation of H.
[in,out]	C1	On entry, the element C1. On exit, C1 is overwritten by the result H * C * H.
[in,out]	C2	On entry, the element C2. On exit, C2 is overwritten by the result H * C * H.
[in,out]	C3	On entry, the element C3. On exit, C3 is overwritten by the result H * C * H.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 185 of file core_slarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLower, T2, TAU, and V.

{
    float tau_v, dot, mirror;

    /*
     * In this real-arithmetic version the "left then right" (lower) and
     * "right then left" (upper) orderings perform exactly the same sequence
     * of operations on C1, C2, C3, so uplo does not select a different code
     * path.
     */
    (void)uplo;

    /* Quick return: tau == 0 means H is the identity */
    if (TAU == (float)0.0)
        return PLASMA_SUCCESS;

    /*
     * 2x2 diagonal corner, with only three entries stored:
     *
     *        C1  .              C1  C2
     *        C2  C3     or      .   C3
     *
     * `mirror` stands in for the entry that is not stored; it starts as a
     * copy of C2, taken before C2 is modified.
     */
    mirror = (*C2);
    tau_v  = TAU * V;

    /* First side, pair (C1, C2) */
    dot = *C1 + V * (*C2);
    *C1 = *C1 - dot * TAU;
    *C2 = *C2 - dot * tau_v;

    /* First side, pair (mirror, C3) */
    dot    = mirror + V * (*C3);
    mirror = mirror - dot * TAU;
    *C3    = *C3    - dot * tau_v;

    /* Second side, pair (C1, mirror): only C1 is kept, the updated mirror
     * entry is never needed */
    dot = *C1 + V * mirror;
    *C1 = *C1 - dot * TAU;

    /* Second side, pair (C2, C3) */
    dot = *C2 + V * (*C3);
    *C2 = *C2 - dot * TAU;
    *C3 = *C3 - dot * tau_v;

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

int CORE_slarfx2ce	(	PLASMA_enum	uplo,
		float *	V,
		float *	TAU,
		float *	C1,
		float *	C2,
		float *	C3
	)

Purpose

CORE_slarfx2ce applies a complex elementary reflector H to a diagonal corner C=[C1, C2, C3], from both the left and the right side: C = H * C * H. It is used in the case of general matrices, where it creates a nonzero at the NEW_NNZ position, then eliminates it and updates the reflector V and TAU. If PlasmaLower, a left apply is followed by a right apply. If PlasmaUpper, a right apply is followed by a left apply. H is represented in the form

This routine is a special code for a corner C diagonal block C1 NEW_NNZ C2 C3

H = I - tau * v * v'

where tau is a complex scalar and v is a complex vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in,out]	V	On entry, the float complex V in the representation of H. On exit, the float complex V in the representation of H, updated by the elimination of the NEW_NNZ created by the left apply in case of PlasmaLower or the right apply in case of PlasmaUpper.
[in,out]	TAU	On entry, the value tau in the representation of H. On exit, the value tau in the representation of H, updated by the elimination of the NEW_NNZ created by the left apply in case of PlasmaLower or the right apply in case of PlasmaUpper.
[in,out]	C1	On entry, the element C1. On exit, C1 is overwritten by the result H * C * H.
[in,out]	C2	On entry, the element C2. On exit, C2 is overwritten by the result H * C * H.
[in,out]	C3	On entry, the element C3. On exit, C3 is overwritten by the result H * C * H.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 335 of file core_slarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, T2, and V.

{
    float v_cur, tau_cur, tau_v, dot, fill;

    /* Quick return: tau == 0 means H is the identity */
    if (*TAU == (float)0.0)
        return PLASMA_SUCCESS;

    /* Only PlasmaLower and PlasmaUpper are handled; any other value leaves
     * every argument untouched. */
    if (uplo != PlasmaLower && uplo != PlasmaUpper)
        return PLASMA_SUCCESS;

    /*
     * 2x2 corner of the bidiagonal reduction:
     *
     *   lower:  C1  fill        upper:  C1    C2
     *           C2  C3                  fill  C3
     *
     * In this real-arithmetic version both cases perform exactly the same
     * sequence of operations, so they share the code below: apply the
     * incoming reflector once (this creates the `fill` entry), eliminate the
     * fill with a new reflector that overwrites *V and *TAU, then apply the
     * new reflector to (C2, C3).
     */
    v_cur   = (*V);
    tau_cur = (*TAU);

    /* First apply with the incoming reflector, pair (C1, C2) */
    tau_v = tau_cur * v_cur;
    dot   = *C1 + v_cur * (*C2);
    *C1   = *C1 - dot * tau_cur;
    *C2   = *C2 - dot * tau_v;

    /* New nonzero created at `fill`; C3 is updated accordingly */
    dot  =        v_cur * (*C3);
    fill =      - dot * tau_cur;
    *C3  = *C3  - dot * tau_v;

    /* Generate the Householder reflector that annihilates the fill */
    *V = fill;
    LAPACKE_slarfg_work( 2, C1, V, 1, TAU);
    v_cur   = (*V);
    tau_cur = (*TAU);

    /* Apply the NEW reflector to the remaining pair (C2, C3) */
    tau_v = tau_cur * v_cur;
    dot   = *C2 + v_cur * (*C3);
    *C2   = *C2 - dot * tau_cur;
    *C3   = *C3 - dot * tau_v;

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_slaset	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		float	alpha,
		float	beta,
		float *	A,
		int	LDA
	)

CORE_slaset - Sets the elements of the matrix A on the diagonal to beta and on the off-diagonals to alpha

Parameters:

[in]	uplo	Specifies which elements of the matrix are to be set = PlasmaUpper: Upper part of A is set; = PlasmaLower: Lower part of A is set; = PlasmaUpperLower: ALL elements of A are set.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	alpha	The constant to which the off-diagonal elements are to be set.
[in]	beta	The constant to which the diagonal elements are to be set.
[in,out]	A	On entry, the M-by-N tile A. On exit, A has been set accordingly.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

Definition at line 58 of file core_slaset.c.

References lapack_const.

{
    /*
     * Thin wrapper: forwards every argument unchanged to LAPACKE's slaset
     * using column-major storage. lapack_const() is applied to uplo before
     * the call (presumably mapping the PLASMA enum to LAPACK's character
     * code -- confirm against its definition). Per the description above,
     * alpha goes to the off-diagonal entries and beta to the diagonal.
     * The LAPACKE return value is discarded.
     */
    LAPACKE_slaset_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo),
        M, N, alpha, beta, A, LDA);
}

Here is the caller graph for this function:

void CORE_slaset2	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA
	)

CORE_slaset2 - Sets the elements of the matrix A to alpha. Not LAPACK compliant! Read below.

Parameters:

[in]	uplo	Specifies which elements of the matrix are to be set = PlasmaUpper: STRICT Upper part of A is set to alpha; = PlasmaLower: STRICT Lower part of A is set to alpha; = PlasmaUpperLower: ALL elements of A are set to alpha. Not LAPACK Compliant.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	alpha	The constant to which the elements are to be set.
[in,out]	A	On entry, the M-by-N tile A. On exit, A has been set to alpha accordingly.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

Definition at line 56 of file core_slaset2.c.

References lapack_const, PlasmaLower, and PlasmaUpper.

{
    /*
     * Sets the strictly upper part, the strictly lower part, or the whole
     * of the M-by-N column-major tile A to alpha.
     *
     * The strict triangles are obtained by running slaset (which also
     * writes the diagonal of the block it is given) on a block shifted by
     * one column (upper) or one row (lower), with alpha passed as both the
     * off-diagonal and the diagonal value.
     *
     * M >= 0 and N >= 0 are valid inputs. When the shifted block would be
     * empty there is nothing to set, so the call is skipped: this avoids
     * passing a negative dimension to LAPACK and forming a pointer past the
     * end of the tile (A+LDA with N == 0, A+1 with M == 0).
     */
    if (uplo == PlasmaUpper) {
        /* Strict upper part of A == upper part of A(0:M-1, 1:N-1). */
        if (N > 1) {
            LAPACKE_slaset_work(
                LAPACK_COL_MAJOR,
                lapack_const(uplo),
                M, N-1, alpha, alpha, A+LDA, LDA);
        }
    }
    else if (uplo == PlasmaLower) {
        /* Strict lower part of A == lower part of A(1:M-1, 0:N-1). */
        if (M > 1) {
            LAPACKE_slaset_work(
                LAPACK_COL_MAJOR,
                lapack_const(uplo),
                M-1, N, alpha, alpha, A+1, LDA);
        }
    }
    else {
        /* Any other uplo: every element of A, diagonal included. */
        LAPACKE_slaset_work(
            LAPACK_COL_MAJOR,
            lapack_const(uplo),
            M, N, alpha, alpha, A, LDA);
    }
}

Here is the caller graph for this function:

void CORE_slaset2_quark ( Quark * quark )

Definition at line 103 of file core_slaset2.c.

References A, CORE_slaset2(), quark_unpack_args_6, and uplo.

{
    /* QUARK task body: unpack the six task arguments, then delegate to
     * CORE_slaset2 with exactly those values. */
    float *A;
    float alpha;
    int uplo, M, N, LDA;

    quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA);

    CORE_slaset2(uplo, M, N, alpha, A, LDA);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slaset_quark ( Quark * quark )

Definition at line 95 of file core_slaset.c.

References A, lapack_const, quark_unpack_args_7, and uplo.

{
    /* QUARK task body: unpack the seven task arguments and issue the same
     * column-major slaset call that CORE_slaset performs. */
    float *A;
    float alpha, beta;
    int uplo, M, N, LDA;

    quark_unpack_args_7(quark, uplo, M, N, alpha, beta, A, LDA);

    LAPACKE_slaset_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                        M, N, alpha, beta, A, LDA);
}

Here is the caller graph for this function:

void CORE_slaswp	(	int	N,
		float *	A,
		int	LDA,
		int	I1,
		int	I2,
		int *	IPIV,
		int	INC
	)

Definition at line 29 of file core_slaswp.c.

{
    /*
     * Thin wrapper: forwards all arguments unchanged to LAPACKE's slaswp
     * on column-major storage. The LAPACKE return value is discarded.
     */
    LAPACKE_slaswp_work( LAPACK_COL_MAJOR, N, A, LDA, I1, I2, IPIV, INC );
}

void CORE_slaswp_f2_quark ( Quark * quark )

Definition at line 102 of file core_slaswp.c.

References A, and quark_unpack_args_9.

{
    /* QUARK task body for the slaswp variant carrying two extra task
     * arguments. fake1 and fake2 are unpacked but never used in the call. */
    void *fake1, *fake2;
    float *A;
    int *ipiv;
    int n, lda, i1, i2, inc;

    quark_unpack_args_9(quark, n, A, lda, i1, i2, ipiv, inc, fake1, fake2);

    LAPACKE_slaswp_work(LAPACK_COL_MAJOR, n, A, lda, i1, i2, ipiv, inc);
}

Here is the caller graph for this function:

int CORE_slaswp_ontile	(	PLASMA_desc	descA,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc
	)

CORE_slaswp_ontile applies the slaswp row interchanges to a matrix stored in tile layout.

Parameters:

[in,out]	descA	The descriptor of the matrix A to permute.
[in]	i1	The first element of IPIV for which a row interchange will be done.
[in]	i2	The last element of IPIV for which a row interchange will be done.
[in]	ipiv	The pivot indices; only the elements in positions i1 to i2 are accessed. The pivots are offset by descA.i.
[in]	inc	The increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.

Definition at line 147 of file core_slaswp.c.

References A, BLKLDD, cblas_sswap(), coreblas_error, plasma_desc_t::i, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nt, and PLASMA_SUCCESS.

{
    /*
     * Applies the row interchanges listed in ipiv to the tiles of descA.
     * Each interchange swaps two rows of descA.n entries with cblas_sswap.
     * Returns PLASMA_SUCCESS, or a negative value when an argument check
     * fails.
     */
    int i, j, ip, it;
    float *A1;
    int lda1, lda2;
    /* Change i1 to C notation */
    i1--;
    /* descA.nt (presumably the number of tile columns) must not exceed 1. */
    if ( descA.nt > 1 ) {
        coreblas_error(1, "Illegal value of descA.nt");
        return -1;
    }
    /* i1 was 1-based on entry, so anything below 0 here is invalid. */
    if ( i1 < 0 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > descA.m) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }
    /* NOTE(review): this check reports parameter 2 to coreblas_error but
     * returns -3; confirm which index is intended. */
    if ( ! ( (i2 - i1 - i1%descA.mb -1) < descA.mb ) ) {
        coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
        return -3;
    }
    /* Tile row holding the first row to interchange. */
    it = i1 / descA.mb;
    if (inc > 0) {
        /* Forward pass: j visits the 0-based rows i1 .. i2-1 in order,
         * reading one pivot per row and stepping ipiv by inc. */
        A1 = A(it, 0);
        /* NOTE(review): the stride comes from tile 0 while A1 points at
         * tile `it`, and j is not reduced modulo descA.mb; this looks
         * correct only when it == 0 -- confirm against callers. */
        lda1 = BLKLDD(descA, 0);
        for (j = i1; j < i2; ++j, ipiv+=inc) {
            /* 0-based pivot row after removing the descA.i offset. */
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                /* Locate the pivot row: tile index and row inside it. */
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_sswap(descA.n, A1       + j, lda1,
                                     A(it, 0) + i, lda2 );
            }
        }
    }
    else
    {
        /* Backward pass (inc <= 0): rows are visited from i2-1 down to the
         * original first row. */
        A1 = A(it, 0);
        /* NOTE(review): here the stride is taken from the last tile row
         * (descA.mt-1) rather than tile `it` -- confirm. */
        lda1 = BLKLDD(descA, descA.mt-1);

        /* Lower i1 once more so that the `j > i1` bound below still
         * includes the first row. */
        i1--;
        /* Offset ipiv by (1-i2)*inc entries; since inc <= 0 this moves it
         * forward, and the loop's ipiv+=inc then walks it backward. */
        ipiv = &ipiv[(1-i2)*inc];
        for (j = i2-1; j > i1; --j, ipiv+=inc) {
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_sswap(descA.n, A1       + j, lda1,
                                     A(it, 0) + i, lda2 );
            }
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slaswp_ontile_f2_quark ( Quark * quark )

Definition at line 279 of file core_slaswp.c.

References A, CORE_slaswp_ontile(), and quark_unpack_args_8.

{
    /* QUARK task body: unpack eight task arguments and run the tiled row
     * swap. A, fake1 and fake2 are unpacked but not passed on; the kernel
     * reaches the data through descA. */
    PLASMA_desc descA;
    void *fake1, *fake2;
    float *A;
    int *ipiv;
    int i1, i2, inc;

    quark_unpack_args_8(quark, descA, A, i1, i2, ipiv, inc, fake1, fake2);

    CORE_slaswp_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slaswp_ontile_quark ( Quark * quark )

Definition at line 238 of file core_slaswp.c.

References A, CORE_slaswp_ontile(), and quark_unpack_args_7.

{
    /* QUARK task body: unpack seven task arguments and run the tiled row
     * swap. A and fake are unpacked but not passed on; the kernel reaches
     * the data through descA. */
    PLASMA_desc descA;
    float *A, *fake;
    int *ipiv;
    int i1, i2, inc;

    quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);

    CORE_slaswp_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slaswp_quark ( Quark * quark )

Definition at line 61 of file core_slaswp.c.

References A, and quark_unpack_args_7.

{
    /* QUARK task body: unpack the seven slaswp arguments and call the
     * column-major LAPACKE routine with them. */
    float *A;
    int *ipiv;
    int n, lda, i1, i2, inc;

    quark_unpack_args_7(quark, n, A, lda, i1, i2, ipiv, inc);

    LAPACKE_slaswp_work(LAPACK_COL_MAJOR, n, A, lda, i1, i2, ipiv, inc);
}

Here is the caller graph for this function:

int CORE_slaswpc_ontile	(	PLASMA_desc	descA,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc
	)

CORE_slaswpc_ontile applies the column-interchange counterpart of slaswp to a matrix stored in tile layout.

Parameters:

[in,out]	descA	The descriptor of the matrix A to permute.
[in]	i1	The first element of IPIV for which a column interchange will be done.
[in]	i2	The last element of IPIV for which a column interchange will be done.
[in]	ipiv	The pivot indices; only the elements in positions i1 to i2 are accessed. The pivots are offset by descA.j.
[in]	inc	The increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.

Definition at line 430 of file core_slaswp.c.

References A, BLKLDD, cblas_sswap(), coreblas_error, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, and PLASMA_SUCCESS.

{
    /*
     * Applies the column interchanges listed in ipiv to the tiles of descA.
     * Each interchange swaps two columns of descA.m contiguous entries
     * (unit stride) with cblas_sswap. Returns PLASMA_SUCCESS, or a negative
     * value when an argument check fails.
     */
    int i, j, ip, it;
    float *A1;
    int lda;
    /* descA.mt (presumably the number of tile rows) must not exceed 1. */
    if ( descA.mt > 1 ) {
        coreblas_error(1, "Illegal value of descA.mt");
        return -1;
    }
    /* i1 stays 1-based throughout this routine. */
    if ( i1 < 1 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > descA.n) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }
    /* NOTE(review): this check reports parameter 2 to coreblas_error but
     * returns -3; confirm which index is intended. */
    if ( ! ( (i2 - i1 - i1%descA.nb -1) < descA.nb ) ) {
        coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
        return -3;
    }
    /* Column stride shared by every swap below. */
    lda = BLKLDD(descA, 0);

    /* NOTE(review): i1 is still 1-based here, unlike in
     * CORE_slaswp_ontile where it is decremented before the division;
     * i1 == descA.nb therefore yields tile 1 -- confirm this is intended. */
    it = i1 / descA.nb;
    if (inc > 0) {
        /* Forward pass: j visits the 0-based columns i1-1 .. i2-1. */
        A1 = A(0, it);
        for (j = i1-1; j < i2; ++j, ipiv+=inc) {
            /* 0-based pivot column after removing the descA.j offset. */
            ip = (*ipiv) - descA.j - 1;
            if ( ip != j )
            {
                /* Locate the pivot column: tile index and column inside it. */
                it = ip / descA.nb;
                i  = ip % descA.nb;
                cblas_sswap(descA.m, A1       + j*lda, 1,
                                     A(0, it) + i*lda, 1 );
            }
        }
    }
    else
    {
        /* Backward pass (inc <= 0): columns are visited from i2-1 down to
         * i1-1 (0-based). */
        A1 = A(0, it);
        /* Convert the 1-based i1 into an exclusive lower bound for j. */
        i1 -= 2;
        /* Offset ipiv by (1-i2)*inc entries; since inc <= 0 this moves it
         * forward, and the loop's ipiv+=inc then walks it backward. */
        ipiv = &ipiv[(1-i2)*inc];
        for (j = i2-1; j > i1; --j, ipiv+=inc) {
            ip = (*ipiv) - descA.j - 1;
            if ( ip != j )
            {
                it = ip / descA.nb;
                i  = ip % descA.nb;
                cblas_sswap(descA.m, A1       + j*lda, 1,
                                     A(0, it) + i*lda, 1 );
            }
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slaswpc_ontile_quark ( Quark * quark )

Definition at line 516 of file core_slaswp.c.

References A, CORE_slaswpc_ontile(), and quark_unpack_args_7.

{
    /* QUARK task body: unpack seven task arguments and run the tiled
     * column swap. A and fake are unpacked but not passed on; the kernel
     * reaches the data through descA. */
    PLASMA_desc descA;
    float *A, *fake;
    int *ipiv;
    int i1, i2, inc;

    quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);

    CORE_slaswpc_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_slauum	(	int	uplo,
		int	N,
		float *	A,
		int	LDA
	)

Definition at line 29 of file core_slauum.c.

References lapack_const.

{
    /*
     * Thin wrapper: forwards N, A and LDA unchanged to LAPACKE's slauum on
     * column-major storage, with uplo passed through lapack_const().
     * The LAPACKE return value is discarded.
     */
    LAPACKE_slauum_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, A, LDA );
}

void CORE_slauum_quark ( Quark * quark )

Definition at line 57 of file core_slauum.c.

References A, lapack_const, quark_unpack_args_4, and uplo.

{
    /* QUARK task body: unpack the four slauum arguments and issue the same
     * column-major LAPACKE call that CORE_slauum performs. */
    float *A;
    int uplo, N, LDA;

    quark_unpack_args_4(quark, uplo, N, A, LDA);

    LAPACKE_slauum_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, A, LDA);
}

Here is the caller graph for this function:

int CORE_sormlq	(	int	side,
		int	trans,
		int	M,
		int	N,
		int	K,
		int	IB,
		float *	A,
		int	LDA,
		float *	T,
		int	LDT,
		float *	C,
		int	LDC,
		float *	WORK,
		int	LDWORK
	)

CORE_sormlq overwrites the general real M-by-N tile C with

                  SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':    Q * C          C * Q
    TRANS = 'T':    Q**T * C       C * Q**T

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(k) . . . H(2) H(1)

as returned by CORE_sgelqt. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : Transpose, apply Q**T.
[in]	M	The number of rows of the tile C. M >= 0.
[in]	N	The number of columns of the tile C. N >= 0.
[in]	K	The number of elementary reflectors whose product defines the matrix Q. If SIDE = PlasmaLeft, M >= K >= 0; if SIDE = PlasmaRight, N >= K >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in]	A	Dimension: (LDA,M) if SIDE = PlasmaLeft, (LDA,N) if SIDE = PlasmaRight, The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_sgelqt in the first k rows of its array argument A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,K).
[in]	T	The IB-by-K triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[in,out]	C	On entry, the M-by-N tile C. On exit, C is overwritten by QC or QTC or CQT or CQ.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).
[in,out]	WORK	On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
[in]	LDWORK	The dimension of the array WORK. If SIDE = PlasmaLeft, LDWORK >= max(1,N); if SIDE = PlasmaRight, LDWORK >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 108 of file core_sormlq.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
    /* Block loop control: first block, signed step, current block offset
     * and number of reflectors in the current block. */
    int first, step, blk, nrefl;
    /* Order of Q and minimum length of WORK; both depend on side. */
    int qorder, wmin;
    /* Sub-tile of C touched by the current block: origin and extent. */
    int row0 = 0, col0 = 0;
    int rows = M, cols = N;

    /* ---- Argument checks; the first failing one decides the result ---- */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }

    /* Q has order M when applied from the left, N from the right; WORK
     * must cover the other dimension. */
    qorder = (side == PlasmaLeft) ? M : N;
    wmin   = (side == PlasmaLeft) ? N : M;

    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > qorder)) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    /* IB == 0 is tolerated only when the tile is empty. */
    if ((IB < 0) || ((IB == 0) && (M > 0) && (N > 0))) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if ((K > 0) && (LDA < max(1,K))) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((M > 0) && (LDC < max(1,M))) {
        coreblas_error(12, "Illegal value of LDC");
        return -12;
    }
    if ((wmin > 0) && (LDWORK < max(1,wmin))) {
        coreblas_error(14, "Illegal value of LDWORK");
        return -14;
    }

    /* Nothing to do for an empty tile or an empty set of reflectors. */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Choose the traversal direction over the blocks of IB reflectors:
     * ascending for (Left, NoTrans) and (Right, Trans), descending
     * otherwise, starting from the last (possibly partial) block. */
    if (((side == PlasmaLeft)  && (trans == PlasmaNoTrans)) ||
        ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
        first = 0;
        step  = IB;
    }
    else {
        first = ((K - 1) / IB) * IB;
        step  = -IB;
    }

    /* slarfb is called with the opposite transpose flag to the one the
     * caller passed in. */
    trans = (trans == PlasmaNoTrans) ? PlasmaTrans : PlasmaNoTrans;

    for (blk = first; (blk >= 0) && (blk < K); blk += step) {
        nrefl = min(IB, K-blk);

        if (side == PlasmaLeft) {
            /* H or H' acts on C(blk:m, 1:n). */
            rows = M - blk;
            row0 = blk;
        }
        else {
            /* H or H' acts on C(1:m, blk:n). */
            cols = N - blk;
            col0 = blk;
        }

        /* Apply the block reflector (forward, row-wise storage). */
        LAPACKE_slarfb_work(LAPACK_COL_MAJOR,
            lapack_const(side),
            lapack_const(trans),
            lapack_const(PlasmaForward),
            lapack_const(PlasmaRowwise),
            rows, cols, nrefl,
            &A[LDA*blk + blk], LDA,
            &T[LDT*blk], LDT,
            &C[LDC*col0 + row0], LDC,
            WORK, LDWORK);
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_sormlq_quark ( Quark * quark )

Definition at line 264 of file core_sormlq.c.

References A, C, CORE_sormlq(), quark_unpack_args_14, side, T, and trans.

{
    /* QUARK task body: unpack the fourteen CORE_sormlq arguments and
     * forward them in the same order. */
    float *A, *T, *C, *WORK;
    int side, trans;
    int m, n, k, ib;
    int lda, ldt, ldc, ldwork;

    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
                         A, lda, T, ldt, C, ldc, WORK, ldwork);

    CORE_sormlq(side, trans, m, n, k, ib,
                A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sormqr	(	int	side,
		int	trans,
		int	M,
		int	N,
		int	K,
		int	IB,
		float *	A,
		int	LDA,
		float *	T,
		int	LDT,
		float *	C,
		int	LDC,
		float *	WORK,
		int	LDWORK
	)

CORE_sormqr overwrites the general real M-by-N tile C with

                  SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':    Q * C          C * Q
    TRANS = 'T':    Q**T * C       C * Q**T

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_sgeqrt. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : Transpose, apply Q**T.
[in]	M	The number of rows of the tile C. M >= 0.
[in]	N	The number of columns of the tile C. N >= 0.
[in]	K	The number of elementary reflectors whose product defines the matrix Q. If SIDE = PlasmaLeft, M >= K >= 0; if SIDE = PlasmaRight, N >= K >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in]	A	Dimension: (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_sgeqrt in the first k columns of its array argument A.
[in]	LDA	The leading dimension of the array A. If SIDE = PlasmaLeft, LDA >= max(1,M); if SIDE = PlasmaRight, LDA >= max(1,N).
[in]	T	The IB-by-K triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[in,out]	C	On entry, the M-by-N tile C. On exit, C is overwritten by QC or QTC or CQT or CQ.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).
[in,out]	WORK	On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
[in]	LDWORK	The dimension of the array WORK. If SIDE = PlasmaLeft, LDWORK >= max(1,N); if SIDE = PlasmaRight, LDWORK >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 108 of file core_sormqr.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, and PlasmaTrans.

{
    /* Block loop control: first block, signed step, current block offset
     * and number of reflectors in the current block. */
    int first, step, blk, nrefl;
    /* Order of Q and minimum length of WORK; both depend on side. */
    int qorder, wmin;
    /* Sub-tile of C touched by the current block: origin and extent. */
    int row0 = 0, col0 = 0;
    int rows = M, cols = N;

    /* ---- Argument checks; the first failing one decides the result ---- */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }

    /* Q has order M when applied from the left, N from the right; WORK
     * must cover the other dimension. */
    qorder = (side == PlasmaLeft) ? M : N;
    wmin   = (side == PlasmaLeft) ? N : M;

    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > qorder)) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    /* IB == 0 is tolerated only when the tile is empty. */
    if ((IB < 0) || ((IB == 0) && (M > 0) && (N > 0))) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if ((qorder > 0) && (LDA < max(1,qorder))) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((M > 0) && (LDC < max(1,M))) {
        coreblas_error(12, "Illegal value of LDC");
        return -12;
    }
    if ((wmin > 0) && (LDWORK < max(1,wmin))) {
        coreblas_error(14, "Illegal value of LDWORK");
        return -14;
    }

    /* Nothing to do for an empty tile or an empty set of reflectors. */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Choose the traversal direction over the blocks of IB reflectors:
     * ascending for (Left, Trans) and (Right, NoTrans), descending
     * otherwise, starting from the last (possibly partial) block. */
    if (((side == PlasmaLeft)  && (trans != PlasmaNoTrans)) ||
        ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
        first = 0;
        step  = IB;
    }
    else {
        first = ((K - 1) / IB) * IB;
        step  = -IB;
    }

    for (blk = first; (blk >= 0) && (blk < K); blk += step) {
        nrefl = min(IB, K-blk);

        if (side == PlasmaLeft) {
            /* H or H' acts on C(blk:m, 1:n). */
            rows = M - blk;
            row0 = blk;
        }
        else {
            /* H or H' acts on C(1:m, blk:n). */
            cols = N - blk;
            col0 = blk;
        }

        /* Apply the block reflector (forward, column-wise storage). */
        LAPACKE_slarfb_work(LAPACK_COL_MAJOR,
            lapack_const(side),
            lapack_const(trans),
            lapack_const(PlasmaForward),
            lapack_const(PlasmaColumnwise),
            rows, cols, nrefl,
            &A[LDA*blk + blk], LDA,
            &T[LDT*blk], LDT,
            &C[LDC*col0 + row0], LDC,
            WORK, LDWORK);
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_sormqr_quark ( Quark * quark )

Definition at line 257 of file core_sormqr.c.

References A, C, CORE_sormqr(), quark_unpack_args_14, side, T, and trans.

{
    /* QUARK task body: unpack the fourteen CORE_sormqr arguments and
     * forward them in the same order. */
    float *A, *T, *C, *WORK;
    int side, trans;
    int m, n, k, ib;
    int lda, ldt, ldc, ldwork;

    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
                         A, lda, T, ldt, C, ldc, WORK, ldwork);

    CORE_sormqr(side, trans, m, n, k, ib,
                A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_spamm	(	int	op,
		int	side,
		int	storev,
		int	M,
		int	N,
		int	K,
		int	L,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	W,
		int	LDW
	)

CORE_spamm performs one of the matrix-matrix operations

                        LEFT                      RIGHT
    OP PlasmaW  :  W  = A1 + op(V) * A2   or   W  = A1 + A2 * op(V)
    OP PlasmaA2 :  A2 = A2 - op(V) * W    or   A2 = A2 - W * op(V)

where op( V ) is one of

op( V ) = V or op( V ) = V**T,

A1, A2 and W are general matrices, and V is:

  l = k: rectangle + triangle
  l < k: rectangle + trapezoid
  l = 0: rectangle

Size of V, both rowwise and columnwise, is:

    side   trans    size
    left     N     M x K
             T     K x M
    right    N     K x N
             T     N x K

LEFT (columnwise and rowwise):

             |    K    |                 |         M         |
          _  __________   _              _______________        _
             |    |    |                 |             | \
    V:       |    |    |            V':  |_____________|___\    K
             |    |    | M-L             |                  |
          M  |    |    |                 |__________________|   _
             |____|    |  _
             \    |    |                 |    M - L    | L  |
               \  |    |  L
          _      \|____|  _

RIGHT (columnwise and rowwise):

         |         K         |                   |    N    |
         _______________        _             _  __________   _
         |             | \                       |    |    |
    V':  |_____________|___\    N        V:      |    |    |
         |                  |                    |    |    | K-L
         |__________________|   _             K  |    |    |
                                                 |____|    |  _
         |    K - L    | L  |                    \    |    |
                                                   \  |    |  L
                                              _      \|____|  _

Arguments

Parameters:

[in]	OP	OP specifies which operation to perform: @arg PlasmaW : W = A1 + op(V) * A2 or W = A1 + A2 * op(V) @arg PlasmaA2 : A2 = A2 - op(V) * W or A2 = A2 - W * op(V)
[in]	SIDE	SIDE specifies whether op( V ) multiplies A2 or W from the left or right as follows: @arg PlasmaLeft : multiply op( V ) from the left OP PlasmaW : W = A1 + op(V) * A2 OP PlasmaA2 : A2 = A2 - op(V) * W @arg PlasmaRight : multiply op( V ) from the right OP PlasmaW : W = A1 + A2 * op(V) OP PlasmaA2 : A2 = A2 - W * op(V)
[in]	STOREV	Indicates how the vectors which define the elementary reflectors are stored in V: @arg PlasmaColumnwise @arg PlasmaRowwise
[in]	M	The number of rows of the A1, A2 and W If SIDE is PlasmaLeft, the number of rows of op( V )
[in]	N	The number of columns of the A1, A2 and W If SIDE is PlasmaRight, the number of columns of op( V )
[in]	K	If SIDE is PlasmaLeft, the number of columns of op( V ) If SIDE is PlasmaRight, the number of rows of op( V )
[in]	L	The size of the triangular part of V
[in]	A1	On entry, the M-by-N tile A1.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out]	A2	On entry, the M-by-N tile A2. On exit, if OP is PlasmaA2 A2 is overwritten
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M).
[in]	V	The matrix V as described above. If SIDE is PlasmaLeft : op( V ) is M-by-K If SIDE is PlasmaRight: op( V ) is K-by-N
[in]	LDV	The leading dimension of the array V.
[in,out]	W	On entry, the M-by-N matrix W. On exit, W is overwritten either if OP is PlasmaA2 or PlasmaW. If OP is PlasmaA2, W is an input and is used as a workspace.
[in]	LDW	The leading dimension of array WORK.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 174 of file core_spamm.c.

References CblasLower, CblasUpper, coreblas_error, L, PLASMA_SUCCESS, PlasmaA2, PlasmaColumnwise, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaTrans, PlasmaW, trans, and uplo.

{
    /* vi2 / vi3: offsets into V handed to the worker routines;
     * uplo / trans: triangle and transpose flags derived from the inputs;
     * info: status returned by the worker. */
    int vi2, vi3, uplo, trans, info;

    /* ---- Argument checks; the first failing one decides the result ---- */
    if ((op != PlasmaW) && (op != PlasmaA2)) {
        coreblas_error(1, "Illegal value of op");
        return -1;
    }
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(2, "Illegal value of side");
        return -2;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(3, "Illegal value of storev");
        return -3;
    }
    if (M < 0) {
        coreblas_error(4, "Illegal value of M");
        return -4;
    }
    if (N < 0) {
        coreblas_error(5, "Illegal value of N");
        return -5;
    }
    if (K < 0) {
        coreblas_error(6, "Illegal value of K");
        return -6;
    }
    if (L < 0) {
        coreblas_error(7, "Illegal value of L");
        return -7;
    }
    if (LDA1 < 0) {
        coreblas_error(9, "Illegal value of LDA1");
        return -9;
    }
    if (LDA2 < 0) {
        coreblas_error(11, "Illegal value of LDA2");
        return -11;
    }
    if (LDV < 0) {
        coreblas_error(13, "Illegal value of LDV");
        return -13;
    }
    if (LDW < 0) {
        coreblas_error(15, "Illegal value of LDW");
        return -15;
    }

    /* Empty problem: nothing to compute. */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /*
     * Transpose flag selected for each combination:
     *
     *     side    storage    PlasmaW   PlasmaA2
     *     left    colwise       T         N
     *             rowwise       N         T
     *     right   colwise       N         T
     *             rowwise       T         N
     *
     * vi2 is derived from the same choice; vi3 depends only on the
     * storage scheme.
     */
    if (storev == PlasmaColumnwise) {
        uplo = CblasUpper;
        vi3  = LDV * L;

        if (side == PlasmaLeft) {
            if (op == PlasmaA2) {
                trans = PlasmaNoTrans;
                vi2   = M - L;
            }
            else {
                trans = PlasmaTrans;
                vi2   = K - L;
            }
        }
        else {
            if (op == PlasmaW) {
                trans = PlasmaNoTrans;
                vi2   = K - L;
            }
            else {
                trans = PlasmaTrans;
                vi2   = N - L;
            }
        }
    }
    else {
        uplo = CblasLower;
        vi3  = L;

        if (side == PlasmaLeft) {
            if (op == PlasmaW) {
                trans = PlasmaNoTrans;
                vi2   = K - L;
            }
            else {
                trans = PlasmaTrans;
                vi2   = M - L;
            }
        }
        else {
            if (op == PlasmaA2) {
                trans = PlasmaNoTrans;
                vi2   = N - L;
            }
            else {
                trans = PlasmaTrans;
                vi2   = K - L;
            }
        }
        /* Row-wise storage: the offset counts whole columns of V. */
        vi2 *= LDV;
    }

    /* op was validated above, so it is either PlasmaW or PlasmaA2. */
    if (op == PlasmaW) {
        info = CORE_spamm_w(
                side, trans, uplo, M, N, K, L, vi2, vi3,
                A1, LDA1, A2, LDA2, V, LDV, W, LDW);
    }
    else {
        info = CORE_spamm_a2(
                side, trans, uplo, M, N, K, L, vi2, vi3,
                A2, LDA2, V, LDV, W, LDW);
    }

    /* Propagate a worker failure unchanged. */
    if (info != 0)
        return info;

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_spamm_quark ( Quark * quark )

Definition at line 600 of file core_spamm.c.

References CORE_spamm(), L, quark_unpack_args_15, side, storev, V, and W.

{
    /* QUARK task body: unpack the fifteen CORE_spamm arguments and forward
     * them in the same order. The status returned by CORE_spamm is not
     * inspected. */
    float *A1, *A2, *V, *W;
    int op, side, storev;
    int M, N, K, L;
    int LDA1, LDA2, LDV, LDW;

    quark_unpack_args_15(quark, op, side, storev, M, N, K, L,
            A1, LDA1, A2, LDA2, V, LDV, W, LDW);

    CORE_spamm(op, side, storev, M, N, K, L,
               A1, LDA1, A2, LDA2, V, LDV, W, LDW);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sparfb	(	int	side,
		int	trans,
		int	direct,
		int	storev,
		int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	L,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	T,
		int	LDT,
		float *	WORK,
		int	LDWORK
	)

CORE_sparfb applies a real upper triangular block reflector H or its transpose H' to a real rectangular matrix formed by coupling two tiles A1 and A2. Matrix V is:

        COLUMNWISE                    ROWWISE

     |     K     |                 |      N2-L     |   L  |
  __ _____________ __           __ _________________        __
     |    |      |                 |               | \
     |    |      |                 |               |   \    L
M2-L |    |      |              K  |_______________|_____\  __
     |    |      | M2              |                      |
  __ |____|      |                 |                      | K-L
     \    |      |              __ |______________________| __
   L   \  |      |
  __     \|______| __              |          N2          |

     | L  | K-L  |

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : Transpose, apply Q**T.
[in]	direct	Indicates how H is formed from a product of elementary reflectors PlasmaForward : H = H(1) H(2) . . . H(k) (Forward) PlasmaBackward : H = H(k) . . . H(2) H(1) (Backward)
[in]	storev	Indicates how the vectors which define the elementary reflectors are stored: PlasmaColumnwise PlasmaRowwise
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. If side = PlasmaRight, M2 must equal M1.
[in]	N2	The number of columns of the tile A2. N2 >= 0. If side = PlasmaLeft, N2 must equal N1.
[in]	K	The order of the matrix T (= the number of elementary reflectors whose product defines the block reflector).
[in]	L	The size of the triangular part of V
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	(LDV,K) if STOREV = 'C' (LDV,M2) if STOREV = 'R' and SIDE = 'L' (LDV,N2) if STOREV = 'R' and SIDE = 'R' Matrix V.
[in]	LDV	The leading dimension of the array V. If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M2); if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N2); if STOREV = 'R', LDV >= K.
[out]	T	The triangular K-by-K matrix T in the representation of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= K.
[in,out]	WORK
[in]	LDWORK	The dimension of the array WORK.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 131 of file core_sparfb.c.

References cblas_saxpy(), cblas_strmm(), CblasColMajor, CblasLeft, CblasNonUnit, CblasRight, CblasUpper, CORE_spamm(), coreblas_error, PLASMA_ERR_NOT_SUPPORTED, PLASMA_SUCCESS, PlasmaA2, PlasmaBackward, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaTrans, and PlasmaW.

{
    /*
     * Applies the block reflector defined by (V, T) to the pair of tiles
     * (A1, A2), using WORK as the intermediate matrix W:
     *
     *   Left :  W = A1 + op(V) * A2,  W = op(T) * W,
     *           A1 = A1 - W,          A2 = A2 - op(V) * W
     *   Right:  W = A1 + A2 * op(V),  W = W * op(T),
     *           A1 = A1 - W,          A2 = A2 - W * op(V)
     *
     * Returns PLASMA_SUCCESS on success, -i when the i-th argument is
     * illegal, PLASMA_ERR_NOT_SUPPORTED when direct == PlasmaBackward, or
     * the non-zero code reported by CORE_spamm when one of its two
     * invocations fails.
     */
    static float zone  =  1.0;
    static float mzone = -1.0;
    int j;
    int info;

    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if ((direct != PlasmaForward) && (direct != PlasmaBackward)) {
        coreblas_error(3, "Illegal value of direct");
        return -3;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(4, "Illegal value of storev");
        return -4;
    }
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    /* Applying from the right couples the tiles side by side: same row count */
    if ((M2 < 0) ||
        ( (side == PlasmaRight) && (M1 != M2) ) ) {
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    /* Applying from the left stacks the tiles: same column count */
    if ((N2 < 0) ||
        ( (side == PlasmaLeft) && (N1 != N2) ) ) {
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Only the forward direction is implemented */
    if (direct != PlasmaForward) {
        coreblas_error(3, "Not implemented (Backward / Left or Right)");
        return PLASMA_ERR_NOT_SUPPORTED;
    }

    if (side == PlasmaLeft) {
        /*
         * Column or Rowwise / Forward / Left
         * ----------------------------------
         *
         * Form  H * A  or  H' * A  where  A = ( A1 )
         *                                     ( A2 )
         */

        /* W = A1 + op(V) * A2 */
        info = CORE_spamm(
                PlasmaW, PlasmaLeft, storev,
                K, N1, M2, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        /* Stop before WORK (left undefined by a failed call) reaches A1/A2 */
        if (info != 0)
            return info;

        /* W = op(T) * W */
        cblas_strmm(
            CblasColMajor, CblasLeft, CblasUpper,
            (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
            (zone), T, LDT, WORK, LDWORK);

        /* A1 = A1 - W, one column at a time */
        for(j = 0; j < N1; j++) {
            cblas_saxpy(
                    K, (mzone),
                    &WORK[LDWORK*j], 1,
                    &A1[LDA1*j], 1);
        }

        /* A2 = A2 - op(V) * W  */
        /* W also changes: W = V * W, A2 = A2 - W */
        info = CORE_spamm(
                PlasmaA2, PlasmaLeft, storev,
                M2, N2, K, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        if (info != 0)
            return info;
    }
    else {
        /*
         * Column or Rowwise / Forward / Right
         * -----------------------------------
         *
         * Form  H * A  or  H' * A  where A  = ( A1 A2 )
         *
         */

        /* W = A1 + A2 * op(V) */
        info = CORE_spamm(
                PlasmaW, PlasmaRight, storev,
                M1, K, N2, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        /* Stop before WORK (left undefined by a failed call) reaches A1/A2 */
        if (info != 0)
            return info;

        /* W = W * op(T) */
        cblas_strmm(
            CblasColMajor, CblasRight, CblasUpper,
            (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
            (zone), T, LDT, WORK, LDWORK);

        /* A1 = A1 - W, one column at a time */
        for(j = 0; j < K; j++) {
            cblas_saxpy(
                    M1, (mzone),
                    &WORK[LDWORK*j], 1,
                    &A1[LDA1*j], 1);
        }

        /* A2 = A2 - W * op(V) */
        /* W also changes: W = W * V', A2 = A2 - W */
        info = CORE_spamm(
                PlasmaA2, PlasmaRight, storev,
                M2, N2, K, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        if (info != 0)
            return info;
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_spemv	(	int	trans,
		int	storev,
		int	M,
		int	N,
		int	L,
		float	ALPHA,
		float *	A,
		int	LDA,
		float *	X,
		int	INCX,
		float	BETA,
		float *	Y,
		int	INCY,
		float *	WORK
	)

SPEMV performs one of the matrix-vector operations

y = alpha*op( A )*x + beta*y

where op( A ) is one of

op( A ) = A or op( A ) = A**T,

alpha and beta are scalars, x and y are vectors and A is a pentagonal matrix (see further details).

Arguments

Parameters:

[in]	storev	PlasmaColumnwise : array A is stored columnwise; PlasmaRowwise : array A is stored rowwise.
[in]	trans	PlasmaNoTrans : y := alpha*A*x + beta*y; PlasmaTrans : y := alpha*A**T*x + beta*y.
[in]	M	Number of rows of the matrix A. M must be at least zero.
[in]	N	Number of columns of the matrix A. N must be at least zero.
[in]	L	Order of triangle within the matrix A (L specifies the shape of the matrix A; see further details).
[in]	ALPHA	Scalar alpha.
[in]	A	Array of size LDA-by-N. On entry, the leading M by N part of the array A must contain the matrix of coefficients.
[in]	LDA	Leading dimension of array A.
[in]	X	On entry, the incremented array X must contain the vector x.
[in]	INCX	Increment for the elements of X. INCX must not be zero.
[in]	BETA	Scalar beta.
[in,out]	Y	On entry, the incremented array Y must contain the vector y.
[in]	INCY	Increment for the elements of Y. INCY must not be zero.
[in]	WORK	Workspace array of size at least L.

Further Details

         |     N    |
      _   ___________   _
         |          |
A:       |          |
    M-L  |          |
         |          |  M
      _  |.....     |
         \    :     |
      L    \  :     |
      _      \:_____|  _

         |  L | N-L |

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 118 of file core_spemv.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sscal(), cblas_strmv(), CblasColMajor, coreblas_error, L, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRowwise, PlasmaTrans, and PlasmaUpper.

{
   /*
    *  y = alpha * op(A) * x + beta * y
    *
    *  A is a pentagonal matrix: a rectangular part plus an L-by-L triangular
    *  part. Only two combinations pass validation: PlasmaColumnwise with
    *  PlasmaTrans, and PlasmaRowwise with PlasmaNoTrans.
    *
    *  Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal.
    */
    int K;
    static float zzero = 0.0;
    
    /* Check input arguments */
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(1, "Illegal value of trans");
        return -1;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(2, "Illegal value of storev");
        return -2;
    }
    if (!( ((storev == PlasmaColumnwise) && (trans != PlasmaNoTrans)) ||
           ((storev == PlasmaRowwise) && (trans == PlasmaNoTrans)) )) {
        coreblas_error(2, "Illegal values of trans/storev");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    /* A negative L would skip the triangular part yet widen the trailing
     * gemv (N-L or M-L) past the end of A, X and Y, so reject it here. */
    if ((L < 0) || (L > min(M ,N))) {
        coreblas_error(5, "Illegal value of L");
        return -5;
    }
    if (LDA < max(1,M)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if (INCX < 1) {
        coreblas_error(10, "Illegal value of INCX");
        return -10;
    }
    if (INCY < 1) {
        coreblas_error(13, "Illegal value of INCY");
        return -13;
    }
    /* Quick return */
    if ((M == 0) || (N == 0))
        return PLASMA_SUCCESS;
    /* NOTE(review): this returns without writing Y; with BETA == 0 the
     * documented operation would leave y = 0 -- confirm callers do not
     * rely on Y being cleared in that case. */
    if ((ALPHA == zzero) && (BETA == zzero))
        return PLASMA_SUCCESS;
    /* If L < 2, there is no triangular part */
    if (L == 1) L = 0;
    
    /* Columnwise */
    if (storev == PlasmaColumnwise) {
        /*
         *        ______________
         *        |      |     |    A1: A[ 0 ]
         *        |      |     |    A2: A[ M-L ]
         *        |  A1  |     |    A3: A[ L * LDA ]
         *        |      |     |
         *        |______| A3  |
         *        \      |     |
         *          \ A2 |     |
         *            \  |     |
         *              \|_____|
         *
         */
        
        
        /* Columnwise / NoTrans (already rejected by the trans/storev check) */
        if (trans == PlasmaNoTrans) {
            coreblas_error(1, "The case PlasmaNoTrans / PlasmaColumnwise is not yet implemented");
            return -1;
        } 
        /* Columnwise / [Conj]Trans */
        else {
        
            /* L top rows of y */
            if (L > 0) {
                
                /* w = A_2' * x_2 */
                cblas_scopy(
                            L, &X[INCX*(M-L)], INCX, WORK, 1);
                cblas_strmv(
                            CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                            (CBLAS_TRANSPOSE)trans,
                            (CBLAS_DIAG)PlasmaNonUnit,
                            L, &A[M-L], LDA, WORK, 1);
                
                if (M > L) {
                    
                    /* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
                    cblas_sgemv(
                                CblasColMajor, (CBLAS_TRANSPOSE)trans,
                                M-L, L, (ALPHA), A, LDA,
                                X, INCX, (BETA), Y, INCY);
                    
                    /* y_1 = y_1 + alpha * w */
                    cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    
                } else {
                    
                    /* No rectangular block above the triangle:
                     * y_1 = beta * y_1 + alpha * w */
                    if (BETA == zzero) {
                        cblas_sscal(L, (ALPHA), WORK, 1);
                        cblas_scopy(L, WORK, 1, Y, INCY);
                    } else {
                        cblas_sscal(L, (BETA), Y, INCY);
                        cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    }
                }
            }
            
            /* N-L bottom rows of Y, from the full-height block A3 */
            if (N > L) {
                K = N - L;
                cblas_sgemv(
                            CblasColMajor, (CBLAS_TRANSPOSE)trans,
                            M, K, (ALPHA), &A[LDA*L], LDA,
                            X, INCX, (BETA), &Y[INCY*L], INCY);
            }
        }
    }
    /* Rowwise */
    else {
        /*
         * --------------
         * |            | \           A1:  A[ 0 ]
         * |    A1      |   \         A2:  A[ (N-L) * LDA ]
         * |            | A2  \       A3:  A[ L ]
         * |--------------------\
         * |        A3          |
         * ----------------------
         *
         */
        
        /* Rowwise / NoTrans */
        if (trans == PlasmaNoTrans) {
            /* L top rows of A and y */
            if (L > 0) {
                
                /* w = A_2 * x_2 */
                cblas_scopy(
                            L, &X[INCX*(N-L)], INCX, WORK, 1);
                cblas_strmv(
                            CblasColMajor, (CBLAS_UPLO)PlasmaLower,
                            (CBLAS_TRANSPOSE)PlasmaNoTrans,
                            (CBLAS_DIAG)PlasmaNonUnit,
                            L, &A[LDA*(N-L)], LDA, WORK, 1);
                
                if (N > L) {
                    
                    /* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
                    cblas_sgemv(
                                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                                L, N-L, (ALPHA), A, LDA,
                                X, INCX, (BETA), Y, INCY);
                    
                    /* y_1 = y_1 + alpha * w */
                    cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    
                } else {
                    
                    /* No rectangular block left of the triangle:
                     * y_1 = beta * y_1 + alpha * w */
                    if (BETA == zzero) {
                        cblas_sscal(L, (ALPHA), WORK, 1);
                        cblas_scopy(L, WORK, 1, Y, INCY);
                    } else {
                        cblas_sscal(L, (BETA), Y, INCY);
                        cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    }
                }
            }
            
            /* M-L bottom rows of Y, from the full-width block A3 */
            if (M > L) {
                cblas_sgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        M-L, N, (ALPHA), &A[L], LDA,
                        X, INCX, (BETA), &Y[INCY*L], INCY);
            }
        }
        /* Rowwise / [Conj]Trans (already rejected by the trans/storev check) */
        else {
            coreblas_error(1, "The case Plasma[Conj]Trans / PlasmaRowwise is not yet implemented");
            return -1;
        }
    } 
    
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_splgsy	(	float	bump,
		int	m,
		int	n,
		float *	A,
		int	lda,
		int	bigM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Definition at line 64 of file core_splgsy.c.

References A, NBELEM, Rnd64_A, Rnd64_C, and RndF_Mul.

{
    /*
     * Fills the m-by-n tile A (leading dimension lda) located at global
     * position (m0, n0) of a symmetric matrix whose leading dimension is
     * bigM. Each entry is 0.5f - ran * RndF_Mul, where ran is the generator
     * state obtained by jumping to the entry's position in the sequence;
     * bump is added to the diagonal entries of a diagonal tile.
     */
    float *tmp = A;
    int64_t i, j;
    unsigned long long int ran, jump;

    /* Position of the tile's first entry; computed in 64 bits so that
     * n0 * bigM cannot overflow int for large matrices. */
    jump = (unsigned long long int)m0
         + (unsigned long long int)n0 * (unsigned long long int)bigM;

    /*
     * Tile diagonal
     */
    if ( m0 == n0 ) {
        /* Generate the lower triangle, column by column from the diagonal */
        for (j = 0; j < n; j++) {
            ran = Rnd64_jump( NBELEM * jump, seed );
            for (i = j; i < m; i++) {
                *tmp = 0.5f - ran * RndF_Mul;
                ran  = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
                *tmp += I*(0.5f - ran * RndF_Mul);
                ran   = Rnd64_A * ran + Rnd64_C;
#endif
                tmp++;
            }
            /* Move to the next diagonal entry: one column right, one row down */
            tmp  += (lda - i + j + 1);
            /* Same move in the global sequence: bigM for the column plus 1
             * for the row, so the values do not depend on the tile size. */
            jump += (unsigned long long int)bigM + 1;
        }
        /* Shift the diagonal and mirror the lower triangle into the upper one */
        for (j = 0; j < n; j++) {
            A[j+j*lda] += bump;
            for (i=0; i<j; i++) {
                A[lda*j+i] = A[lda*i+j];
            }
        }
    } 
    /*
     * Lower part
     */
    else if ( m0 > n0 ) {
        for (j = 0; j < n; j++) {
            ran = Rnd64_jump( NBELEM * jump, seed );
            for (i = 0; i < m; i++) {
                *tmp = 0.5f - ran * RndF_Mul;
                ran  = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
                *tmp += I*(0.5f - ran * RndF_Mul);
                ran   = Rnd64_A * ran + Rnd64_C;
#endif
                tmp++;
            }
            tmp  += (lda - i);
            jump += bigM;
        }
    }
    /*
     * Upper part
     */
    else if ( m0 < n0 ) {
        /* Overwrite jump: use the position of the mirrored (lower) tile and
         * fill row by row, so the result is its transpose. */
        jump = (unsigned long long int)n0
             + (unsigned long long int)m0 * (unsigned long long int)bigM;
        for (i = 0; i < m; i++) {
            ran = Rnd64_jump( NBELEM * jump, seed );
            for (j = 0; j < n; j++) {
                A[j*lda+i] = 0.5f - ran * RndF_Mul;
                ran = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
                A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
                ran = Rnd64_A * ran + Rnd64_C;
#endif
            }
            jump += bigM;
        }
    }
}

Here is the caller graph for this function:

void CORE_splgsy_quark ( Quark * quark )

Definition at line 172 of file core_splgsy.c.

References A, CORE_splgsy(), and quark_unpack_args_9.

{
    /* Arguments carried by the QUARK task */
    float  bump;
    int    m, n;
    float *A;
    int    lda, bigM, m0, n0;
    unsigned long long int seed;

    /* Unpack the nine task arguments, then run the generator kernel */
    quark_unpack_args_9(quark, bump, m, n, A, lda, bigM, m0, n0, seed);
    CORE_splgsy(bump, m, n, A, lda, bigM, m0, n0, seed);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_splrnt	(	int	m,
		int	n,
		float *	A,
		int	lda,
		int	bigM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Definition at line 64 of file core_splrnt.c.

References A, NBELEM, Rnd64_A, Rnd64_C, and RndF_Mul.

{
    /*
     * Fills the m-by-n tile A (leading dimension lda) located at global
     * position (m0, n0) of a matrix whose leading dimension is bigM.
     * Each entry is 0.5f - ran * RndF_Mul, where ran is the generator state
     * reached by jumping to the entry's position in the sequence.
     */
    float *tmp = A;
    int64_t i, j;
    unsigned long long int ran, jump;

    /* Position of the tile's first entry; computed in 64 bits so that
     * n0 * bigM cannot overflow int for large matrices. */
    jump = (unsigned long long int)m0
         + (unsigned long long int)n0 * (unsigned long long int)bigM;

    for (j=0; j<n; ++j ) {
        /* Re-seed at the start of every column of the tile */
        ran = Rnd64_jump( NBELEM*jump, seed );
        for (i = 0; i < m; ++i) {
            *tmp = 0.5f - ran * RndF_Mul;
            ran  = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
            *tmp += I*(0.5f - ran * RndF_Mul);
            ran   = Rnd64_A * ran + Rnd64_C;
#endif
            tmp++;
        }
        /* Skip the padding between m and lda, then advance one global column */
        tmp  += lda-i;
        jump += bigM;
    }
}

Here is the caller graph for this function:

void CORE_splrnt_quark ( Quark * quark )

Definition at line 116 of file core_splrnt.c.

References A, CORE_splrnt(), and quark_unpack_args_8.

{
    /* Arguments carried by the QUARK task */
    int    m, n;
    float *A;
    int    lda, bigM, m0, n0;
    unsigned long long int seed;

    /* Unpack the eight task arguments, then run the generator kernel */
    quark_unpack_args_8(quark, m, n, A, lda, bigM, m0, n0, seed);
    CORE_splrnt(m, n, A, lda, bigM, m0, n0, seed);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_spotrf	(	int	uplo,
		int	N,
		float *	A,
		int	LDA,
		int *	INFO
	)

Definition at line 29 of file core_spotrf.c.

References lapack_const.

{
    /* Column-major LAPACKE call; its status is reported through INFO */
    int status;

    status = LAPACKE_spotrf_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                                 N, A, LDA);
    *INFO = status;
}

Here is the caller graph for this function:

void CORE_spotrf_quark ( Quark * quark )

Definition at line 65 of file core_spotrf.c.

References A, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_7, plasma_sequence_t::status, and uplo.

{
    /* Arguments carried by the QUARK task */
    int uplo, n;
    float *A;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    int iinfo;
    /* Status returned by LAPACKE */
    int info;

    quark_unpack_args_7(quark, uplo, n, A, lda, sequence, request, iinfo);

    info = LAPACKE_spotrf_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                               n, A, lda);

    /* Flush the sequence on a non-zero status, unless it already failed;
     * the status is passed on offset by iinfo. */
    if (sequence->status == PLASMA_SUCCESS) {
        if (info != 0) {
            plasma_sequence_flush(quark, sequence, request, iinfo+info);
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sshift	(	int	s,
		int	m,
		int	n,
		int	L,
		float *	A
	)

CORE_sshift Shift a cycle of block. Same as core_sshiftw but you don't need to provide the workspace. As a matter of fact, the cycle cannot be split anymore to keep data coherency.

Parameters:

[in]	s	Start value in the cycle
[in]	m	Number of lines of tile A
[in]	n	Number of columns of tile A
[in]	L	Length of each block of data to move
[in,out]	A	Matrix of size m-by-n with each element of size L. On exit, A = A', where A' contains the permutations

Definition at line 175 of file core_sshift.c.

References CORE_sshiftw(), and W.

                                                       {
    /* Temporary copy of the block that starts the cycle */
    float *W = (float*)malloc(L * sizeof(float));

    /* NOTE(review): the allocation result is not checked before use */
    memcpy(W, &(A[s*L]), L*sizeof(float));

    /* cl == 0: perform the whole cycle in a single call */
    CORE_sshiftw(s, 0, m, n, L, A, W);

    free(W);
}

Here is the call graph for this function:

void CORE_sshift_quark ( Quark * quark )

Definition at line 208 of file core_sshift.c.

References A, CORE_sshiftw(), L, quark_unpack_args_6, and W.

{
    /* Arguments carried by the QUARK task; W is a caller-provided workspace */
    int s, m, n, L;
    float *A, *W;

    quark_unpack_args_6(quark, s, m, n, L, A, W);

    /* Save the first block of the cycle, then shift the whole cycle (cl == 0) */
    memcpy(W, &(A[s*L]), L*sizeof(float));
    CORE_sshiftw(s, 0, m, n, L, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sshiftw	(	int	s,
		int	cl,
		int	m,
		int	n,
		int	L,
		float *	A,
		float *	W
	)

CORE_sshiftw Shift a linear chain of block using a supplied workspace by following the cycle defined by: k_(i+1) = (k_i * m) % q;

Parameters:

[in]	s	Start value in the cycle
[in]	cl	Cycle length. If cl == 0, all the permutations of the cycle are performed; otherwise the cycle is split across several threads and cl gives the number of permutations to perform, so that the pieces do not overlap.
[in]	m	Number of lines of tile A
[in]	n	Number of columns of tile A
[in]	L	Length of each block of data to move
[in,out]	A	Matrix of size m-by-n with each element of size L. On exit, A = A', where A' contains the permutations
[in]	W	Array of size L. On entry, must contain: W(:) = A(sL:sL+L-1)

Definition at line 66 of file core_sshift.c.

References L.

                                                                          {
    /*
     * Follows the cycle k -> (k * m) % q, with q = m*n - 1, starting at
     * block s. Each block of L floats at index k is overwritten by the block
     * at the next index of the cycle; the last block visited receives W,
     * which must hold the original content of block s.
     */
    int64_t k, k1;
    /* Element offsets are 64-bit: k * L can exceed the range of int */
    int64_t kL, k1L;
    int     i, j, q;

    q = m * n - 1;
    k = s;
    if( cl != 0 ) {
        /* Partial cycle: exactly cl-1 block moves */
        for (i=1; i<cl; i++) {
            k1 = (k * m) % (int64_t)q;
            
            /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
            kL  = k *L;
            k1L = k1*L;
            for(j=0; j<L; j++) {
                A[kL+j] = A[k1L+j];
            }
            k = k1;
        }
    } 
    else {
        /* Full cycle: stop once the cycle comes back to its start */
        while (1) {
            k1 = (k * m) % (int64_t)q;
            if( k1 == s ) 
                break;
            
            /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
            kL  = k *L;
            k1L = k1*L;
            for (j=0; j<L; j++) {
                A[kL+j] = A[k1L+j];
            }
            k = k1;
        }
    }
    /* Close the chain with the saved first block */
    memcpy(&(A[k*L]), W, L*sizeof(float));
}

Here is the caller graph for this function:

void CORE_sshiftw_quark ( Quark * quark )

Definition at line 130 of file core_sshift.c.

References A, CORE_sshiftw(), L, quark_unpack_args_7, and W.

{
    /* Arguments carried by the QUARK task */
    int s, cl, m, n, L;
    float *A, *W;

    /* Unpack the seven task arguments and forward them unchanged */
    quark_unpack_args_7(quark, s, cl, m, n, L, A, W);
    CORE_sshiftw(s, cl, m, n, L, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sssssm	(	int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	L1,
		int	LDL1,
		float *	L2,
		int	LDL2,
		int *	IPIV
	)

CORE_sssssm applies the LU factorization update computed by CORE_ststrf (the row interchanges IPIV, the IB-by-K lower triangular tile L1 and the M2-by-K tile L2) to a real matrix formed by an M1-by-N1 tile A1 on top of an M2-by-N2 tile A2 (N1 == N2).

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:

[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0.
[in]	N2	The number of columns of the tile A2. N2 >= 0.
[in]	K	The number of columns of the tiles L1 and L2. K >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of L.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of L.
[in]	LDA2	The leading dimension of the array A2. LDA2 >= max(1,M2).
[in]	L1	The IB-by-K lower triangular tile as returned by CORE_ststrf.
[in]	LDL1	The leading dimension of the array L1. LDL1 >= max(1,IB).
[in]	L2	The M2-by-K tile as returned by CORE_ststrf.
[in]	LDL2	The leading dimension of the array L2. LDL2 >= max(1,M2).
[in]	IPIV	as returned by CORE_ststrf.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if INFO = -k, the k-th argument had an illegal value

Definition at line 90 of file core_sssssm.c.

References cblas_sgemm(), cblas_sswap(), cblas_strsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * For each panel of at most IB columns of L1/L2: swap the pivoted rows
     * between A1 and A2, solve with the unit lower triangular block of L1
     * against the matching rows of A1, then update A2 -= L2_panel * A1_rows.
     */
    static float zone  = 1.0;
    static float mzone =-1.0;
    int row, panel, width;
    int piv, ip;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(1, "Illegal value of M1");
        return -1;
    }
    if (N1 < 0) {
        coreblas_error(2, "Illegal value of N1");
        return -2;
    }
    if (M2 < 0) {
        coreblas_error(3, "Illegal value of M2");
        return -3;
    }
    if (N2 < 0) {
        coreblas_error(4, "Illegal value of N2");
        return -4;
    }
    if (K < 0) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    if (IB < 0) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(8, "Illegal value of LDA1");
        return -8;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(10, "Illegal value of LDA2");
        return -10;
    }
    if (LDL1 < max(1,IB)) {
        coreblas_error(12, "Illegal value of LDL1");
        return -12;
    }
    if (LDL2 < max(1,M2)) {
        coreblas_error(14, "Illegal value of LDL2");
        return -14;
    }

    /* Nothing to do when any dimension is empty */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    ip = 0;
    for (panel = 0; panel < K; panel += IB) {
        /* The last panel may be narrower than IB */
        width = min(K-panel, IB);

        /* Apply the row interchanges of this panel */
        for (row = panel; row < panel + width; row++, ip++) {
            piv = IPIV[ip]-1;
            if (piv == row)
                continue;
            /* Pivot indices are 1-based and count the rows of A1 first */
            cblas_sswap(N1, &A1[row], LDA1, &A2[piv - M1], LDA2);
        }

        /* A1(panel rows) = inv(L1_panel) * A1(panel rows), unit lower triangular */
        cblas_strsm(
            CblasColMajor, CblasLeft, CblasLower,
            CblasNoTrans, CblasUnit,
            width, N1, (zone),
            &L1[LDL1*panel], LDL1,
            &A1[panel], LDA1);

        /* A2 = A2 - L2_panel * A1(panel rows) */
        cblas_sgemm(
            CblasColMajor, CblasNoTrans, CblasNoTrans,
            M2, N2, width,
            (mzone), &L2[LDL2*panel], LDL2,
            &A1[panel], LDA1,
            (zone), A2, LDA2);
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sssssm_quark ( Quark * quark )

Definition at line 219 of file core_sssssm.c.

References CORE_sssssm(), IPIV, and quark_unpack_args_15.

{
    /* Dimensions and inner-blocking size */
    int m1, n1, m2, n2, k, ib;
    /* Tiles with their leading dimensions */
    float *A1;
    int lda1;
    float *A2;
    int lda2;
    float *L1;
    int ldl1;
    float *L2;
    int ldl2;
    /* Pivot indices */
    int *IPIV;

    /* Unpack the 15 task arguments and forward them; the return code is dropped */
    quark_unpack_args_15(quark, m1, n1, m2, n2, k, ib,
                         A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
    CORE_sssssm(m1, n1, m2, n2, k, ib,
                A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sswpab	(	int	i,
		int	n1,
		int	n2,
		float *	A,
		float *	work
	)

CORE_sswpab swaps two adjacent contiguous blocks of data.

      n1                     n2
+-------------+-------------------------------+

become :
              n2                       n1
+-------------------------------+-------------+

Parameters:

[in,out]	A	Array of size i+n1+n2. On entry, a block of size n1 followed by a block of size n2. On exit, the block of size n1 follows the block of size n2.
[in]	i	First block starts at A[i].
[in]	n1	Size of the first block to swap.
[in]	n2	Size of the second block to swap.
[out]	work	Workspace array of size min(n1, n2).

Definition at line 63 of file core_sswpab.c.

                                                               {
    /* Start of the first block, of the second block, and of the place the
     * first block occupies once the two have been exchanged */
    float *first     = A + i;
    float *second    = A + i + n1;
    float *first_dst = A + i + n2;

    if( n1 < n2 ) {
        /* Save the (smaller) first block, slide the second one down to the
         * front, then drop the saved block behind it */
        memcpy(work, first, n1*sizeof(float));
        memmove(first, second, n2*sizeof(float));
        memcpy(first_dst, work, n1*sizeof(float));
    } else {
        /* Save the (smaller) second block, slide the first one up to the
         * back, then drop the saved block in front of it */
        memcpy(work, second, n2*sizeof(float));
        memmove(first_dst, first, n1*sizeof(float));
        memcpy(first, work, n2*sizeof(float));
    }
}

Here is the caller graph for this function:

void CORE_sswpab_quark ( Quark * quark )

Definition at line 107 of file core_sswpab.c.

References A, CORE_sswpab(), and quark_unpack_args_5.

{
    /* Arguments carried by the QUARK task */
    int i, n1, n2;
    float *A, *work;

    /* Unpack the five task arguments and forward them unchanged */
    quark_unpack_args_5(quark, i, n1, n2, A, work);
    CORE_sswpab(i, n1, n2, A, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sswptr_ontile	(	PLASMA_desc	descA,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	Akk,
		int	ldak
	)

CORE_sswptr_ontile apply the slaswp function on a matrix stored in tile layout, followed by a strsm on the first tile of the panel.

Parameters:

[in,out]	A	The descriptor of the matrix A to permute.
[in]	i1	The first element of IPIV for which a row interchange will be done.
[in]	i2	The last element of IPIV for which a row interchange will be done.
[in]	ipiv	The pivot indices; Only the element in position i1 to i2 are accessed. The pivot are offset by A.i.
[in]	inc	The increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.

Definition at line 325 of file core_slaswp.c.

References A, BLKLDD, cblas_strsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, CORE_slaswp_ontile(), coreblas_error, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nt, and PLASMA_SUCCESS.

{
    float zone  = 1.0;
    int lda;
    int m;

    /* Row count used below: the full height when there is a single row of
     * tiles, otherwise one tile height */
    if (descA.mt == 1)
        m = descA.m;
    else
        m = descA.mb;

    /* Check input arguments */
    if ( descA.nt > 1 ) {
        coreblas_error(1, "Illegal value of descA.nt");
        return -1;
    }
    if ( i1 < 1 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > m) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }

    /* Row interchanges on the tiled matrix; the return code is not checked */
    CORE_slaswp_ontile(descA, i1, i2, ipiv, inc);

    /* Triangular solve of the first tile against the unit lower part of Akk */
    lda = BLKLDD(descA, 0);
    cblas_strsm(CblasColMajor, CblasLeft, CblasLower,
                CblasNoTrans, CblasUnit,
                m, descA.n, (zone),
                Akk, ldak,
                A(0, 0), lda);

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sswptr_ontile_quark ( Quark * quark )

Definition at line 385 of file core_slaswp.c.

References A, CORE_sswptr_ontile(), and quark_unpack_args_8.

{
    /* Task arguments, unpacked in the order the task was inserted. */
    PLASMA_desc descA;
    float *A;       /* unpacked to consume its argument slot; not used below */
    float *Akk;
    int *ipiv;
    int first, last;
    int inc;
    int ldak;

    quark_unpack_args_8(quark, descA, A, first, last, ipiv, inc, Akk, ldak);

    /* Forward to the sequential kernel; its return code is not propagated. */
    CORE_sswptr_ontile(descA, first, last, ipiv, inc, Akk, ldak);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssygst_quark ( Quark * quark )

Definition at line 67 of file core_ssygst.c.

References A, B, itype, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_10, plasma_sequence_t::status, and uplo.

{
    /* Task arguments. */
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    float *A;
    float *B;
    PLASMA_enum uplo;
    int itype;
    int n;
    int lda;
    int ldb;
    int iinfo;      /* offset added to the LAPACK status when reporting */
    int status;     /* LAPACK return code */

    quark_unpack_args_10(quark, itype, uplo, n, A, lda, B, ldb, sequence, request, iinfo);

    status = LAPACKE_ssygst_work(LAPACK_COL_MAJOR, itype, lapack_const(uplo),
                                 n, A, lda, B, ldb);

    /* Only flush a sequence that has not already failed, and only on a nonzero status. */
    if (sequence->status == PLASMA_SUCCESS) {
        if (status != 0) {
            plasma_sequence_flush(quark, sequence, request, iinfo+status);
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssymm	(	int	side,
		int	uplo,
		int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB,
		float	beta,
		float *	C,
		int	LDC
	)

Definition at line 28 of file core_ssymm.c.

References cblas_ssymm(), and CblasColMajor.

{
    /* Convert the integer selectors to the CBLAS enumeration types. */
    const CBLAS_SIDE side_c = (CBLAS_SIDE)side;
    const CBLAS_UPLO uplo_c = (CBLAS_UPLO)uplo;

    /* Symmetric matrix-matrix product on column-major data; the result is written to C. */
    cblas_ssymm(CblasColMajor, side_c, uplo_c,
                M, N,
                alpha, A, LDA,
                B, LDB,
                beta, C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssymm_quark ( Quark * quark )

Definition at line 77 of file core_ssymm.c.

References A, B, C, cblas_ssymm(), CblasColMajor, quark_unpack_args_12, side, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A;
    float *B;
    float *C;
    float alpha;
    float beta;
    int side;
    int uplo;
    int M;
    int N;
    int LDA;
    int LDB;
    int LDC;

    quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC);

    /* Same call as the sequential CORE_ssymm kernel, issued directly on the unpacked values. */
    cblas_ssymm(CblasColMajor,
                (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
                M, N,
                alpha, A, LDA,
                B, LDB,
                beta, C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssyr2k	(	int	uplo,
		int	trans,
		int	N,
		int	K,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB,
		float	beta,
		float *	C,
		int	LDC
	)

Definition at line 28 of file core_ssyr2k.c.

References cblas_ssyr2k(), and CblasColMajor.

{
    /* Convert the integer selectors to the CBLAS enumeration types. */
    const CBLAS_UPLO      uplo_c  = (CBLAS_UPLO)uplo;
    const CBLAS_TRANSPOSE trans_c = (CBLAS_TRANSPOSE)trans;

    /* Symmetric rank-2k update of C on column-major data. */
    cblas_ssyr2k(CblasColMajor, uplo_c, trans_c,
                 N, K,
                 alpha, A, LDA, B, LDB,
                 beta, C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssyr2k_quark ( Quark * quark )

Definition at line 76 of file core_ssyr2k.c.

References A, B, C, CORE_ssyr2k(), quark_unpack_args_12, trans, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A;
    float *B;
    float *C;
    float alpha;
    float beta;
    int uplo;
    int trans;
    int n;
    int k;
    int lda;
    int ldb;
    int ldc;

    quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);

    /* Delegate to the sequential kernel. */
    CORE_ssyr2k(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssyrfb_quark ( Quark * quark )

Definition at line 215 of file core_ssyrfb.c.

References A, C, CORE_ssyrfb(), quark_unpack_args_13, T, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    PLASMA_enum uplo;
    float *A;
    float *T;
    float *C;
    float *WORK;
    int n;
    int k;
    int ib;
    int nb;
    int lda;
    int ldt;
    int ldc;
    int ldwork;

    quark_unpack_args_13(quark, uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);

    /* Delegate to the sequential kernel; its return value is not used. */
    CORE_ssyrfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssyrk	(	int	uplo,
		int	trans,
		int	N,
		int	K,
		float	alpha,
		float *	A,
		int	LDA,
		float	beta,
		float *	C,
		int	LDC
	)

Definition at line 28 of file core_ssyrk.c.

References cblas_ssyrk(), and CblasColMajor.

{
    /* Convert the integer selectors to the CBLAS enumeration types. */
    const CBLAS_UPLO      uplo_c  = (CBLAS_UPLO)uplo;
    const CBLAS_TRANSPOSE trans_c = (CBLAS_TRANSPOSE)trans;

    /* Symmetric rank-k update of C on column-major data. */
    cblas_ssyrk(CblasColMajor, uplo_c, trans_c,
                N, K,
                alpha, A, LDA,
                beta, C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ssyrk_quark ( Quark * quark )

Definition at line 72 of file core_ssyrk.c.

References A, C, cblas_ssyrk(), CblasColMajor, quark_unpack_args_10, trans, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A;
    float *C;
    float alpha;
    float beta;
    int uplo;
    int trans;
    int n;
    int k;
    int lda;
    int ldc;

    quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);

    /* Same call as the sequential CORE_ssyrk kernel, issued directly on the unpacked values. */
    cblas_ssyrk(CblasColMajor,
                (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
                n, k,
                alpha, A, lda,
                beta, C, ldc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strdalg	(	PLASMA_enum	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	pA,
		float *	V,
		float *	TAU,
		int	i,
		int	j,
		int	m,
		int	grsiz
	)

CORE_strdalg is part of the tridiagonal reduction algorithm (bulge chasing). It corresponds to a local driver of the kernels that should be executed on a single core.

Parameters:

[in]	uplo	PlasmaLower: PlasmaUpper:
[in]	N	The order of the matrix A. N >= 0.
[in]	NB	The size of the Bandwidth of the matrix A, which correspond to the tile size. NB >= 0.
[in]	pA	A pointer to the descriptor of the matrix A.
[out]	V	float array, dimension (N). The scalar elementary reflectors are written in this array. So it is used as a workspace for V at each step of the bulge chasing algorithm.
[out]	TAU	float array, dimension (N). The scalar factors of the elementary reflectors are written in this array. So it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in]	i	Integer that refer to the current sweep. (outer loop).
[in]	j	Integer that refer to the sweep to chase.(inner loop).
[in]	m	Integer that refer to a sweep step, to ensure order dependencies.
[in]	grsiz	Integer that refer to the size of a group. group mean the number of kernel that should be executed sequentially on the same core. group size is a trade-off between locality (cache reuse) and parallelism. a small group size increase parallelism while a large group size increase cache reuse.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 82 of file core_strdalg.c.

References A, CORE_shbelr(), CORE_shblrx(), CORE_shbrce(), plasma_desc_t::dtyp, min, and plasma_element_size().

{
    /*
     * Local driver: runs up to grsiz consecutive bulge-chasing kernels of
     * sweep (i, j, m) on the band matrix described by *pA.
     */
    int    k, shift=3;
    int    myid, colpt, stind, edind, blklastind, stepercol;
    size_t eltsize;
    PLASMA_desc A = *pA;

    /*
     * A group must contain at least one kernel.  grsiz == 0 would divide by
     * zero below, and for grsiz < 0 the loop body never runs, so there is
     * nothing to do in either case.
     */
    if (grsiz <= 0)
        return;

    eltsize = plasma_element_size(A.dtyp);

    /* stepercol = ceil(shift / grsiz): number of groups needed to cover 'shift' kernels. */
    k = shift / grsiz;
    stepercol = (k*grsiz == shift) ? k : k+1;

    for (k = 0; k < grsiz; k++){
        /* Sequential id of the kernel within the sweep; 1 is the first kernel. */
        myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + k+1;
        if(myid%2 ==0) {
            /* Even id: index range [stind, edind] ending at column pointer colpt, clipped to N. */
            colpt      = (myid/2) * NB + 1 + j - 1;
            stind      = colpt - NB + 1;
            edind      = min(colpt, N);
            blklastind = colpt;
        } else {
            /* Odd id: same range computation based on (myid+1)/2. */
            colpt      = ((myid+1)/2)*NB + 1 +j -1 ;
            stind      = colpt-NB+1;
            edind      = min(colpt,N);
            /* Flag the last block only when the range has shrunk to the end of the matrix. */
            if( (stind>=edind-1) && (edind==N) )
                blklastind = N;
            else
                blklastind = 0;
        }

        /* Kernel selection: first kernel, then even / odd ids use different kernels. */
        if( myid == 1 )
           CORE_shbelr(uplo, N, &A, V, TAU, stind, edind, eltsize);
        else if(myid%2 == 0)
           CORE_shbrce(uplo, N, &A, V, TAU, stind, edind, eltsize);
        else /*if(myid%2 == 1)*/
           CORE_shblrx(uplo, N, &A, V, TAU, stind, edind, eltsize);

        /* Stop the group early once the chase has reached the end of the matrix. */
        if(blklastind >= (N-1))  break;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strdalg_quark ( Quark * quark )

Definition at line 160 of file core_strdalg.c.

References CORE_strdalg(), quark_unpack_args_10, TAU, uplo, and V.

{
    /* Task arguments, declared with the same types they were packed with. */
    PLASMA_desc *pA;
    float *V;
    float *TAU;
    int uplo;
    int N;
    int NB;
    int i;
    int j;
    int m;
    int grsiz;

    quark_unpack_args_10(quark, uplo, N, NB, pA, V, TAU, i, j, m, grsiz);

    /* Delegate to the sequential driver. */
    CORE_strdalg(uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strmm	(	int	side,
		int	uplo,
		int	transA,
		int	diag,
		int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB
	)

Definition at line 28 of file core_strmm.c.

References cblas_strmm(), and CblasColMajor.

{
    /* Convert the integer selectors to the CBLAS enumeration types. */
    const CBLAS_SIDE      side_c  = (CBLAS_SIDE)side;
    const CBLAS_UPLO      uplo_c  = (CBLAS_UPLO)uplo;
    const CBLAS_TRANSPOSE trans_c = (CBLAS_TRANSPOSE)transA;
    const CBLAS_DIAG      diag_c  = (CBLAS_DIAG)diag;

    /* Triangular matrix-matrix product on column-major data; B is overwritten. */
    cblas_strmm(CblasColMajor, side_c, uplo_c, trans_c, diag_c,
                M, N,
                alpha, A, LDA,
                B, LDB);
}

Here is the call graph for this function:

void CORE_strmm_p2_quark ( Quark * quark )

Definition at line 132 of file core_strmm.c.

References A, B, cblas_strmm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    /* Task arguments; B arrives as a pointer to the tile pointer. */
    float *A;
    float **B;
    float *b_tile;
    float alpha;
    int side;
    int uplo;
    int transA;
    int diag;
    int M;
    int N;
    int LDA;
    int LDB;

    quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB);

    /* Resolve the indirection only now, when the task actually runs. */
    b_tile = *B;

    cblas_strmm(CblasColMajor,
                (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
                (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
                M, N,
                alpha, A, LDA,
                b_tile, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strmm_quark ( Quark * quark )

Definition at line 76 of file core_strmm.c.

References A, B, cblas_strmm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A;
    float *B;
    float alpha;
    int side;
    int uplo;
    int transA;
    int diag;
    int M;
    int N;
    int LDA;
    int LDB;

    quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB);

    /* Same call as the sequential CORE_strmm kernel, issued directly on the unpacked values. */
    cblas_strmm(CblasColMajor,
                (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
                (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
                M, N,
                alpha, A, LDA,
                B, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strsm	(	int	side,
		int	uplo,
		int	transA,
		int	diag,
		int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB
	)

Definition at line 28 of file core_strsm.c.

References cblas_strsm(), and CblasColMajor.

{
    /* Convert the integer selectors to the CBLAS enumeration types. */
    const CBLAS_SIDE      side_c  = (CBLAS_SIDE)side;
    const CBLAS_UPLO      uplo_c  = (CBLAS_UPLO)uplo;
    const CBLAS_TRANSPOSE trans_c = (CBLAS_TRANSPOSE)transA;
    const CBLAS_DIAG      diag_c  = (CBLAS_DIAG)diag;

    /* Triangular solve with multiple right-hand sides on column-major data; B is overwritten. */
    cblas_strsm(CblasColMajor, side_c, uplo_c, trans_c, diag_c,
                M, N,
                alpha, A, LDA,
                B, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strsm_quark ( Quark * quark )

Definition at line 75 of file core_strsm.c.

References A, B, cblas_strsm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A;
    float *B;
    float alpha;
    int side;
    int uplo;
    int transA;
    int diag;
    int m;
    int n;
    int lda;
    int ldb;

    quark_unpack_args_11(quark, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb);

    /* Same call as the sequential CORE_strsm kernel, issued directly on the unpacked values. */
    cblas_strsm(CblasColMajor,
                (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
                (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
                m, n,
                alpha, A, lda,
                B, ldb);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strtri	(	int	uplo,
		int	diag,
		int	N,
		float *	A,
		int	LDA,
		int *	info
	)

Definition at line 29 of file core_strtri.c.

References lapack_const.

{
    /* Invert the triangular matrix in place and hand the LAPACK status back through info. */
    const int status = LAPACKE_strtri_work(LAPACK_COL_MAJOR,
                                           lapack_const(uplo), lapack_const(diag),
                                           N, A, LDA);
    *info = status;
}

void CORE_strtri_quark ( Quark * quark )

Definition at line 67 of file core_strtri.c.

References A, diag, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_8, plasma_sequence_t::status, and uplo.

{
    /* Task arguments. */
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    float *A;
    int uplo;
    int diag;
    int N;
    int LDA;
    int iinfo;      /* offset added to the LAPACK status when reporting */
    int status;     /* LAPACK return code */

    quark_unpack_args_8(quark, uplo, diag, N, A, LDA, sequence, request, iinfo);

    status = LAPACKE_strtri_work(LAPACK_COL_MAJOR,
                                 lapack_const(uplo), lapack_const(diag),
                                 N, A, LDA);

    /* Only a positive status is reported, and only if the sequence has not already failed. */
    if (sequence->status == PLASMA_SUCCESS) {
        if (status > 0) {
            plasma_sequence_flush(quark, sequence, request, iinfo + status);
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stslqt	(	int	M,
		int	N,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_stslqt computes an LQ factorization of a rectangular matrix formed by coupling side-by-side a real M-by-M lower triangular tile A1 and a real M-by-N tile A2:

| A1 A2 | = L * Q

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A2(i,1:n), and tau in TAU(i).

Parameters:

[in]	M	The number of rows of the tile A1 and A2. M >= 0. The number of columns of the tile A1.
[in]	N	The number of columns of the tile A2. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M-by-M tile A1. On exit, the elements on and below the diagonal of the array contain the M-by-M lower trapezoidal tile L; the elements above the diagonal are not referenced.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out]	A2	On entry, the M-by-N tile A2. On exit, all the elements with the array TAU, represent the unitary tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 107 of file core_stslqt.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sger(), cblas_strmv(), CblasColMajor, CORE_stsmlq(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, and PlasmaUpper.

{
    /* Constants passed as alpha/beta to the BLAS calls below. */
    static float zone  = 1.0;
    static float zzero = 0.0;
    float alpha;
    int i, ii, sb;
    /* Check input arguments */
    /* NOTE(review): LDA1 and LDT are not validated in this routine. */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }
    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;
    /* Outer loop: process the rows in panels of at most IB rows; sb is the panel height. */
    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);
        /* Inner loop: one elementary reflector per row of the panel. */
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:N ).
             */
            /* The COMPLEX sections are compiled only when COMPLEX is defined. */
#ifdef COMPLEX
            LAPACKE_slacgv_work(N, &A2[ii+i], LDA2);
            LAPACKE_slacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1);
#endif
            /* Reflector built from the diagonal entry of A1 and row ii+i of A2 (stride LDA2). */
            LAPACKE_slarfg_work(N+1, &A1[LDA1*(ii+i)+ii+i], &A2[ii+i], LDA2, &TAU[ii+i]);
            /* alpha = -tau is the scale used for the updates and for the T column below. */
            alpha = -(TAU[ii+i]);
            if (ii+i+1 < M) {
                /*
                 * Apply H( II+I-1 ) to A( II+I:II+IB-1, II+I-1:N  ) from the right.
                 */
                /* WORK <- entries of column ii+i of A1 below the diagonal (sb-i-1 values). */
                cblas_scopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i)+(ii+i+1)], 1,
                    WORK, 1);
                /* WORK <- WORK + A2(ii+i+1 : ii+sb-1, :) * (row ii+i of A2). */
                cblas_sgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    sb-i-1, N,
                    (zone), &A2[ii+i+1], LDA2,
                    &A2[ii+i], LDA2,
                    (zone), WORK, 1);
                /* A1 column segment <- A1 column segment + alpha * WORK. */
                cblas_saxpy(
                    sb-i-1, (alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i)+ii+i+1], 1);
                /* Rank-1 update of the remaining panel rows of A2 with WORK and row ii+i. */
                cblas_sger(
                    CblasColMajor, sb-i-1, N,
                    (alpha), WORK, 1,
                    &A2[ii+i], LDA2,
                    &A2[ii+i+1], LDA2);
            }
            /*
             * Calculate T.
             */
            /* First i entries of column ii+i of T <- alpha * A2(ii : ii+i-1, :) * (row ii+i of A2). */
            cblas_sgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, i, N,
                (alpha), &A2[ii], LDA2,
                &A2[ii+i], LDA2,
                (zzero), &T[LDT*(ii+i)], 1);
#ifdef COMPLEX
            LAPACKE_slacgv_work(N, &A2[ii+i], LDA2 );
            LAPACKE_slacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1 );
#endif
            /* Multiply that column by the leading i-by-i upper triangular part of the panel's T block. */
            cblas_strmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);
            /* The diagonal entry of the T block is tau itself. */
            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        /* Apply the panel's block reflector to the rows below the panel, if any remain. */
        if (M > ii+sb) {
            CORE_stsmlq(
                PlasmaRight, PlasmaTrans,
                M-(ii+sb), sb, M-(ii+sb), N, IB, IB,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, LDA1);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stslqt_quark ( Quark * quark )

Definition at line 247 of file core_stslqt.c.

References CORE_stslqt(), quark_unpack_args_11, T, and TAU.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A1;
    float *A2;
    float *T;
    float *TAU;
    float *WORK;
    int m;
    int n;
    int ib;
    int lda1;
    int lda2;
    int ldt;

    quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);

    /* Delegate to the sequential kernel; its return code is not propagated. */
    CORE_stslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmlq	(	int	side,
		int	trans,
		int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	T,
		int	LDT,
		float *	WORK,
		int	LDWORK
	)

CORE_stsmlq overwrites the general real M1-by-N1 tile A1 and M2-by-N2 tile A2 with

                  SIDE = 'L'           SIDE = 'R'

TRANS = 'N':         Q * | A1 |        | A1 A2 | * Q
                         | A2 |

TRANS = 'C':      Q**T * | A1 |        | A1 A2 | * Q**T
                         | A2 |

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(k)' . . . H(2)' H(1)'

as returned by CORE_STSLQT.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 124 of file core_stsmlq.c.

References CORE_sparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
    int blk;                /* index of the first reflector of the current block */
    int first_blk;          /* block index the sweep starts from */
    int blk_step;           /* signed stride between blocks (+IB or -IB) */
    int nrefl;              /* number of reflectors in the current block */
    int work_min;           /* minimum leading dimension required for WORK */
    int row_off = 0;        /* row offset into A1 */
    int col_off = 0;        /* column offset into A1 */
    int rows_a1 = M1;       /* rows of A1 touched by the current block */
    int cols_a1 = N1;       /* columns of A1 touched by the current block */
    int forward;            /* nonzero when blocks are visited in increasing order */

    /* Argument validation: the first violated constraint decides the return code. */
    if (!((side == PlasmaLeft) || (side == PlasmaRight))) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    if (!((trans == PlasmaNoTrans) || (trans == PlasmaTrans))) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M1 < 0) {
        coreblas_error(3, "Illegal value of M1");
        return -3;
    }
    if (N1 < 0) {
        coreblas_error(4, "Illegal value of N1");
        return -4;
    }
    /* A2 must have as many rows as A1 when applying from the right. */
    if ( (M2 < 0) ||
         ( (side == PlasmaRight) && (M2 != M1) ) ) {
        coreblas_error(5, "Illegal value of M2");
        return -5;
    }
    /* A2 must have as many columns as A1 when applying from the left. */
    if ( (N2 < 0) ||
         ( (side == PlasmaLeft) && (N2 != N1) ) ) {
        coreblas_error(6, "Illegal value of N2");
        return -6;
    }
    if ( (K < 0) ||
         ( (side == PlasmaLeft)  && (K > M1) ) ||
         ( (side == PlasmaRight) && (K > N1) ) ) {
        coreblas_error(7, "Illegal value of K");
        return -7;
    }
    if (IB < 0) {
        coreblas_error(8, "Illegal value of IB");
        return -8;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(10, "Illegal value of LDA1");
        return -10;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(12, "Illegal value of LDA2");
        return -12;
    }
    if (LDV < max(1,K)) {
        coreblas_error(14, "Illegal value of LDV");
        return -14;
    }
    if (LDT < max(1,IB)) {
        coreblas_error(16, "Illegal value of LDT");
        return -16;
    }
    /* The minimum dimension of WORK depends on the side of application. */
    work_min = (side == PlasmaLeft) ? IB : N1;
    if (LDWORK < max(1,work_min)) {
        coreblas_error(18, "Illegal value of LDWORK");
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Left/NoTrans and Right/Trans sweep the blocks forward, the other two cases backward. */
    forward = ((side == PlasmaLeft)  && (trans == PlasmaNoTrans))
           || ((side == PlasmaRight) && (trans != PlasmaNoTrans));
    if (forward) {
        first_blk = 0;
        blk_step  = IB;
    }
    else {
        first_blk = ((K-1) / IB)*IB;
        blk_step  = -IB;
    }

    /* CORE_sparfb is called with the opposite transposition flag. */
    trans = (trans == PlasmaNoTrans) ? PlasmaTrans : PlasmaNoTrans;

    for (blk = first_blk; (blk > -1) && (blk < K); blk += blk_step) {
        nrefl = min(IB, K-blk);

        if (side == PlasmaLeft) {
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            rows_a1 = M1 - blk;
            row_off = blk;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            cols_a1 = N1 - blk;
            col_off = blk;
        }

        /*
         * Apply H or H' (NOTE: CORE_sparfb used to be CORE_stsrfb)
         */
        CORE_sparfb(side, trans, PlasmaForward, PlasmaRowwise,
                    rows_a1, cols_a1, M2, N2, nrefl, 0,
                    &A1[LDA1*col_off+row_off], LDA1,
                    A2, LDA2,
                    &V[blk], LDV,
                    &T[LDT*blk], LDT,
                    WORK, LDWORK);
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmlq_corner	(	int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	A3,
		int	lda3,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt,
		float *	WORK,
		int	ldwork
	)

CORE_stsmlq_corner: see CORE_stsmlq

This kernel applies left and right transformations as depicted below:

|I - VTV'| * | A1  A2 | * |I - VT'V'|
             | A2' A3 |

where A1 and A3 are symmetric matrices. Only the upper part of A1 and A3 is referenced. This is an ad hoc implementation and can be further optimized.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 125 of file core_stsmlq_corner.c.

References CORE_stsmlq(), coreblas_error, PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaTrans, side, and trans.

{
    /*
     * WORK is used as four consecutive regions, each nb columns wide with
     * leading dimension ldwork:
     *   w_a1      full copy of the symmetric tile A1
     *   w_a2t     transpose of A2
     *   w_a3      full copy of the symmetric tile A3
     *   w_scratch scratch space passed on to CORE_stsmlq
     */
    float *w_a1;
    float *w_a2t;
    float *w_a3;
    float *w_scratch;
    int row, col;

    if ( m1 != n1 ) {
        coreblas_error(1, "Illegal value of M1, N1");
        return -1;
    }

    w_a1      = WORK;
    w_a2t     = WORK + nb*ldwork;
    w_a3      = WORK + 2*nb*ldwork;
    w_scratch = WORK + 3*nb*ldwork;

    /* Rebuild the symmetric block: w_a1 <- A1, mirroring the upper triangle below the diagonal */
    for (row = 0; row < m1; row++) {
        for (col = row; col < n1; col++) {
            w_a1[row + col*ldwork] = A1[row + col*lda1];
            if (col > row) {
                w_a1[col + row*ldwork] = w_a1[row + col*ldwork];
            }
        }
    }

    /* Copy the transpose of A2: w_a2t <- A2' */
    for (col = 0; col < n2; col++) {
        for (row = 0; row < m2; row++) {
            w_a2t[col + row*ldwork] = A2[row + col*lda2];
        }
    }

    /* Right application on | A1 A2 | */
    CORE_stsmlq(PlasmaRight, PlasmaTrans, m1, n1, m2, n2, k, ib,
                w_a1, ldwork, A2, lda2,
                V, ldv, T, ldt,
                w_scratch, ldwork);

    /* Rebuild the symmetric block: w_a3 <- A3, mirroring the upper triangle below the diagonal */
    for (row = 0; row < m3; row++) {
        for (col = row; col < n3; col++) {
            w_a3[row + col*ldwork] = A3[row + col*lda3];
            if (col > row) {
                w_a3[col + row*ldwork] = w_a3[row + col*ldwork];
            }
        }
    }

    /* Right application on | A2' A3 | */
    CORE_stsmlq(PlasmaRight, PlasmaTrans, n2, m2, m3, n3, k, ib,
                w_a2t, ldwork, w_a3, ldwork,
                V, ldv, T, ldt,
                w_scratch, ldwork);

    /* Left application on | A1  | */
    /*                     | A2' | */
    CORE_stsmlq(PlasmaLeft, PlasmaNoTrans, m1, n1, n2, m2, k, ib,
                w_a1, ldwork, w_a2t, ldwork,
                V, ldv, T, ldt,
                w_scratch, ldwork);

    /* Copy the final result back to the upper part of A1 */
    for (row = 0; row < m1; row++) {
        for (col = row; col < n1; col++) {
            A1[row + col*lda1] = w_a1[row + col*ldwork];
        }
    }

    /* Left application on | A2 | */
    /*                     | A3 | */
    CORE_stsmlq(PlasmaLeft, PlasmaNoTrans, m2, n2, m3, n3, k, ib,
                A2, lda2, w_a3, ldwork,
                V, ldv, T, ldt,
                w_scratch, ldwork);

    /* Copy the final result back to the upper part of A3 */
    for (row = 0; row < m3; row++) {
        for (col = row; col < n3; col++) {
            A3[row + col*lda3] = w_a3[row + col*ldwork];
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmlq_corner_quark ( Quark * quark )

This kernel applies right and left transformations as depicted below:

|I - VTV'| * | A1  A2 | * |I - VT'V'|
             | A2' A3 |

where A1 and A3 are symmetric matrices. Only the upper part is referenced. This is an ad hoc implementation and can be further optimized.

Definition at line 266 of file core_stsmlq_corner.c.

References CORE_stsmlq_corner(), quark_unpack_args_21, T, and V.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A1;
    float *A2;
    float *A3;
    float *V;
    float *T;
    float *WORK;
    int m1, n1;
    int m2, n2;
    int m3, n3;
    int k;
    int ib;
    int nb;
    int lda1;
    int lda2;
    int lda3;
    int ldv;
    int ldt;
    int ldwork;

    quark_unpack_args_21(quark, m1, n1, m2, n2, m3, n3, k, ib, nb,
                         A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);

    /* Delegate to the sequential kernel; its return code is not propagated. */
    CORE_stsmlq_corner(m1, n1, m2, n2, m3, n3, k, ib, nb,
                       A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmlq_quark ( Quark * quark )

Definition at line 303 of file core_stsmlq.c.

References CORE_stsmlq(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Task arguments, declared with the same types they were packed with. */
    float *A1;
    float *A2;
    float *V;
    float *T;
    float *WORK;
    int side;
    int trans;
    int m1, n1;
    int m2, n2;
    int k;
    int ib;
    int lda1;
    int lda2;
    int ldv;
    int ldt;
    int ldwork;

    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
                         A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);

    /* Delegate to the sequential kernel; its return code is not propagated. */
    CORE_stsmlq(side, trans, m1, n1, m2, n2, k, ib,
                A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmlq_sytra1	(	int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt,
		float *	WORK,
		int	ldwork
	)

CORE_stsmlq_sytra1: see CORE_stsmlq

This kernel applies a right transformation to | A1' A2 |. The underlying kernel does not handle the transpose of A1, so A1 is explicitly transposed before and after the block of reflectors is applied. This could be optimized further by adapting the underlying kernel (ztsrfb) accordingly.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 125 of file core_stsmlq_sytra1.c.

References CORE_stsmlq(), coreblas_error, and PLASMA_SUCCESS.

{
    /*
     * Applies CORE_stsmlq to | A1' A2 | by transposing A1 in place,
     * calling the kernel, and transposing A1 back.
     *
     * Returns -3 if A1 is not square (m1 != n1); otherwise returns the
     * status reported by CORE_stsmlq.
     */
    int i, j;
    int info;
    float tmp;

    /* The in-place transposition below only works on a square tile. */
    if (m1 != n1) {
        coreblas_error(3, "Illegal value of M1, N1");
        return -3;
    }

    /* In-place transposition of A1: swap each strictly lower entry with
     * its mirror above the diagonal; the diagonal stays where it is. */
    for (j = 0; j < n1; j++) {
        for (i = j+1; i < m1; i++) {
            tmp                = A1[i + j*lda1];
            A1[i + j*lda1]     = A1[j + i*lda1];
            A1[j + i*lda1]     = tmp;
        }
    }

    /* Keep the kernel status instead of discarding it. */
    info = CORE_stsmlq(side, trans, m1, n1, m2, n2, k, ib,
                       A1, lda1, A2, lda2,
                       V,  ldv,  T,  ldt,
                       WORK, ldwork);

    /* Transpose A1 back unconditionally so the caller always gets the
     * tile in its original orientation, even if the kernel failed. */
    for (j = 0; j < n1; j++) {
        for (i = j+1; i < m1; i++) {
            tmp                = A1[i + j*lda1];
            A1[i + j*lda1]     = A1[j + i*lda1];
            A1[j + i*lda1]     = tmp;
        }
    }

    return info;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmlq_sytra1_quark ( Quark * quark )

This kernel applies a right transformation to | A1' A2 |. The underlying kernel does not handle the transpose of A1, so A1 is explicitly transposed before and after the block of reflectors is applied. This could be optimized further by adapting the underlying kernel (ztsrfb) accordingly.

Definition at line 218 of file core_stsmlq_sytra1.c.

References CORE_stsmlq_sytra1(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Application mode. */
    int side, trans;
    /* Tile dimensions, reflector count and inner blocking size. */
    int m1, n1, m2, n2;
    int k, ib;
    /* Tiles, reflectors, triangular factor and workspace. */
    float *A1, *A2, *V, *T, *WORK;
    int lda1, lda2, ldv, ldt, ldwork;

    /* Retrieve the 18 task arguments, in the order the kernel expects them. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2,
                         k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    /* Forward everything unchanged to the sequential kernel. */
    CORE_stsmlq_sytra1(side, trans,
                       m1, n1, m2, n2,
                       k, ib,
                       A1, lda1,
                       A2, lda2,
                       V, ldv,
                       T, ldt,
                       WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmqr	(	int	side,
		int	trans,
		int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	T,
		int	LDT,
		float *	WORK,
		int	LDWORK
	)

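CORE_stsmqr overwrites the general real M1-by-N1 tile A1 and M2-by-N2 tile A2 with

- SIDE = 'L', TRANS = 'N': $Q \begin{pmatrix} A_1 \\ A_2 \end{pmatrix}$
- SIDE = 'R', TRANS = 'N': $\begin{pmatrix} A_1 & A_2 \end{pmatrix} Q$
- SIDE = 'L', TRANS = 'T': $Q^{T} \begin{pmatrix} A_1 \\ A_2 \end{pmatrix}$
- SIDE = 'R', TRANS = 'T': $\begin{pmatrix} A_1 & A_2 \end{pmatrix} Q^{T}$

where Q is a real orthogonal matrix defined as the product of k elementary reflectors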

Q = H(1) H(2) . . . H(k)

as returned by CORE_STSQRT.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSQRT in the first k columns of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,M2) if side == PlasmaLeft; LDV >= max(1,N2) if side == PlasmaRight.
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-N1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,M1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 124 of file core_stsmqr.c.

References CORE_sparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, and PlasmaTrans.

{
    /*
     * Applies the k reflectors to the tile pair (A1, A2) in panels of at
     * most IB reflectors, delegating each panel to CORE_sparfb.
     */
    int from_left;              /* nonzero when side == PlasmaLeft      */
    int forward;                /* nonzero when panels run 0, IB, ...   */
    int nq, nw;                 /* lower bounds used for LDV and LDWORK */
    int first, stride, pos, kb;
    int rows1, cols1, a1_off;

    /* Check input arguments; the order fixes which error is reported. */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    from_left = (side == PlasmaLeft);

    /* nq is the order of Q */
    nq = from_left ? M2 : N2;
    nw = from_left ? IB : M1;

    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M1 < 0) {
        coreblas_error(3, "Illegal value of M1");
        return -3;
    }
    if (N1 < 0) {
        coreblas_error(4, "Illegal value of N1");
        return -4;
    }
    /* Right application needs A1 and A2 to have the same row count. */
    if ((M2 < 0) || (!from_left && (M2 != M1))) {
        coreblas_error(5, "Illegal value of M2");
        return -5;
    }
    /* Left application needs A1 and A2 to have the same column count. */
    if ((N2 < 0) || (from_left && (N2 != N1))) {
        coreblas_error(6, "Illegal value of N2");
        return -6;
    }
    if ((K < 0) || (K > (from_left ? M1 : N1))) {
        coreblas_error(7, "Illegal value of K");
        return -7;
    }
    if (IB < 0) {
        coreblas_error(8, "Illegal value of IB");
        return -8;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(10, "Illegal value of LDA1");
        return -10;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(12, "Illegal value of LDA2");
        return -12;
    }
    if (LDV < max(1,nq)) {
        coreblas_error(14, "Illegal value of LDV");
        return -14;
    }
    if (LDT < max(1,IB)) {
        coreblas_error(16, "Illegal value of LDT");
        return -16;
    }
    if (LDWORK < max(1,nw)) {
        coreblas_error(18, "Illegal value of LDWORK");
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0)) {
        return PLASMA_SUCCESS;
    }

    /* Panel order: ascending for (Left, Trans) and (Right, NoTrans),
     * descending from the last panel otherwise. IB != 0 here. */
    forward = from_left ? (trans != PlasmaNoTrans) : (trans == PlasmaNoTrans);
    if (forward) {
        first  = 0;
        stride = IB;
    }
    else {
        first  = ((K-1) / IB)*IB;
        stride = -IB;
    }

    for (pos = first; (pos >= 0) && (pos < K); pos += stride) {
        kb = min(IB, K-pos);

        if (from_left) {
            /* H or H' is applied to C(pos:m,1:n) */
            rows1  = M1 - pos;
            cols1  = N1;
            a1_off = pos;
        }
        else {
            /* H or H' is applied to C(1:m,pos:n) */
            rows1  = M1;
            cols1  = N1 - pos;
            a1_off = LDA1*pos;
        }

        /* Apply H or H' (NOTE: CORE_sparfb used to be CORE_stsrfb) */
        CORE_sparfb(
            side, trans, PlasmaForward, PlasmaColumnwise,
            rows1, cols1, M2, N2, kb, 0,
            A1 + a1_off, LDA1,
            A2, LDA2,
            V + LDV*pos, LDV,
            T + LDT*pos, LDT,
            WORK, LDWORK);
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmqr_corner	(	int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	A3,
		int	lda3,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt,
		float *	WORK,
		int	ldwork
	)

CORE_stsmqr_corner: see CORE_stsmqr

This kernel applies left and right transformations as depicted below: $(I - V T^{T} V^{T}) \begin{pmatrix} A_1 & A_2^{T} \\ A_2 & A_3 \end{pmatrix} (I - V T V^{T})$, where A1 and A3 are symmetric matrices. Only the lower part is referenced. This is an ad hoc implementation and can be further optimized.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSQRT in the first k columns of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-N1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,M1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 125 of file core_stsmqr_corner.c.

References CORE_stsmqr(), coreblas_error, PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaTrans, side, and trans.

{
    /*
     * WORK is used as four consecutive panels of nb columns each
     * (leading dimension ldwork):
     *   Wa1  : full symmetric copy of A1
     *   Wa2t : transpose of A2
     *   Wa3  : full symmetric copy of A3
     *   Wtmp : scratch handed to CORE_stsmqr
     */
    int row, col;
    float val;
    float *Wa1, *Wa2t, *Wa3, *Wtmp;

    if (m1 != n1) {
        coreblas_error(1, "Illegal value of M1, N1");
        return -1;
    }

    Wa1  = WORK;
    Wa2t = WORK + nb*ldwork;
    Wa3  = WORK + 2*nb*ldwork;
    Wtmp = WORK + 3*nb*ldwork;

    /* Rebuild the symmetric block: Wa1 <- A1 (lower part mirrored) */
    for (col = 0; col < n1; col++) {
        for (row = col; row < m1; row++) {
            val = A1[row + col*lda1];
            Wa1[row + col*ldwork] = val;
            if (row != col) {
                Wa1[col + row*ldwork] = val;
            }
        }
    }

    /* Copy the transpose of A2: Wa2t <- A2' */
    for (col = 0; col < n2; col++) {
        for (row = 0; row < m2; row++) {
            Wa2t[col + row*ldwork] = A2[row + col*lda2];
        }
    }

    /* Left application on |A1| */
    /*                     |A2| */
    CORE_stsmqr(PlasmaLeft, PlasmaTrans, m1, n1, m2, n2, k, ib,
                Wa1, ldwork, A2, lda2,
                V, ldv, T, ldt,
                Wtmp, ldwork);

    /* Rebuild the symmetric block: Wa3 <- A3 (lower part mirrored) */
    for (col = 0; col < n3; col++) {
        for (row = col; row < m3; row++) {
            val = A3[row + col*lda3];
            Wa3[row + col*ldwork] = val;
            if (row != col) {
                Wa3[col + row*ldwork] = val;
            }
        }
    }

    /* Left application on | A2'| */
    /*                     | A3 | */
    CORE_stsmqr(PlasmaLeft, PlasmaTrans, n2, m2, m3, n3, k, ib,
                Wa2t, ldwork, Wa3, ldwork,
                V, ldv, T, ldt,
                Wtmp, ldwork);

    /* Right application on | A1 A2' | */
    CORE_stsmqr(PlasmaRight, PlasmaNoTrans, m1, n1, n2, m2, k, ib,
                Wa1, ldwork, Wa2t, ldwork,
                V, ldv, T, ldt,
                Wtmp, ldwork);

    /* Copy back the final result to the lower part of A1 */
    for (col = 0; col < n1; col++) {
        for (row = col; row < m1; row++) {
            A1[row + col*lda1] = Wa1[row + col*ldwork];
        }
    }

    /* Right application on | A2 A3 | */
    CORE_stsmqr(PlasmaRight, PlasmaNoTrans, m2, n2, m3, n3, k, ib,
                A2, lda2, Wa3, ldwork,
                V, ldv, T, ldt,
                Wtmp, ldwork);

    /* Copy back the final result to the lower part of A3 */
    for (col = 0; col < n3; col++) {
        for (row = col; row < m3; row++) {
            A3[row + col*lda3] = Wa3[row + col*ldwork];
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmqr_corner_quark ( Quark * quark )

Definition at line 254 of file core_stsmqr_corner.c.

References CORE_stsmqr_corner(), quark_unpack_args_21, T, and V.

{
    /* Dimensions of the three tiles, reflector count and blocking sizes. */
    int m1, n1, m2, n2, m3, n3;
    int k, ib, nb;
    /* Tile pointers and their leading dimensions. */
    float *A1, *A2, *A3;
    int lda1, lda2, lda3;
    /* Reflectors, triangular factor and workspace, with leading dimensions. */
    float *V, *T, *WORK;
    int ldv, ldt, ldwork;

    /* Retrieve the 21 task arguments, in the order the kernel expects them. */
    quark_unpack_args_21(quark,
                         m1, n1, m2, n2, m3, n3,
                         k, ib, nb,
                         A1, lda1,
                         A2, lda2,
                         A3, lda3,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    /* Forward everything unchanged to the sequential kernel. */
    CORE_stsmqr_corner(m1, n1, m2, n2, m3, n3,
                       k, ib, nb,
                       A1, lda1,
                       A2, lda2,
                       A3, lda3,
                       V, ldv,
                       T, ldt,
                       WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmqr_quark ( Quark * quark )

Definition at line 298 of file core_stsmqr.c.

References CORE_stsmqr(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Application mode. */
    int side, trans;
    /* Tile dimensions, reflector count and inner blocking size. */
    int m1, n1, m2, n2;
    int k, ib;
    /* Tiles, reflectors, triangular factor and workspace. */
    float *A1, *A2, *V, *T, *WORK;
    int lda1, lda2, ldv, ldt, ldwork;

    /* Retrieve the 18 task arguments, in the order the kernel expects them. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2,
                         k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    /* Forward everything unchanged to the sequential kernel. */
    CORE_stsmqr(side, trans,
                m1, n1, m2, n2,
                k, ib,
                A1, lda1,
                A2, lda2,
                V, ldv,
                T, ldt,
                WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsmqr_sytra1	(	int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt,
		float *	WORK,
		int	ldwork
	)

CORE_stsmqr_sytra1: see CORE_stsmqr

This kernel applies a left transformation to the stacked tiles $\begin{pmatrix} A_1^{T} \\ A_2 \end{pmatrix}$.

A1 is therefore explicitly transposed before and after the block of reflectors is applied. This could be optimized further by adapting the underlying kernel (ztsrfb) accordingly.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	m1	The number of rows of the tile A1. M1 >= 0.
[in]	n1	The number of columns of the tile A1. N1 >= 0.
[in]	m2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	n2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	k	The number of elementary reflectors whose product defines the matrix Q.
[in]	ib	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	lda1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	lda2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STSQRT in the first k columns of its array argument V.
[in]	ldv	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	ldt	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-N1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	ldwork	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,M1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 127 of file core_stsmqr_sytra1.c.

References CORE_stsmqr(), coreblas_error, and PLASMA_SUCCESS.

{
    /*
     * Applies CORE_stsmqr to the pair (A1', A2) by transposing A1 in
     * place, calling the kernel, and transposing A1 back.
     *
     * Returns -3 if A1 is not square (m1 != n1); otherwise returns the
     * status reported by CORE_stsmqr.
     */
    int i, j;
    int info;
    float tmp;

    /* The in-place transposition below only works on a square tile. */
    if (m1 != n1) {
        coreblas_error(3, "Illegal value of M1, N1");
        return -3;
    }

    /* In-place transposition of A1: swap each strictly lower entry with
     * its mirror above the diagonal; the diagonal stays where it is. */
    for (j = 0; j < n1; j++) {
        for (i = j+1; i < m1; i++) {
            tmp                = A1[i + j*lda1];
            A1[i + j*lda1]     = A1[j + i*lda1];
            A1[j + i*lda1]     = tmp;
        }
    }

    /* Keep the kernel status instead of discarding it. */
    info = CORE_stsmqr(side, trans, m1, n1, m2, n2, k, ib,
                       A1, lda1, A2, lda2,
                       V,  ldv,  T,  ldt,
                       WORK, ldwork);

    /* Transpose A1 back unconditionally so the caller always gets the
     * tile in its original orientation, even if the kernel failed. */
    for (j = 0; j < n1; j++) {
        for (i = j+1; i < m1; i++) {
            tmp                = A1[i + j*lda1];
            A1[i + j*lda1]     = A1[j + i*lda1];
            A1[j + i*lda1]     = tmp;
        }
    }

    return info;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsmqr_sytra1_quark ( Quark * quark )

Definition at line 212 of file core_stsmqr_sytra1.c.

References CORE_stsmqr_sytra1(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Application mode. */
    int side, trans;
    /* Tile dimensions, reflector count and inner blocking size. */
    int m1, n1, m2, n2;
    int k, ib;
    /* Tiles, reflectors, triangular factor and workspace. */
    float *A1, *A2, *V, *T, *WORK;
    int lda1, lda2, ldv, ldt, ldwork;

    /* Retrieve the 18 task arguments, in the order the kernel expects them. */
    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2,
                         k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    /* Forward everything unchanged to the sequential kernel. */
    CORE_stsmqr_sytra1(side, trans,
                       m1, n1, m2, n2,
                       k, ib,
                       A1, lda1,
                       A2, lda2,
                       V, ldv,
                       T, ldt,
                       WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_stsqrt	(	int	M,
		int	N,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_stsqrt computes a QR factorization of a rectangular matrix formed by coupling a real N-by-N upper triangular tile A1 on top of a real M-by-N tile A2:

$\begin{pmatrix} A_1 \\ A_2 \end{pmatrix} = Q R$

Parameters:

[in]	M	The number of rows of the tile A2. M >= 0.
[in]	N	The number of rows of the tile A1. The number of columns of the tiles A1 and A2. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the N-by-N tile A1. On exit, the elements on and above the diagonal of the array contain the N-by-N upper trapezoidal tile R; the elements below the diagonal are not referenced.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,N).
[in,out]	A2	On entry, the M-by-N tile A2. On exit, all the elements with the array TAU, represent the unitary tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 97 of file core_stsqrt.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sger(), cblas_strmv(), CblasColMajor, CORE_stsmqr(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaTrans, and PlasmaUpper.

{
    /*
     * Blocked QR factorization of the tile A1 stacked on top of A2.
     * Columns are processed in panels of at most IB columns: each column
     * gets one elementary reflector, the matching column of T is built,
     * and the finished panel is applied to the trailing columns.
     *
     * Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal.
     */
    static float zone  = 1.0;
    static float zzero = 0.0;
    float alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    /* LDA2 is the 7th argument (M, N, IB, A1, LDA1, A2, LDA2). */
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        /* Width of the current panel (the last one may be narrower). */
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:M, II*IB+I )
             *
             * The reflector is built from the diagonal entry of A1 and
             * the whole column ii+i of A2.
             */
            LAPACKE_slarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);

            if (ii+i+1 < N) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
                 */
                alpha = -(TAU[ii+i]);
                /* WORK <- row ii+i of A1, restricted to the rest of the panel */
                cblas_scopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(sb-i-1, WORK, 1);
#endif
                /* WORK <- WORK + A2(:, rest of panel)' * v */
                cblas_sgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaTrans,
                    M, sb-i-1,
                    (zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    (zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(sb-i-1, WORK, 1 );
#endif
                /* Row of A1 <- row of A1 - tau * WORK */
                cblas_saxpy(
                    sb-i-1, (alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(sb-i-1, WORK, 1 );
#endif
                /* A2(:, rest of panel) <- A2(:, rest of panel) - tau * v * WORK' */
                cblas_sger(
                    CblasColMajor, M, sb-i-1, (alpha),
                    &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }

            /*
             * Calculate T
             *
             * Column ii+i of T: first i entries are
             * -tau * A2(:, ii:ii+i-1)' * v, then multiplied by the
             * leading i-by-i upper triangle of this panel's T; the
             * diagonal entry is tau itself.
             */
            alpha = -TAU[ii+i];
            cblas_sgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaTrans, M, i,
                (alpha), &A2[LDA2*ii], LDA2,
                &A2[LDA2*(ii+i)], 1,
                (zzero), &T[LDT*(ii+i)], 1);
            cblas_strmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);
            T[LDT*(ii+i)+i] = TAU[ii+i];
        }

        /* Apply the finished panel to the columns to its right, if any. */
        if (N > ii+sb) {
            CORE_stsmqr(
                PlasmaLeft, PlasmaTrans,
                sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsqrt_quark ( Quark * quark )

Definition at line 238 of file core_stsqrt.c.

References CORE_stsqrt(), quark_unpack_args_11, T, and TAU.

{
    /* Tile dimensions and inner blocking size. */
    int m, n, ib;
    /* Tiles, triangular factor, reflector scalars and workspace. */
    float *A1, *A2, *T, *TAU, *WORK;
    int lda1, lda2, ldt;

    /* Retrieve the 11 task arguments, in the order the kernel expects them. */
    quark_unpack_args_11(quark,
                         m, n, ib,
                         A1, lda1,
                         A2, lda2,
                         T, ldt,
                         TAU, WORK);

    /* Forward everything unchanged to the sequential kernel. */
    CORE_stsqrt(m, n, ib,
                A1, lda1,
                A2, lda2,
                T, ldt,
                TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_ststrf	(	int	M,
		int	N,
		int	IB,
		int	NB,
		float *	U,
		int	LDU,
		float *	A,
		int	LDA,
		float *	L,
		int	LDL,
		int *	IPIV,
		float *	WORK,
		int	LDWORK,
		int *	INFO
	)

CORE_ststrf computes an LU factorization of a real matrix formed by an upper triangular NB-by-N tile U on top of an M-by-N tile A using partial pivoting with row interchanges.

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:

[in]	M	The number of rows of the tile A. M >= 0.
[in]	N	The number of columns of the tile A. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in]	NB
[in,out]	U	On entry, the NB-by-N upper triangular tile. On exit, the new factor U from the factorization
[in]	LDU	The leading dimension of the array U. LDU >= max(1,NB).
[in,out]	A	On entry, the M-by-N tile to be factored. On exit, the factor L from the factorization
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in,out]	L	On entry, the IB-by-N lower triangular tile. On exit, the interchanged rows form the tile A in case of pivoting.
[in]	LDL	The leading dimension of the array L. LDL >= max(1,IB).
[out]	IPIV	The pivot indices; for 1 <= i <= min(M,N), row i of the tile U was interchanged with row IPIV(i) of the tile A.
[in,out]	WORK
[in]	LDWORK	The dimension of the array WORK.
[out]	INFO

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if INFO = -k, the k-th argument had an illegal value
>0	if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Definition at line 99 of file core_ststrf.c.

References cblas_isamax(), cblas_scopy(), cblas_sger(), cblas_sscal(), cblas_sswap(), CblasColMajor, CORE_sssssm(), coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Blocked LU factorization of the tile U stacked on top of the tile A.
     * Columns are processed in panels of at most IB columns; for each
     * column the pivot is either the diagonal entry of U or the largest
     * entry of the matching column of A.
     *
     * Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal.
     * *INFO receives the 1-based index of the first column where both
     * pivot candidates are exactly zero, or 0 if there is none.
     */
    static float zzero = 0.0;
    static float mzone =-1.0;
    float alpha;
    int i, j, ii, sb;
    int im, ip;
    /* Check input arguments */
    *INFO = 0;
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDU < max(1,NB)) && (NB > 0)) {
        coreblas_error(6, "Illegal value of LDU");
        return -6;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((LDL < max(1,IB)) && (IB > 0)) {
        coreblas_error(10, "Illegal value of LDL");
        return -10;
    }
    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;
    /* Set L to 0 */
    memset(L, 0, LDL*N*sizeof(float));
    /* ip is the running position in IPIV; it advances once per column. */
    ip = 0;
    for (ii = 0; ii < N; ii += IB) {
        /* Width of the current panel (the last one may be narrower). */
        sb = min(N-ii, IB);
        for (i = 0; i < sb; i++) {
            /* im: 0-based row of the largest-magnitude entry in column ii+i of A. */
            im = cblas_isamax(M, &A[LDA*(ii+i)], 1);
            /* Default pivot: the diagonal entry of U (1-based index, no interchange). */
            IPIV[ip] = ii+i+1;
            /* Interchange only when the candidate from A is strictly larger. */
            if (fabsf(A[LDA*(ii+i)+im]) > fabsf(U[LDU*(ii+i)+ii+i])) {
                /*
                 * Swap behind.
                 */
                cblas_sswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK );
                /*
                 * Swap ahead.
                 */
                cblas_sswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA );
                /*
                 * Set IPIV.
                 * Pivot rows taken from A are recorded with an offset of NB.
                 */
                IPIV[ip] = NB + im + 1;
                /* Clear row im of A in the panel columns already processed. */
                for (j = 0; j < i; j++) {
                    A[LDA*(ii+j)+im] = zzero;
                }
            }
            /* Record only the first column where both candidates are exactly zero. */
            if ((*INFO == 0) && (fabsf(A[LDA*(ii+i)+im]) == zzero)
                && (fabsf(U[LDU*(ii+i)+ii+i]) == zzero)) {
                *INFO = ii+i+1;
            }
            /* Scale column ii+i of A by the reciprocal of the pivot now held in U;
             * no guard is applied when that pivot is zero. */
            alpha = ((float)1. / U[LDU*(ii+i)+ii+i]);
            cblas_sscal(M, (alpha), &A[LDA*(ii+i)], 1);
            /* Keep a copy of the scaled column in column i of WORK. */
            cblas_scopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1);
            /* Rank-1 update of the remaining sb-i-1 columns of the panel in A. */
            cblas_sger(
                CblasColMajor, M, sb-i-1,
                (mzone), &A[LDA*(ii+i)], 1,
                &U[LDU*(ii+i+1)+ii+i], LDU,
                &A[LDA*(ii+i+1)], LDA );
            ip = ip+1;
        }
        /*
         * Apply the subpanel to the rest of the panel.
         * Here i == sb after the loop above, so the test checks whether
         * any columns remain to the right of the current panel.
         */
        if(ii+i < N) {
            /* Make the pivots that refer to U (<= NB) relative to the panel start. */
            for(j = ii; j < ii+sb; j++) {
                if (IPIV[j] <= NB) {
                    IPIV[j] = IPIV[j] - ii;
                }
            }
            CORE_sssssm(
                NB, N-(ii+sb), M, N-(ii+sb), sb, sb,
                &U[LDU*(ii+sb)+ii], LDU,
                &A[LDA*(ii+sb)], LDA,
                &L[LDL*ii], LDL,
                WORK, LDWORK, &IPIV[ii]);
            /* Restore those pivots to indices relative to the whole tile. */
            for(j = ii; j < ii+sb; j++) {
                if (IPIV[j] <= NB) {
                    IPIV[j] = IPIV[j] + ii;
                }
            }
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ststrf_quark ( Quark * quark )

Definition at line 258 of file core_ststrf.c.

References A, CORE_ststrf(), IPIV, L, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_17.

{
    /* Tile dimensions and blocking sizes. */
    int m, n, ib, nb;
    /* Tiles and workspace, with their leading dimensions. */
    float *U, *A, *L, *WORK;
    int ldu, lda, ldl, ldwork;
    int *IPIV;
    /* Error-reporting context carried by the task. */
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo;
    /* INFO value produced by the kernel. */
    int info;

    /* Retrieve the 17 task arguments, in the order they were packed. */
    quark_unpack_args_17(quark,
                         m, n, ib, nb,
                         U, ldu,
                         A, lda,
                         L, ldl,
                         IPIV,
                         WORK, ldwork,
                         sequence, request,
                         check_info, iinfo);

    CORE_ststrf(m, n, ib, nb,
                U, ldu,
                A, lda,
                L, ldl,
                IPIV,
                WORK, ldwork,
                &info);

    /* When checking is enabled, report a non-zero INFO, offset by iinfo. */
    if (check_info && (info != PLASMA_SUCCESS)) {
        plasma_sequence_flush(quark, sequence, request, iinfo + info);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sttlqt	(	int	M,
		int	N,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_sttlqt computes an LQ factorization of a rectangular matrix formed by coupling side-by-side a real M-by-M lower triangular tile A1 and a real M-by-N lower triangular tile A2:

| A1 A2 | = L * Q

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A2(i,1:n), and tau in TAU(i).

Parameters:

[in]	M	The number of rows of the tile A1 and A2. M >= 0. The number of columns of the tile A1.
[in]	N	The number of columns of the tile A2. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M-by-M tile A1. On exit, the elements on and below the diagonal of the array contain the M-by-M lower trapezoidal tile L; the elements above the diagonal are not referenced.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out]	A2	On entry, the M-by-N lower triangular tile A2. On exit, the elements on and below the diagonal of the array with the array TAU, represent the unitary tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA2	The leading dimension of the array A2. LDA2 >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[in,out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 100 of file core_sttlqt.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sger(), cblas_strmv(), CblasColMajor, CORE_slaset(), CORE_sparfb(), CORE_spemv(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * Blocked LQ factorization of the coupled tiles | A1 A2 |.
     *
     * The rows are processed in panels of at most IB rows.  Inside a panel
     * each row j gets one elementary reflector (LAPACKE_slarfg_work), which
     * is applied immediately to the remaining rows of the panel and then
     * folded into the block factor T.  After the panel is finished, the
     * block reflector is applied to the rows below it with CORE_sparfb.
     *
     * Returns PLASMA_SUCCESS, or -1 / -2 / -3 / -7 when M, N, IB or LDA2
     * is illegal.  LDA1 and LDT are not validated in this body.
     */
    static float zone  = 1.0;
    static float zzero = 0.0;
#ifdef COMPLEX
    static int                ione  = 1;
#endif
    float alpha;
    int i, j, l, ii, sb, mi, ni;
    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }
    /* Quick return: nothing to factor when any extent is zero */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;
    /* TODO: Need to check why some cases require
     *  this to not have uninitialized values.
     *  Until then the whole IB-by-N array T is reset through CORE_slaset
     *  with both scalar arguments equal to 0. */
    CORE_slaset( PlasmaUpperLower, IB, N,
                 0., 0., T, LDT);
    /* ii: first row of the current panel; sb: number of rows in it */
    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);
        for(i = 0; i < sb; i++) {
            j  = ii + i;            /* global index of the row being reduced   */
            mi = sb-i-1;            /* rows of this panel that lie below row j */
            ni = min( j + 1, N);    /* length of the A2 row segment used       */
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
             * The pivot is A1(j,j); the vector is row j of A2 (stride LDA2).
             */
#ifdef COMPLEX
            LAPACKE_slacgv_work(ni, &A2[j], LDA2);
            LAPACKE_slacgv_work(ione, &A1[LDA1*j+j], LDA1);
#endif
            LAPACKE_slarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);
            if (mi > 0) {
                /*
                 * Apply H( j-1 ) to A( j:II+IB-1, j-1:M  ) from the right.
                 *
                 * WORK  = A1(j+1:j+mi, j) + A2(j+1:j+mi, 0:ni-1) * A2(j, 0:ni-1)'
                 * A1(j+1:j+mi, j)      += -tau * WORK
                 * A2(j+1:j+mi, 0:ni-1) += -tau * WORK * A2(j, 0:ni-1)
                 */
                cblas_scopy(
                    mi,
                    &A1[LDA1*j+(j+1)], 1,
                    WORK, 1);
                cblas_sgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    mi, ni,
                    (zone), &A2[j+1], LDA2,
                    &A2[j], LDA2,
                    (zone), WORK, 1);
                alpha = -(TAU[j]);
                cblas_saxpy(
                    mi, (alpha),
                    WORK, 1,
                    &A1[LDA1*j+j+1], 1);
                cblas_sger(
                    CblasColMajor, mi, ni,
                    (alpha), WORK, 1,
                    &A2[j], LDA2,
                    &A2[j+1], LDA2);
            }
            /*
             * Calculate T.
             * Column j of T first receives -tau times the product of the
             * panel's previous reflector rows with row j (CORE_spemv), then
             * is multiplied by the leading i-by-i upper triangle of the
             * panel's T block.  Skipped for the first row of a panel.
             */
            if (i > 0 ) {
                l = min(i, max(0, N-ii));
                alpha = -(TAU[j]);
                CORE_spemv(
                        PlasmaNoTrans, PlasmaRowwise,
                        i , min(j, N), l,
                        alpha, &A2[ii], LDA2,
                        &A2[j], LDA2,
                        zzero, &T[LDT*j], 1,
                        WORK);
                /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
                cblas_strmv(
                        CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                        (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        (CBLAS_DIAG)PlasmaNonUnit,
                        i, &T[LDT*ii], LDT,
                        &T[LDT*j], 1);
            }
#ifdef COMPLEX
            LAPACKE_slacgv_work(ni, &A2[j], LDA2 );
            LAPACKE_slacgv_work(ione, &A1[LDA1*j+j], LDA1 );
#endif
            /* The diagonal entry of the panel's T block is tau itself */
            T[LDT*j+i] = TAU[j];
        }
        /* Apply Q to the rest of the matrix to the right
         * (only when rows remain below the current panel) */
        if (M > ii+sb) {
            mi = M-(ii+sb);
            ni = min(ii+sb, N);
            l  = min(sb, max(0, ni-ii));
            CORE_sparfb(
                PlasmaRight, PlasmaNoTrans,
                PlasmaForward, PlasmaRowwise,
                mi, IB, mi, ni, sb, l,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, M);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sttlqt_quark ( Quark * quark )

Definition at line 273 of file core_sttlqt.c.

References CORE_sttlqt(), quark_unpack_args_11, T, and TAU.

{
    /* Locals receiving the 11 task arguments. */
    int m, n, ib;
    int lda1, lda2, ldt;
    float *A1, *A2;
    float *T, *TAU;
    float *WORK;

    /* Unpack the task arguments, then run the kernel on them.  The kernel's
     * return status is not examined by this wrapper. */
    quark_unpack_args_11(quark, m, n, ib,
                         A1, lda1, A2, lda2,
                         T, ldt, TAU, WORK);
    CORE_sttlqt(m, n, ib,
                A1, lda1, A2, lda2,
                T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sttmlq	(	int	side,
		int	trans,
		int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	T,
		int	LDT,
		float *	WORK,
		int	LDWORK
	)

CORE_sttmlq overwrites the general real M1-by-N1 tile A1 and M2-by-N2 tile A2 (N1 == N2) with

                  SIDE = 'L'        SIDE = 'R'

TRANS = 'N':      Q * | A1 |        | A1 | * Q
                      | A2 |        | A2 |

TRANS = 'C':   Q**T * | A1 |        | A1 | * Q**T
                      | A2 |        | A2 |

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_sttlqt.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0.
[in]	N2	The number of columns of the tile A2. N2 >= 0.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STTLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-N1.
[in]	LDWORK	The dimension of the array WORK. LDWORK >= max(1,IB).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 116 of file core_sttmlq.c.

References CORE_sparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
    int blk;                 /* index of the first reflector in the current block */
    int first, step;         /* starting block and signed stride of the sweep     */
    int width, pent;         /* the kb and l values handed to CORE_sparfb         */
    int q_order, work_min;   /* lower bounds used to validate LDV and LDWORK      */
    int forward;             /* non-zero when blocks are visited in rising order  */
    int row_off = 0;         /* row offset of the A1 sub-block (side == Left)     */
    int col_off = 0;         /* column offset of the A1 sub-block (side == Right) */
    int rows1 = M1;
    int rows2 = M2;
    int cols1 = N1;
    int cols2 = N2;

    /* Check input arguments */
    if (!((side == PlasmaLeft) || (side == PlasmaRight))) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    if (!((trans == PlasmaNoTrans) || (trans == PlasmaTrans))) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M1 < 0) {
        coreblas_error(3, "Illegal value of M1");
        return -3;
    }
    if (N1 < 0) {
        coreblas_error(4, "Illegal value of N1");
        return -4;
    }
    if ((M2 < 0) || ((side == PlasmaRight) && (M1 != M2))) {
        coreblas_error(5, "Illegal value of M2");
        return -5;
    }
    if ((N2 < 0) || ((side == PlasmaLeft) && (N1 != N2))) {
        coreblas_error(6, "Illegal value of N2");
        return -6;
    }
    if ((K < 0) ||
        ((side == PlasmaLeft)  && (K > M1)) ||
        ((side == PlasmaRight) && (K > N1))) {
        coreblas_error(7, "Illegal value of K");
        return -7;
    }
    if (IB < 0) {
        coreblas_error(8, "Illegal value of IB");
        return -8;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(10, "Illegal value of LDA1");
        return -10;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(12, "Illegal value of LDA2");
        return -12;
    }

    /* Bounds for LDV and LDWORK depend on the side Q is applied from. */
    q_order  = (side == PlasmaLeft) ? N2 : M2;
    work_min = (side == PlasmaLeft) ? IB : N1;

    if (LDV < max(1,q_order)) {
        coreblas_error(14, "Illegal value of LDV");
        return -14;
    }
    if (LDT < max(1,IB)) {
        coreblas_error(16, "Illegal value of LDT");
        return -16;
    }
    if (LDWORK < max(1,work_min)) {
        coreblas_error(18, "Illegal value of LDWORK");
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0)) {
        return PLASMA_SUCCESS;
    }

    /* Blocks are swept upwards for (Left, NoTrans) and (Right, Trans),
     * downwards otherwise.  side is already known to be Left or Right. */
    forward = ((side == PlasmaLeft) == (trans == PlasmaNoTrans));
    first   = forward ? 0  : ((K - 1) / IB) * IB;
    step    = forward ? IB : -IB;

    /* Transpose: the opposite trans flag is what gets passed to CORE_sparfb */
    trans = (trans == PlasmaNoTrans) ? PlasmaTrans : PlasmaNoTrans;

    blk = first;
    while ((blk > -1) && (blk < K)) {
        width = min(IB, K-blk);

        if (side == PlasmaLeft) {
            rows1   = width;
            rows2   = min(blk+width, M2);
            pent    = min(width, max(0, M2-blk));
            row_off = blk;
        }
        else {
            cols1   = width;
            cols2   = min(blk+width, N2);
            pent    = min(width, max(0, N2-blk));
            col_off = blk;
        }

        /*
         * Apply H or H' (NOTE: CORE_sparfb used to be CORE_sttrfb)
         */
        CORE_sparfb(
            side, trans, PlasmaForward, PlasmaRowwise,
            rows1, cols1, rows2, cols2, width, pent,
            &A1[LDA1*col_off+row_off], LDA1,
            A2, LDA2,
            &V[blk], LDV,
            &T[LDT*blk], LDT,
            WORK, LDWORK);

        blk += step;
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sttmlq_quark ( Quark * quark )

Definition at line 299 of file core_sttmlq.c.

References CORE_sttmlq(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Locals receiving the 18 task arguments. */
    int side, trans;
    int m1, n1, m2, n2;
    int k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    float *A1, *A2;
    float *V, *T;
    float *WORK;

    /* Unpack the task arguments, then run the kernel on them.  The kernel's
     * return status is not examined by this wrapper. */
    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
                         A1, lda1, A2, lda2, V, ldv, T, ldt,
                         WORK, ldwork);
    CORE_sttmlq(side, trans, m1, n1, m2, n2, k, ib,
                A1, lda1, A2, lda2, V, ldv, T, ldt,
                WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sttmqr	(	int	side,
		int	trans,
		int	M1,
		int	N1,
		int	M2,
		int	N2,
		int	K,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	V,
		int	LDV,
		float *	T,
		int	LDT,
		float *	WORK,
		int	LDWORK
	)

CORE_sttmqr overwrites the general real M1-by-N1 tile A1 and M2-by-N2 tile A2 (N1 == N2) with

                  SIDE = 'L'        SIDE = 'R'

TRANS = 'N':      Q * | A1 |        | A1 | * Q
                      | A2 |        | A2 |

TRANS = 'C':   Q**T * | A1 |        | A1 | * Q**T
                      | A2 |        | A2 |

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_sttqrt.

Parameters:

[in]	side	PlasmaLeft : apply Q or QT from the Left; PlasmaRight : apply Q or QT from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaTrans : ConjTranspose, apply Q**T.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0.
[in]	N2	The number of columns of the tile A2. N2 >= 0.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_STTQRT in the first k columns of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-N1.
[in]	LDWORK	The dimension of the array WORK. LDWORK >= max(1,IB).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 116 of file core_sttmqr.c.

References CORE_sparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, and PlasmaTrans.

{
    int blk;                 /* index of the first reflector in the current block */
    int first, step;         /* starting block and signed stride of the sweep     */
    int width, pent;         /* the kb and l values handed to CORE_sparfb         */
    int q_order, work_min;   /* lower bounds used to validate LDV and LDWORK      */
    int forward;             /* non-zero when blocks are visited in rising order  */
    int row_off = 0;         /* row offset of the A1 sub-block (side == Left)     */
    int col_off = 0;         /* column offset of the A1 sub-block (side == Right) */
    int rows1 = M1;
    int rows2 = M2;
    int cols1 = N1;
    int cols2 = N2;

    /* Check input arguments */
    if (!((side == PlasmaLeft) || (side == PlasmaRight))) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    if (!((trans == PlasmaNoTrans) || (trans == PlasmaTrans))) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M1 < 0) {
        coreblas_error(3, "Illegal value of M1");
        return -3;
    }
    if (N1 < 0) {
        coreblas_error(4, "Illegal value of N1");
        return -4;
    }
    if ((M2 < 0) || ((side == PlasmaRight) && (M2 != M1))) {
        coreblas_error(5, "Illegal value of M2");
        return -5;
    }
    if ((N2 < 0) || ((side == PlasmaLeft) && (N2 != N1))) {
        coreblas_error(6, "Illegal value of N2");
        return -6;
    }
    if ((K < 0) ||
        ((side == PlasmaLeft)  && (K > M1)) ||
        ((side == PlasmaRight) && (K > N1))) {
        coreblas_error(7, "Illegal value of K");
        return -7;
    }
    if (IB < 0) {
        coreblas_error(8, "Illegal value of IB");
        return -8;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(10, "Illegal value of LDA1");
        return -10;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(12, "Illegal value of LDA2");
        return -12;
    }

    /* Bounds for LDV and LDWORK depend on the side Q is applied from. */
    q_order  = (side == PlasmaLeft) ? M2 : N2;
    work_min = (side == PlasmaLeft) ? IB : M1;

    if (LDV < max(1,q_order)) {
        coreblas_error(14, "Illegal value of LDV");
        return -14;
    }
    if (LDT < max(1,IB)) {
        coreblas_error(16, "Illegal value of LDT");
        return -16;
    }
    if (LDWORK < max(1,work_min)) {
        coreblas_error(18, "Illegal value of LDWORK");
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0)) {
        return PLASMA_SUCCESS;
    }

    /* Blocks are swept upwards for (Left, Trans) and (Right, NoTrans),
     * downwards otherwise.  side is already known to be Left or Right. */
    forward = ((side == PlasmaLeft) != (trans == PlasmaNoTrans));
    first   = forward ? 0  : ((K - 1) / IB) * IB;
    step    = forward ? IB : -IB;

    blk = first;
    while ((blk > -1) && (blk < K)) {
        width = min(IB, K-blk);

        if (side == PlasmaLeft) {
            rows1   = width;
            rows2   = min(blk+width, M2);
            pent    = min(width, max(0, M2-blk));
            row_off = blk;
        }
        else {
            cols1   = width;
            cols2   = min(blk+width, N2);
            pent    = min(width, max(0, N2-blk));
            col_off = blk;
        }

        /*
         * Apply H or H' (NOTE: CORE_sparfb used to be CORE_sttrfb)
         */
        CORE_sparfb(
            side, trans, PlasmaForward, PlasmaColumnwise,
            rows1, cols1, rows2, cols2, width, pent,
            &A1[LDA1*col_off+row_off], LDA1,
            A2, LDA2,
            &V[LDV*blk], LDV,
            &T[LDT*blk], LDT,
            WORK, LDWORK);

        blk += step;
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sttmqr_quark ( Quark * quark )

Definition at line 291 of file core_sttmqr.c.

References CORE_sttmqr(), quark_unpack_args_18, side, T, trans, and V.

{
    /* Locals receiving the 18 task arguments. */
    int side, trans;
    int m1, n1, m2, n2;
    int k, ib;
    int lda1, lda2, ldv, ldt, ldwork;
    float *A1, *A2;
    float *V, *T;
    float *WORK;

    /* Unpack the task arguments, then run the kernel on them.  The kernel's
     * return status is not examined by this wrapper. */
    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
                         A1, lda1, A2, lda2, V, ldv, T, ldt,
                         WORK, ldwork);
    CORE_sttmqr(side, trans, m1, n1, m2, n2, k, ib,
                A1, lda1, A2, lda2, V, ldv, T, ldt,
                WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_sttqrt	(	int	M,
		int	N,
		int	IB,
		float *	A1,
		int	LDA1,
		float *	A2,
		int	LDA2,
		float *	T,
		int	LDT,
		float *	TAU,
		float *	WORK
	)

CORE_sttqrt computes a QR factorization of a rectangular matrix formed by coupling a real N-by-N upper triangular tile A1 on top of a real M-by-N upper trapezoidal tile A2:

| A1 | = Q * R
| A2 |

The tile Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i), and tau in TAU(i).

Parameters:

[in]	M	The number of rows of the tile A2. M >= 0.
[in]	N	The number of columns of the tile A1 and A2. N >= 0.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the N-by-N tile A1. On exit, the elements on and above the diagonal of the array contain the N-by-N upper trapezoidal tile R; the elements below the diagonal are not referenced.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,N).
[in,out]	A2	On entry, the M-by-N upper triangular tile A2. On exit, the elements on and above the diagonal of the array with the array TAU, represent the unitary tile Q as a product of elementary reflectors (see Further Details).
[in]	LDA2	The leading dimension of the array A2. LDA2 >= max(1,M).
[out]	T	The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	TAU	The scalar factors of the elementary reflectors (see Further Details).
[in,out]	WORK

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 100 of file core_sttqrt.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sger(), cblas_strmv(), CblasColMajor, CORE_slaset(), CORE_sparfb(), CORE_spemv(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * Blocked QR factorization of the tile A1 stacked on top of A2.
     *
     * The columns are processed in panels of at most IB columns.  Inside a
     * panel each column j gets one elementary reflector
     * (LAPACKE_slarfg_work), which is applied immediately to the remaining
     * columns of the panel and then folded into the block factor T.  After
     * the panel is finished, the block reflector is applied to the columns
     * to its right with CORE_sparfb.
     *
     * Returns PLASMA_SUCCESS, or -1 / -2 / -3 / -7 when M, N, IB or LDA2
     * is illegal.  LDA1 and LDT are not validated in this body.
     */
    static int                ione  = 1;
    static float zone  = 1.0;
    static float zzero = 0.0;
    float alpha;
    int i, j, l, ii, sb, mi, ni;
    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }
    /* Quick return: nothing to factor when any extent is zero */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;
    /* TODO: Need to check why some cases require
     *  this to not have uninitialized values.
     *  Until then the whole IB-by-N array T is reset through CORE_slaset
     *  with both scalar arguments equal to 0. */
    CORE_slaset( PlasmaUpperLower, IB, N,
                 0., 0., T, LDT);
    /* ii: first column of the current panel; sb: number of columns in it */
    for (ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for (i = 0; i < sb; i++) {
            j  = ii + i;            /* global index of the column being reduced */
            mi = min( j + 1, M );   /* length of the A2 column segment used     */
            ni = sb-i-1;            /* columns of this panel right of column j  */
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:mi, II*IB+I ).
             * The pivot is A1(j,j); the vector is column j of A2 (stride 1).
             */
            LAPACKE_slarfg_work(
                    mi+1, &A1[LDA1*j+j], &A2[LDA2*j], ione, &TAU[j]);
            if (ni > 0) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
                 *
                 * WORK  = A1(j, j+1:j+ni)' + A2(0:mi-1, j+1:j+ni)' * A2(0:mi-1, j)
                 * A1(j, j+1:j+ni)      += -tau * WORK'
                 * A2(0:mi-1, j+1:j+ni) += -tau * A2(0:mi-1, j) * WORK'
                 */
                cblas_scopy(
                    ni,
                    &A1[LDA1*(j+1)+j], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(ni, WORK, 1);
#endif
                cblas_sgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaTrans,
                    mi, ni,
                    (zone), &A2[LDA2*(j+1)], LDA2,
                                       &A2[LDA2*j],     1,
                    (zone), WORK,            1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(ni, WORK, 1);
#endif
                alpha = -(TAU[j]);
                cblas_saxpy(
                    ni, (alpha),
                    WORK, 1,
                    &A1[LDA1*(j+1)+j], LDA1);
#ifdef COMPLEX
                LAPACKE_slacgv_work(ni, WORK, 1);
#endif
                cblas_sger(
                    CblasColMajor, mi, ni,
                    (alpha), &A2[LDA2*j], 1,
                    WORK, 1,
                    &A2[LDA2*(j+1)], LDA2);
            }
            /*
             * Calculate T
             *
             * T(0:i-1, j) = alpha * A2(0:M-1, ii:j-1)' * A2(0:M-1, j)
             *
             * Skipped for the first column of a panel (i == 0).
             */
            if ( i > 0 ) {
                l = min(i, max(0, M-ii));
                alpha = -(TAU[j]);
                CORE_spemv(
                        PlasmaTrans, PlasmaColumnwise,
                        min(j, M), i, l,
                        alpha, &A2[LDA2*ii], LDA2,
                               &A2[LDA2*j],  1,
                        zzero, &T[LDT*j],    1,
                        WORK);
                /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
                cblas_strmv(
                        CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                        (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        (CBLAS_DIAG)PlasmaNonUnit,
                        i, &T[LDT*ii], LDT,
                           &T[LDT*j], 1);
            }
            /* The diagonal entry of the panel's T block is tau itself */
            T[LDT*j+i] = TAU[j];
        }
        /* Apply Q' to the rest of the matrix to the left
         * (only when columns remain to the right of the current panel) */
        if (N > ii+sb) {
            mi = min(ii+sb, M);
            ni = N-(ii+sb);
            l  = min(sb, max(0, mi-ii));
            CORE_sparfb(
                PlasmaLeft, PlasmaTrans,
                PlasmaForward, PlasmaColumnwise,
                IB, ni, mi, ni, sb, l,             //replaced sb by IB
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sttqrt_quark ( Quark * quark )

Definition at line 273 of file core_sttqrt.c.

References CORE_sttqrt(), quark_unpack_args_11, T, and TAU.

{
    /* Locals receiving the 11 task arguments. */
    int m, n, ib;
    int lda1, lda2, ldt;
    float *A1, *A2;
    float *T, *TAU;
    float *WORK;

    /* Unpack the task arguments, then run the kernel on them.  The kernel's
     * return status is not examined by this wrapper. */
    quark_unpack_args_11(quark, m, n, ib,
                         A1, lda1, A2, lda2,
                         T, ldt, TAU, WORK);
    CORE_sttqrt(m, n, ib,
                A1, lda1, A2, lda2,
                T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sasum	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	storev,
		PLASMA_enum	uplo,
		int	m,
		int	n,
		float *	A,
		int	lda,
		int	szeA,
		float *	work,
		int	szeW
	)

Declarations of QUARK wrappers (called by PLASMA) - alphabetical order

Definition at line 95 of file core_sasum.c.

References CORE_sasum_quark(), INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    /* Byte counts of the two dependency-tracked buffers. */
    const size_t bytes_A    = sizeof(float)*szeA;
    const size_t bytes_work = sizeof(float)*szeW;

    /* Queue CORE_sasum_quark; every argument is a (size, pointer, flag)
     * triplet and the list is closed by a 0 size.
     * NOTE(review): M and N are spelled in upper case here while the listed
     * prototype names them m and n - confirm against the real definition. */
    QUARK_Insert_Task(
        quark, CORE_sasum_quark, task_flags,
        sizeof(PLASMA_enum), &storev, VALUE,
        sizeof(PLASMA_enum), &uplo,   VALUE,
        sizeof(int),         &M,      VALUE,
        sizeof(int),         &N,      VALUE,
        bytes_A,             A,       INPUT,
        sizeof(int),         &lda,    VALUE,
        bytes_work,          work,    INOUT,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sasum_f1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	storev,
		PLASMA_enum	uplo,
		int	m,
		int	n,
		float *	A,
		int	lda,
		int	szeA,
		float *	work,
		int	szeW,
		float *	fake,
		int	szeF
	)

Definition at line 136 of file core_sasum.c.

References CORE_sasum_f1_quark(), DAG_CORE_ASUM, GATHERV, INOUT, INPUT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /* Byte counts of the three dependency-tracked buffers. */
    const size_t bytes_A    = sizeof(float)*szeA;
    const size_t bytes_work = sizeof(float)*szeW;
    const size_t bytes_fake = sizeof(float)*szeF;

    DAG_CORE_ASUM;

    /* Queue CORE_sasum_f1_quark; same triplets as QUARK_CORE_sasum plus one
     * extra buffer, fake, flagged OUTPUT | GATHERV.
     * NOTE(review): M and N are spelled in upper case here while the listed
     * prototype names them m and n - confirm against the real definition. */
    QUARK_Insert_Task(
        quark, CORE_sasum_f1_quark, task_flags,
        sizeof(PLASMA_enum), &storev, VALUE,
        sizeof(PLASMA_enum), &uplo,   VALUE,
        sizeof(int),         &M,      VALUE,
        sizeof(int),         &N,      VALUE,
        bytes_A,             A,       INPUT,
        sizeof(int),         &lda,    VALUE,
        bytes_work,          work,    INOUT,
        bytes_fake,          fake,    OUTPUT | GATHERV,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sbrdalg	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	A,
		float *	C,
		float *	S,
		int	i,
		int	j,
		int	m,
		int	grsiz,
		int	BAND,
		int *	PCOL,
		int *	ACOL,
		int *	MCOL
	)

Definition at line 127 of file core_sbrdalg.c.

References CORE_sbrdalg_quark(), INPUT, LOCALITY, NODEP, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queue CORE_sbrdalg_quark as a QUARK task.  Every argument is passed
     * as a (size, pointer, flag) triplet and the list is closed by 0.
     *
     * NOTE(review): the body passes V and TAU, while the listed prototype
     * names the two float pointers C and S - confirm against the real
     * definition.  The BAND parameter of the listed prototype is not
     * referenced in this body.
     */
    QUARK_Insert_Task(quark, CORE_sbrdalg_quark,   task_flags,
        sizeof(int),               &uplo,               VALUE,
        sizeof(int),                  &N,               VALUE,
        sizeof(int),                 &NB,               VALUE,
        /* Descriptor and the two float arrays are flagged NODEP */
        sizeof(PLASMA_desc),           A,               NODEP,
        sizeof(float),    V,               NODEP,
        sizeof(float),    TAU,               NODEP,
        sizeof(int),                  &i,               VALUE,
        sizeof(int),                  &j,               VALUE,
        sizeof(int),                  &m,               VALUE,
        sizeof(int),              &grsiz,               VALUE,
        /* The three int pointers carry the task's dependency flags:
         * PCOL and ACOL are INPUT, MCOL is OUTPUT | LOCALITY */
        sizeof(int),                PCOL,               INPUT,
        sizeof(int),                ACOL,               INPUT,
        sizeof(int),                MCOL,              OUTPUT | LOCALITY,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgeadd	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb
	)

Definition at line 43 of file core_sgeadd.c.

References CORE_sgeadd_quark(), DAG_CORE_GEADD, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    /* Both tiles are registered with the same nb-by-nb byte count. */
    const size_t tile_bytes = sizeof(float)*nb*nb;

    DAG_CORE_GEADD;

    /* Queue CORE_sgeadd_quark: A is flagged INPUT and B INOUT; scalars
     * travel by VALUE.  The list is closed by a 0 size. */
    QUARK_Insert_Task(
        quark, CORE_sgeadd_quark, task_flags,
        sizeof(int),   &m,     VALUE,
        sizeof(int),   &n,     VALUE,
        sizeof(float), &alpha, VALUE,
        tile_bytes,    A,      INPUT,
        sizeof(int),   &lda,   VALUE,
        tile_bytes,    B,      INOUT,
        sizeof(int),   &ldb,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgelqt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A,
		int	lda,
		float *	T,
		int	ldt
	)

Definition at line 154 of file core_sgelqt.c.

References CORE_sgelqt_quark(), DAG_CORE_GELQT, INOUT, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /* Byte counts registered with the task. */
    const size_t tile_bytes  = sizeof(float)*nb*nb;   /* A                      */
    const size_t block_bytes = sizeof(float)*ib*nb;   /* T and second scratch   */
    const size_t vec_bytes   = sizeof(float)*nb;      /* first scratch buffer   */

    DAG_CORE_GELQT;

    /* Queue CORE_sgelqt_quark: A is flagged INOUT and T OUTPUT; the two
     * trailing NULL entries are flagged SCRATCH.  The list is closed by a
     * 0 size. */
    QUARK_Insert_Task(
        quark, CORE_sgelqt_quark, task_flags,
        sizeof(int), &m,   VALUE,
        sizeof(int), &n,   VALUE,
        sizeof(int), &ib,  VALUE,
        tile_bytes,  A,    INOUT,
        sizeof(int), &lda, VALUE,
        block_bytes, T,    OUTPUT,
        sizeof(int), &ldt, VALUE,
        vec_bytes,   NULL, SCRATCH,
        block_bytes, NULL, SCRATCH,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgemm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 46 of file core_sgemm.c.

References CORE_sgemm_quark(), DAG_CORE_GEMM, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    /* All three tiles are registered with the same nb-by-nb byte count. */
    const size_t tile_bytes = sizeof(float)*nb*nb;

    DAG_CORE_GEMM;

    /* Queue CORE_sgemm_quark: A and B are flagged INPUT and C INOUT;
     * scalars travel by VALUE.  The list is closed by a 0 size. */
    QUARK_Insert_Task(
        quark, CORE_sgemm_quark, task_flags,
        sizeof(PLASMA_enum), &transA, VALUE,
        sizeof(PLASMA_enum), &transB, VALUE,
        sizeof(int),         &m,      VALUE,
        sizeof(int),         &n,      VALUE,
        sizeof(int),         &k,      VALUE,
        sizeof(float),       &alpha,  VALUE,
        tile_bytes,          A,       INPUT,
        sizeof(int),         &lda,    VALUE,
        tile_bytes,          B,       INPUT,
        sizeof(int),         &ldb,    VALUE,
        sizeof(float),       &beta,   VALUE,
        tile_bytes,          C,       INOUT,
        sizeof(int),         &ldc,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgemm2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 74 of file core_sgemm.c.

References CORE_sgemm_quark(), DAG_CORE_GEMM, GATHERV, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgemm_quark (the same task function the plain sgemm
     * wrapper uses) through QUARK_Insert_Task.  The argument list matches
     * that wrapper except that C carries INOUT | LOCALITY | GATHERV
     * instead of plain INOUT.  The list is closed by a literal 0.
     */
    DAG_CORE_GEMM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgemm_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(float),         &alpha,     VALUE,
        sizeof(float)*nb*nb,    A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(float)*nb*nb,    B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(float),         &beta,      VALUE,
        sizeof(float)*nb*nb,    C,                 INOUT | LOCALITY | GATHERV,
        sizeof(int),                        &ldc,       VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgemm_f2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc,
		float *	fake1,
		int	szefake1,
		int	flag1,
		float *	fake2,
		int	szefake2,
		int	flag2
	)

Definition at line 135 of file core_sgemm.c.

References CORE_sgemm_f2_quark(), DAG_CORE_GEMM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgemm_f2_quark through QUARK_Insert_Task.  After the
     * usual GEMM arguments (A, B flagged INPUT; C flagged INOUT | LOCALITY;
     * all sized nb*nb floats) two extra buffers, fake1 and fake2, are
     * appended.  Their sizes (szefake1, szefake2 floats) and their flags
     * (flag1, flag2) are supplied by the caller.
     * NOTE(review): the fake buffers presumably exist only to add
     * dependencies -- confirm against CORE_sgemm_f2_quark.
     */
    DAG_CORE_GEMM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgemm_f2_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(float),         &alpha,     VALUE,
        sizeof(float)*nb*nb,    A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(float)*nb*nb,    B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(float),         &beta,      VALUE,
        sizeof(float)*nb*nb,    C,                 INOUT | LOCALITY,
        sizeof(int),                        &ldc,       VALUE,
        sizeof(float)*szefake1, fake1,             flag1,
        sizeof(float)*szefake2, fake2,             flag2,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgemm_p2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float **	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 202 of file core_sgemm.c.

References CORE_sgemm_p2_quark(), DAG_CORE_GEMM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgemm_p2_quark through QUARK_Insert_Task.  Unlike the
     * plain sgemm wrapper, A and C are sized lda*nb and ldc*nb floats,
     * and B is handed over as the pointer-to-pointer itself with a
     * declared size of sizeof(float*), flagged INPUT.
     * NOTE(review): the task presumably dereferences B when it runs --
     * confirm against CORE_sgemm_p2_quark.
     */
    DAG_CORE_GEMM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgemm_p2_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(float),         &alpha,     VALUE,
        sizeof(float)*lda*nb,   A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(float*),         B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(float),         &beta,      VALUE,
        sizeof(float)*ldc*nb,    C,                 INOUT | LOCALITY,
        sizeof(int),                        &ldc,       VALUE,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sgemm_p2f1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float **	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc,
		float *	fake1,
		int	szefake1,
		int	flag1
	)

Definition at line 326 of file core_sgemm.c.

References CORE_sgemm_p2f1_quark(), DAG_CORE_GEMM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgemm_p2f1_quark through QUARK_Insert_Task.  The
     * argument list is that of the p2 variant (B passed as a float**
     * with size sizeof(float*)) followed by one extra buffer, fake1,
     * whose size (szefake1 floats) and flag (flag1) come from the caller.
     */
    DAG_CORE_GEMM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgemm_p2f1_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(float),         &alpha,     VALUE,
        sizeof(float)*lda*nb,   A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(float*),         B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(float),         &beta,      VALUE,
        sizeof(float)*ldc*nb,    C,                 INOUT | LOCALITY,
        sizeof(int),                        &ldc,       VALUE,
        sizeof(float)*szefake1, fake1,             flag1,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sgemm_p3	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	transA,
		int	transB,
		int	m,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	beta,
		float **	C,
		int	ldc
	)

Definition at line 264 of file core_sgemm.c.

References CORE_sgemm_p3_quark(), DAG_CORE_GEMM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgemm_p3_quark through QUARK_Insert_Task.  A and B are
     * sized lda*nb and ldb*nb floats and flagged INPUT.  C is handed over
     * as the pointer-to-pointer itself with a declared size of
     * sizeof(float*), flagged INOUT | LOCALITY.
     * NOTE(review): the task presumably dereferences C when it runs --
     * confirm against CORE_sgemm_p3_quark.
     */
    DAG_CORE_GEMM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgemm_p3_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(float),         &alpha,     VALUE,
        sizeof(float)*lda*nb,   A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(float)*ldb*nb,   B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(float),         &beta,      VALUE,
        sizeof(float*),         C,                 INOUT | LOCALITY,
        sizeof(int),                        &ldc,       VALUE,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sgeqrt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A,
		int	lda,
		float *	T,
		int	ldt
	)

Definition at line 155 of file core_sgeqrt.c.

References CORE_sgeqrt_quark(), DAG_CORE_GEQRT, INOUT, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sgeqrt_quark through QUARK_Insert_Task.  A (nb*nb
     * floats) is flagged INOUT and T (ib*nb floats) is flagged OUTPUT.
     * Two SCRATCH entries are requested with a NULL pointer: one of nb
     * floats and one of ib*nb floats.
     * NOTE(review): these presumably back the TAU and WORK arguments of
     * CORE_sgeqrt -- confirm against CORE_sgeqrt_quark.
     */
    DAG_CORE_GEQRT; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgeqrt_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(float)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(float)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(float)*nb,       NULL,          SCRATCH,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgessm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	k,
		int	ib,
		int	nb,
		int *	IPIV,
		float *	L,
		int	ldl,
		float *	A,
		int	lda
	)

Definition at line 145 of file core_sgessm.c.

References CORE_sgessm_quark(), DAG_CORE_GESSM, INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_L, and VALUE.

{
    /*
     * Queues CORE_sgessm_quark through QUARK_Insert_Task.  IPIV (nb ints)
     * is flagged INPUT, L (nb*nb floats) is flagged
     * INPUT | QUARK_REGION_L, and A (nb*nb floats) is flagged INOUT.
     * The remaining scalars are passed by address with VALUE.
     */
    DAG_CORE_GESSM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgessm_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(int)*nb,                      IPIV,          INPUT,
        sizeof(float)*nb*nb,    L,             INPUT | QUARK_REGION_L,
        sizeof(int),                        &ldl,   VALUE,
        sizeof(float)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrf	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	nb,
		float *	A,
		int	lda,
		int *	IPIV,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		PLASMA_bool	check_info,
		int	iinfo
	)

Definition at line 33 of file core_sgetrf.c.

References CORE_sgetrf_quark(), DAG_CORE_GETRF, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgetrf_quark through QUARK_Insert_Task.  A (nb*nb
     * floats) is flagged INOUT | LOCALITY and IPIV (nb ints) is flagged
     * OUTPUT.  The sequence and request pointers are themselves passed
     * by value (address of the pointer, size of a pointer), along with
     * check_info and iinfo.
     */
    DAG_CORE_GETRF; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgetrf_quark, task_flags,
        sizeof(int),                        &m,             VALUE,
        sizeof(int),                        &n,             VALUE,
        sizeof(float)*nb*nb,    A,                     INOUT | LOCALITY,
        sizeof(int),                        &lda,           VALUE,
        sizeof(int)*nb,                      IPIV,                  OUTPUT,
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrf_incpiv	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A,
		int	lda,
		int *	IPIV,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		PLASMA_bool	check_info,
		int	iinfo
	)

Definition at line 145 of file core_sgetrf_incpiv.c.

References CORE_sgetrf_incpiv_quark(), DAG_CORE_GETRF, INOUT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgetrf_incpiv_quark through QUARK_Insert_Task.  A
     * (nb*nb floats) is flagged INOUT and IPIV (nb ints) is flagged
     * OUTPUT.  ib is forwarded as a VALUE, and the sequence/request
     * pointers, check_info and iinfo are passed by value as well.
     */
    DAG_CORE_GETRF; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgetrf_incpiv_quark, task_flags,
        sizeof(int),                        &m,             VALUE,
        sizeof(int),                        &n,             VALUE,
        sizeof(int),                        &ib,            VALUE,
        sizeof(float)*nb*nb,    A,                     INOUT,
        sizeof(int),                        &lda,           VALUE,
        sizeof(int)*nb,                      IPIV,                  OUTPUT,
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrf_reclap	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	nb,
		float *	A,
		int	lda,
		int *	IPIV,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		PLASMA_bool	check_info,
		int	iinfo,
		int	nbthread
	)

Definition at line 351 of file core_sgetrf_reclap.c.

References CORE_sgetrf_reclap_quark(), DAG_CORE_GETRF, INOUT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgetrf_reclap_quark through QUARK_Insert_Task.  A
     * (nb*nb floats) is flagged INOUT and IPIV (nb ints) is flagged
     * OUTPUT.  The sequence/request pointers, check_info, iinfo and
     * nbthread are passed by value.
     */
    DAG_CORE_GETRF; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgetrf_reclap_quark, task_flags,
        sizeof(int),                        &m,             VALUE,
        sizeof(int),                        &n,             VALUE,
        sizeof(float)*nb*nb,    A,                     INOUT,
        sizeof(int),                        &lda,           VALUE,
        sizeof(int)*nb,                      IPIV,                  OUTPUT,
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        sizeof(int),                        &nbthread,      VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrf_rectil	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_desc	A,
		float *	Amn,
		int	size,
		int *	IPIV,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		PLASMA_bool	check_info,
		int	iinfo,
		int	nbthread
	)

Definition at line 699 of file core_sgetrf_rectil.c.

References CORE_sgetrf_rectil_quark(), DAG_CORE_GETRF, INOUT, plasma_desc_t::n, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_sgetrf_rectil_quark through QUARK_Insert_Task.  The
     * whole PLASMA_desc A is copied in as a VALUE.  Amn is declared as
     * `size` floats and flagged INOUT; IPIV is declared as A.n ints and
     * flagged OUTPUT.  The sequence/request pointers, check_info, iinfo
     * and nbthread are passed by value.
     */
    DAG_CORE_GETRF; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgetrf_rectil_quark, task_flags,
        sizeof(PLASMA_desc),                &A,             VALUE,
        sizeof(float)*size,     Amn,               INOUT,
        sizeof(int)*A.n,                     IPIV,              OUTPUT,
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        sizeof(int),                        &nbthread,      VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrip	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		float *	A,
		int	szeA
	)

Definition at line 82 of file core_sgetrip.c.

References CORE_sgetrip_quark(), DAG_CORE_GETRIP, INOUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sgetrip_quark through QUARK_Insert_Task.  A (szeA
     * floats) is flagged INOUT, and a SCRATCH entry of the same size is
     * requested with a NULL pointer.
     */
    DAG_CORE_GETRIP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sgetrip_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(float)*szeA, A,        INOUT,
        sizeof(float)*szeA, NULL,     SCRATCH,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrip_f1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		float *	A,
		int	szeA,
		float *	fake,
		int	szeF,
		int	paramF
	)

Definition at line 115 of file core_sgetrip.c.

References CORE_sgetrip_f1_quark(), DAG_CORE_GETRIP, INOUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sgetrip_f1_quark through QUARK_Insert_Task.  Same
     * arguments as the plain sgetrip wrapper (A INOUT plus an equally
     * sized SCRATCH entry), followed by one extra buffer `fake` of szeF
     * floats whose flag, paramF, is supplied by the caller.
     */
    DAG_CORE_GETRIP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_sgetrip_f1_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(float)*szeA, A,        INOUT,
        sizeof(float)*szeA, NULL,     SCRATCH,
        sizeof(float)*szeF, fake,     paramF,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sgetrip_f2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		float *	A,
		int	szeA,
		float *	fake1,
		int	szeF1,
		int	paramF1,
		float *	fake2,
		int	szeF2,
		int	paramF2
	)

Definition at line 153 of file core_sgetrip.c.

References CORE_sgetrip_f2_quark(), DAG_CORE_GETRIP, INOUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sgetrip_f2_quark through QUARK_Insert_Task.  Same
     * arguments as the plain sgetrip wrapper (A INOUT plus an equally
     * sized SCRATCH entry), followed by two extra buffers, fake1 and
     * fake2, whose sizes (szeF1, szeF2 floats) and flags (paramF1,
     * paramF2) are supplied by the caller.
     */
    DAG_CORE_GETRIP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_sgetrip_f2_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(float)*szeA, A,        INOUT,
        sizeof(float)*szeA, NULL,     SCRATCH,
        sizeof(float)*szeF1, fake1,     paramF1,
        sizeof(float)*szeF2, fake2,     paramF2,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slacpy	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	uplo,
		int	m,
		int	n,
		int	nb,
		float *	A,
		int	lda,
		float *	B,
		int	ldb
	)

Definition at line 42 of file core_slacpy.c.

References CORE_slacpy_quark(), DAG_CORE_LACPY, INPUT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slacpy_quark through QUARK_Insert_Task.  A is flagged
     * INPUT and B is flagged OUTPUT; both are declared as nb*nb floats.
     * NOTE(review): the tile dimension used for sizing is `nb`; confirm
     * that this is the name the function definition gives its sixth
     * parameter.
     */
    DAG_CORE_LACPY; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slacpy_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(float)*nb*nb,    A,             INPUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(float)*nb*nb,    B,             OUTPUT,
        sizeof(int),                        &ldb,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slange	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	norm,
		int	M,
		int	N,
		float *	A,
		int	LDA,
		int	szeA,
		int	szeW,
		float *	result
	)

Definition at line 42 of file core_slange.c.

References CORE_slange_quark(), DAG_CORE_LANGE, INPUT, max, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_slange_quark through QUARK_Insert_Task.  A (szeA
     * floats) is flagged INPUT, a SCRATCH entry of szeW floats is
     * requested with a NULL pointer, and `result` (one float) is
     * flagged OUTPUT.
     */
    /* Clamp the scratch size so the SCRATCH request is never zero-sized. */
    szeW = max(1, szeW);
    DAG_CORE_LANGE; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slange_quark, task_flags,
        sizeof(PLASMA_enum),                &norm,  VALUE,
        sizeof(int),                        &M,     VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float)*szeA,     A,             INPUT,
        sizeof(int),                        &LDA,   VALUE,
        sizeof(float)*szeW,                 NULL,          SCRATCH,
        sizeof(float),                      result,        OUTPUT,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slange_f1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	norm,
		int	M,
		int	N,
		float *	A,
		int	LDA,
		int	szeA,
		int	szeW,
		float *	result,
		float *	fake,
		int	szeF
	)

Definition at line 87 of file core_slange.c.

References CORE_slange_f1_quark(), DAG_CORE_LANGE, GATHERV, INPUT, max, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_slange_f1_quark through QUARK_Insert_Task.  Same
     * arguments as the plain slange wrapper, followed by one extra
     * buffer `fake` of szeF floats flagged OUTPUT | GATHERV.
     */
    /* Clamp the scratch size so the SCRATCH request is never zero-sized. */
    szeW = max(1, szeW);
    DAG_CORE_LANGE; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slange_f1_quark, task_flags,
        sizeof(PLASMA_enum),                &norm,  VALUE,
        sizeof(int),                        &M,     VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float)*szeA,     A,             INPUT,
        sizeof(int),                        &LDA,   VALUE,
        sizeof(float)*szeW,                 NULL,          SCRATCH,
        sizeof(float),                      result,        OUTPUT,
        sizeof(float)*szeF,                 fake,          OUTPUT | GATHERV,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slansy	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	norm,
		int	uplo,
		int	N,
		float *	A,
		int	LDA,
		int	szeA,
		int	szeW,
		float *	result
	)

Definition at line 42 of file core_slansy.c.

References CORE_slansy_quark(), DAG_CORE_LANSY, INPUT, max, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_slansy_quark through QUARK_Insert_Task.  A (szeA
     * floats) is flagged INPUT, a SCRATCH entry of szeW floats is
     * requested with a NULL pointer, and `result` (one float) is
     * flagged OUTPUT.  norm and uplo are both passed as PLASMA_enum
     * values.
     */
    /* Clamp the scratch size so the SCRATCH request is never zero-sized. */
    szeW = max(1, szeW);
    DAG_CORE_LANSY; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slansy_quark, task_flags,
        sizeof(PLASMA_enum),                &norm,  VALUE,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float)*szeA,     A,             INPUT,
        sizeof(int),                        &LDA,   VALUE,
        sizeof(float)*szeW,                 NULL,          SCRATCH,
        sizeof(float),                      result,        OUTPUT,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_slansy_f1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	norm,
		int	uplo,
		int	N,
		float *	A,
		int	LDA,
		int	szeA,
		int	szeW,
		float *	result,
		float *	fake,
		int	szeF
	)

Definition at line 87 of file core_slansy.c.

References CORE_slansy_f1_quark(), DAG_CORE_LANSY, GATHERV, INPUT, max, OUTPUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /*
     * Queues CORE_slansy_f1_quark through QUARK_Insert_Task.  Same
     * arguments as the plain slansy wrapper, followed by one extra
     * buffer `fake` of szeF floats flagged OUTPUT | GATHERV.
     */
    /* Clamp the scratch size so the SCRATCH request is never zero-sized. */
    szeW = max(1, szeW);
    DAG_CORE_LANSY; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slansy_f1_quark, task_flags,
        sizeof(PLASMA_enum),                &norm,  VALUE,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float)*szeA,     A,             INPUT,
        sizeof(int),                        &LDA,   VALUE,
        sizeof(float)*szeW,                 NULL,          SCRATCH,
        sizeof(float),                      result,        OUTPUT,
        sizeof(float)*szeF,                 fake,          OUTPUT | GATHERV,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaset	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		float	alpha,
		float	beta,
		float *	A,
		int	LDA
	)

Definition at line 71 of file core_slaset.c.

References CORE_slaset_quark(), DAG_CORE_LASET, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaset_quark through QUARK_Insert_Task.  uplo, M, N,
     * alpha, beta and LDA are passed by value; A is declared as M*N
     * floats and flagged OUTPUT.
     * NOTE(review): this body refers to M, N, A and LDA; confirm these
     * are the parameter names used by the function definition.
     */
    DAG_CORE_LASET; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slaset_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &M,     VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float),         &alpha, VALUE,
        sizeof(float),         &beta,  VALUE,
        sizeof(float)*M*N,     A,      OUTPUT,
        sizeof(int),                        &LDA,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaset2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		float	alpha,
		float *	A,
		int	LDA
	)

Definition at line 82 of file core_slaset2.c.

References CORE_slaset2_quark(), OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaset2_quark through QUARK_Insert_Task.  uplo, M, N,
     * alpha and LDA are passed by value; A is declared as M*N floats and
     * flagged OUTPUT.  Unlike the other wrappers on this page, no DAG_*
     * macro is invoked before the insertion.
     * NOTE(review): this body refers to M, N, A and LDA; confirm these
     * are the parameter names used by the function definition.
     */
    QUARK_Insert_Task(quark, CORE_slaset2_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &M,     VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(float),         &alpha, VALUE,
        sizeof(float)*M*N,     A,      OUTPUT,
        sizeof(int),                        &LDA,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaswp	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	n,
		float *	A,
		int	lda,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc
	)

Definition at line 37 of file core_slaswp.c.

References CORE_slaswp_quark(), DAG_CORE_LASWP, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaswp_quark through QUARK_Insert_Task.  A is declared
     * as lda*n floats and flagged INOUT | LOCALITY; ipiv is declared as
     * n ints and flagged INPUT.  n, lda, i1, i2 and inc are passed by
     * value.
     */
    DAG_CORE_LASWP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_slaswp_quark, task_flags,
        sizeof(int),                      &n,    VALUE,
        sizeof(float)*lda*n,  A,        INOUT | LOCALITY,
        sizeof(int),                      &lda,  VALUE,
        sizeof(int),                      &i1,   VALUE,
        sizeof(int),                      &i2,   VALUE,
        sizeof(int)*n,                     ipiv,     INPUT,
        sizeof(int),                      &inc,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaswp_f2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	n,
		float *	A,
		int	lda,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	fake1,
		int	szefake1,
		int	flag1,
		float *	fake2,
		int	szefake2,
		int	flag2
	)

Definition at line 74 of file core_slaswp.c.

References CORE_slaswp_f2_quark(), DAG_CORE_LASWP, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaswp_f2_quark through QUARK_Insert_Task.  Same
     * arguments as the plain slaswp wrapper, followed by two extra
     * buffers, fake1 and fake2, whose sizes (szefake1, szefake2 floats)
     * and flags (flag1, flag2) are supplied by the caller.
     */
    DAG_CORE_LASWP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_slaswp_f2_quark, task_flags,
        sizeof(int),                        &n,     VALUE,
        sizeof(float)*lda*n,    A,         INOUT | LOCALITY,
        sizeof(int),                        &lda,   VALUE,
        sizeof(int),                        &i1,    VALUE,
        sizeof(int),                        &i2,    VALUE,
        sizeof(int)*n,                       ipiv,      INPUT,
        sizeof(int),                        &inc,   VALUE,
        sizeof(float)*szefake1, fake1,     flag1,
        sizeof(float)*szefake2, fake2,     flag2,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaswp_ontile	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_desc	descA,
		float *	Aij,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	fakepanel
	)

Definition at line 214 of file core_slaswp.c.

References CORE_slaswp_ontile_quark(), DAG_CORE_LASWP, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaswp_ontile_quark through QUARK_Insert_Task.  The
     * whole descriptor descA is copied in as a VALUE.  Aij and fakepanel
     * are each declared with a size of a single float (Aij flagged
     * INOUT | LOCALITY, fakepanel flagged INOUT); ipiv is declared as
     * (i2-i1+1)*abs(inc) ints and flagged INPUT.
     * NOTE(review): the one-float sizes suggest Aij and fakepanel serve
     * as dependency handles rather than describing the data extent --
     * confirm against CORE_slaswp_ontile_quark.
     */
    DAG_CORE_LASWP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_slaswp_ontile_quark, task_flags,
        sizeof(PLASMA_desc),              &descA,     VALUE,
        sizeof(float)*1,      Aij,           INOUT | LOCALITY,
        sizeof(int),                      &i1,        VALUE,
        sizeof(int),                      &i2,        VALUE,
        sizeof(int)*(i2-i1+1)*abs(inc),   ipiv,           INPUT,
        sizeof(int),                      &inc,       VALUE,
        sizeof(float)*1,      fakepanel,     INOUT,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaswp_ontile_f2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_desc	descA,
		float *	Aij,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	fake1,
		int	szefake1,
		int	flag1,
		float *	fake2,
		int	szefake2,
		int	flag2
	)

Definition at line 252 of file core_slaswp.c.

References CORE_slaswp_ontile_f2_quark(), DAG_CORE_LASWP, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaswp_ontile_f2_quark through QUARK_Insert_Task.  The
     * whole descriptor descA is copied in as a VALUE.  Aij is declared
     * with a size of a single float and flagged INOUT | LOCALITY; ipiv
     * is declared as (i2-i1+1)*abs(inc) ints and flagged INPUT.  Two
     * extra buffers, fake1 and fake2, follow with caller-supplied sizes
     * (szefake1, szefake2 floats) and flags (flag1, flag2).
     */
    DAG_CORE_LASWP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_slaswp_ontile_f2_quark, task_flags,
        sizeof(PLASMA_desc),                &descA, VALUE,
        sizeof(float)*1,        Aij,       INOUT | LOCALITY,
        sizeof(int),                        &i1,    VALUE,
        sizeof(int),                        &i2,    VALUE,
        sizeof(int)*(i2-i1+1)*abs(inc),      ipiv,      INPUT,
        sizeof(int),                        &inc,   VALUE,
        sizeof(float)*szefake1, fake1, flag1,
        sizeof(float)*szefake2, fake2, flag2,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slaswpc_ontile	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_desc	descA,
		float *	Aij,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	fakepanel
	)

Definition at line 492 of file core_slaswp.c.

References CORE_slaswpc_ontile_quark(), DAG_CORE_LASWP, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slaswpc_ontile_quark through QUARK_Insert_Task.  The
     * argument list has the same shape as the slaswp_ontile wrapper:
     * descA by value, Aij (one float, INOUT | LOCALITY), i1, i2, ipiv
     * ((i2-i1+1)*abs(inc) ints, INPUT), inc, and fakepanel (one float,
     * INOUT).
     */
    DAG_CORE_LASWP; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(
        quark, CORE_slaswpc_ontile_quark, task_flags,
        sizeof(PLASMA_desc),              &descA,     VALUE,
        sizeof(float)*1,      Aij,           INOUT | LOCALITY,
        sizeof(int),                      &i1,        VALUE,
        sizeof(int),                      &i2,        VALUE,
        sizeof(int)*(i2-i1+1)*abs(inc),   ipiv,           INPUT,
        sizeof(int),                      &inc,       VALUE,
        sizeof(float)*1,      fakepanel,     INOUT,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_slauum	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	n,
		int	nb,
		float *	A,
		int	lda
	)

Definition at line 37 of file core_slauum.c.

References CORE_slauum_quark(), DAG_CORE_LAUUM, INOUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_slauum_quark through QUARK_Insert_Task.  uplo, n and
     * lda are passed by value; A is declared as nb*nb floats and flagged
     * INOUT.
     */
    DAG_CORE_LAUUM; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_slauum_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(float)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sormlq	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m,
		int	n,
		int	ib,
		int	nb,
		int	k,
		float *	A,
		int	lda,
		float *	T,
		int	ldt,
		float *	C,
		int	ldc
	)

Definition at line 231 of file core_sormlq.c.

References CORE_sormlq_quark(), DAG_CORE_UNMLQ, INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sormlq_quark through QUARK_Insert_Task.  A (nb*nb
     * floats) is flagged INPUT | QUARK_REGION_U, T (ib*nb floats) is
     * flagged INPUT and C (nb*nb floats) is flagged INOUT.  A SCRATCH
     * entry of ib*nb floats is requested with a NULL pointer, and nb is
     * forwarded as the final VALUE argument.
     * Scalars are inserted in the order side, trans, m, n, k, ib.
     */
    DAG_CORE_UNMLQ; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sormlq_quark, task_flags,
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(float)*nb*nb,    A,             INPUT | QUARK_REGION_U,
        sizeof(int),                        &lda,   VALUE,
        sizeof(float)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(float)*nb*nb,    C,             INOUT,
        sizeof(int),                        &ldc,   VALUE,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        sizeof(int),                        &nb,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sormqr	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m,
		int	n,
		int	k,
		int	ib,
		int	nb,
		float *	A,
		int	lda,
		float *	T,
		int	ldt,
		float *	C,
		int	ldc
	)

Definition at line 224 of file core_sormqr.c.

References CORE_sormqr_quark(), DAG_CORE_UNMQR, INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_L, SCRATCH, and VALUE.

{
    /*
     * Queues CORE_sormqr_quark through QUARK_Insert_Task.  A (nb*nb
     * floats) is flagged INPUT | QUARK_REGION_L, T (ib*nb floats) is
     * flagged INPUT and C (nb*nb floats) is flagged INOUT.  A SCRATCH
     * entry of ib*nb floats is requested with a NULL pointer, and nb is
     * forwarded as the final VALUE argument.
     * Scalars are inserted in the order side, trans, m, n, k, ib.
     */
    DAG_CORE_UNMQR; /* macro defined elsewhere; expansion not visible here */
    QUARK_Insert_Task(quark, CORE_sormqr_quark, task_flags,
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(float)*nb*nb,   A,      INPUT | QUARK_REGION_L,
        sizeof(int),                        &lda,   VALUE,
        sizeof(float)*ib*nb,   T,      INPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(float)*nb*nb,   C,      INOUT,
        sizeof(int),                        &ldc,   VALUE,
        sizeof(float)*ib*nb,   NULL,   SCRATCH,
        sizeof(int),                        &nb,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_spamm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	op,
		int	side,
		int	storev,
		int	m,
		int	n,
		int	k,
		int	l,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	W,
		int	ldw
	)

Definition at line 569 of file core_spamm.c.

References CORE_spamm_quark(), INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_spamm_quark through QUARK_Insert_Task.  No DAG_* macro
     * is invoked before the insertion.  op, side, storev, m, n, k, l and
     * the four leading dimensions are passed by value.  A1 (m*k floats),
     * A2 (k*n floats) and V (m*n floats) are flagged INPUT; W (m*n
     * floats) is flagged INOUT.
     * NOTE(review): A1 and A2 are declared INPUT here; confirm that
     * CORE_spamm_quark never writes to them.
     */
    QUARK_Insert_Task(quark, CORE_spamm_quark, task_flags,
        sizeof(int),                        &op,      VALUE,
        sizeof(PLASMA_enum),                &side,    VALUE,
        sizeof(PLASMA_enum),                &storev,  VALUE,
        sizeof(int),                        &m,       VALUE,
        sizeof(int),                        &n,       VALUE,
        sizeof(int),                        &k,       VALUE,
        sizeof(int),                        &l,       VALUE,
        sizeof(float)*m*k,     A1,        INPUT,
        sizeof(int),                        &lda1,     VALUE,
        sizeof(float)*k*n,     A2,        INPUT,
        sizeof(int),                        &lda2,     VALUE,
        sizeof(float)*m*n,     V,        INPUT,
        sizeof(int),                        &ldv,     VALUE,
        sizeof(float)*m*n,     W,        INOUT,
        sizeof(int),                        &ldw,     VALUE,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_splgsy	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		float	bump,
		int	m,
		int	n,
		float *	A,
		int	lda,
		int	bigM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Definition at line 147 of file core_splgsy.c.

References CORE_splgsy_quark(), DAG_CORE_PLGSY, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_PLGSY;

    /*
     * Submit CORE_splgsy_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is registered as OUTPUT
     * with a size of lda*n floats; everything else is a by-address VALUE.
     */
    QUARK_Insert_Task(quark, CORE_splgsy_quark, task_flags,
        sizeof(float),                   &bump,  VALUE,
        sizeof(int),                     &m,     VALUE,
        sizeof(int),                     &n,     VALUE,
        sizeof(float)*lda*n,             A,      OUTPUT,
        sizeof(int),                     &lda,   VALUE,
        sizeof(int),                     &bigM,  VALUE,
        sizeof(int),                     &m0,    VALUE,
        sizeof(int),                     &n0,    VALUE,
        sizeof(unsigned long long int),  &seed,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_splrnt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		float *	A,
		int	lda,
		int	bigM,
		int	m0,
		int	n0,
		unsigned long long int	seed
	)

Definition at line 92 of file core_splrnt.c.

References CORE_splrnt_quark(), DAG_CORE_PLRNT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_PLRNT;

    /*
     * Submit CORE_splrnt_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is registered as OUTPUT
     * with a size of lda*n floats; everything else is a by-address VALUE.
     */
    QUARK_Insert_Task(quark, CORE_splrnt_quark, task_flags,
        sizeof(int),                     &m,     VALUE,
        sizeof(int),                     &n,     VALUE,
        sizeof(float)*lda*n,             A,      OUTPUT,
        sizeof(int),                     &lda,   VALUE,
        sizeof(int),                     &bigM,  VALUE,
        sizeof(int),                     &m0,    VALUE,
        sizeof(int),                     &n0,    VALUE,
        sizeof(unsigned long long int),  &seed,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_spotrf	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	n,
		int	nb,
		float *	A,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		int	iinfo
	)

Definition at line 40 of file core_spotrf.c.

References CORE_spotrf_quark(), DAG_CORE_POTRF, INOUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_POTRF;

    /*
     * Submit CORE_spotrf_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is registered as INOUT
     * with a size of nb*nb floats.  The sequence and request pointers are
     * themselves passed as VALUE arguments (address of the pointer).
     */
    QUARK_Insert_Task(quark, CORE_spotrf_quark, task_flags,
        sizeof(PLASMA_enum),       &uplo,      VALUE,
        sizeof(int),               &n,         VALUE,
        sizeof(float)*nb*nb,       A,          INOUT,
        sizeof(int),               &lda,       VALUE,
        sizeof(PLASMA_sequence*),  &sequence,  VALUE,
        sizeof(PLASMA_request*),   &request,   VALUE,
        sizeof(int),               &iinfo,     VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sshift	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	s,
		int	m,
		int	n,
		int	L,
		float *	A
	)

Definition at line 187 of file core_sshift.c.

References CORE_sshift_quark(), DAG_CORE_SHIFT, GATHERV, INOUT, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    DAG_CORE_SHIFT;

    /*
     * Submit CORE_sshift_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is registered as
     * INOUT | GATHERV with a size of m*n*L floats, followed by a SCRATCH
     * entry (NULL pointer) of L floats.
     */
    QUARK_Insert_Task(quark, CORE_sshift_quark, task_flags,
        sizeof(int),          &s,    VALUE,
        sizeof(int),          &m,    VALUE,
        sizeof(int),          &n,    VALUE,
        sizeof(int),          &L,    VALUE,
        sizeof(float)*m*n*L,  A,     INOUT | GATHERV,
        sizeof(float)*L,      NULL,  SCRATCH,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sshiftw	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	s,
		int	cl,
		int	m,
		int	n,
		int	L,
		float *	A,
		float *	W
	)

Definition at line 108 of file core_sshift.c.

References CORE_sshiftw_quark(), DAG_CORE_SHIFTW, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_SHIFTW;

    /*
     * Submit CORE_sshiftw_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A (m*n*L floats) is
     * flagged INOUT and the caller-supplied W (L floats) is flagged INPUT.
     */
    QUARK_Insert_Task(quark, CORE_sshiftw_quark, task_flags,
        sizeof(int),          &s,   VALUE,
        sizeof(int),          &cl,  VALUE,
        sizeof(int),          &m,   VALUE,
        sizeof(int),          &n,   VALUE,
        sizeof(int),          &L,   VALUE,
        sizeof(float)*m*n*L,  A,    INOUT,
        sizeof(float)*L,      W,    INPUT,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sssssm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	L1,
		int	ldl1,
		float *	L2,
		int	ldl2,
		int *	IPIV
	)

Definition at line 184 of file core_sssssm.c.

References CORE_sssssm_quark(), DAG_CORE_SSSSM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_SSSSM;

    /*
     * Submit CORE_sssssm_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A1 and A2 (nb*nb floats
     * each) are INOUT, with LOCALITY additionally set on A2; L1 and L2
     * (ib*nb floats each) and IPIV (nb ints) are INPUT.  nb is used only to
     * size the entries and is not forwarded as an argument.
     */
    QUARK_Insert_Task(quark, CORE_sssssm_quark, task_flags,
        sizeof(int),          &m1,    VALUE,
        sizeof(int),          &n1,    VALUE,
        sizeof(int),          &m2,    VALUE,
        sizeof(int),          &n2,    VALUE,
        sizeof(int),          &k,     VALUE,
        sizeof(int),          &ib,    VALUE,
        sizeof(float)*nb*nb,  A1,     INOUT,
        sizeof(int),          &lda1,  VALUE,
        sizeof(float)*nb*nb,  A2,     INOUT | LOCALITY,
        sizeof(int),          &lda2,  VALUE,
        sizeof(float)*ib*nb,  L1,     INPUT,
        sizeof(int),          &ldl1,  VALUE,
        sizeof(float)*ib*nb,  L2,     INPUT,
        sizeof(int),          &ldl2,  VALUE,
        sizeof(int)*nb,       IPIV,   INPUT,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sswpab	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	i,
		int	n1,
		int	n2,
		float *	A,
		int	szeA
	)

Definition at line 85 of file core_sswpab.c.

References CORE_sswpab_quark(), DAG_CORE_SWPAB, INOUT, min, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    DAG_CORE_SWPAB;

    /*
     * Submit CORE_sswpab_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A (szeA floats) is INOUT;
     * the final entry is a SCRATCH entry (NULL pointer) sized for
     * min(n1, n2) floats.
     */
    QUARK_Insert_Task(quark, CORE_sswpab_quark, task_flags,
        sizeof(int),               &i,    VALUE,
        sizeof(int),               &n1,   VALUE,
        sizeof(int),               &n2,   VALUE,
        sizeof(float)*szeA,        A,     INOUT,
        sizeof(float)*min(n1,n2),  NULL,  SCRATCH,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_sswptr_ontile	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_desc	descA,
		float *	Aij,
		int	i1,
		int	i2,
		int *	ipiv,
		int	inc,
		float *	Akk,
		int	ldak
	)

Definition at line 359 of file core_slaswp.c.

References CORE_sswptr_ontile_quark(), DAG_CORE_TRSM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_TRSM;

    /*
     * Submit CORE_sswptr_ontile_quark through QUARK_Insert_Task as (byte
     * size, address, flag) triples closed by a lone 0.  The descriptor is
     * copied by VALUE.  Aij is INOUT | LOCALITY with a nominal size of one
     * float, ipiv is INPUT with (i2-i1+1)*abs(inc) ints, and Akk is INPUT
     * with ldak floats.
     */
    QUARK_Insert_Task(quark, CORE_sswptr_ontile_quark, task_flags,
        sizeof(PLASMA_desc),             &descA,  VALUE,
        sizeof(float)*1,                 Aij,     INOUT | LOCALITY,
        sizeof(int),                     &i1,     VALUE,
        sizeof(int),                     &i2,     VALUE,
        sizeof(int)*(i2-i1+1)*abs(inc),  ipiv,    INPUT,
        sizeof(int),                     &inc,    VALUE,
        sizeof(float)*ldak,              Akk,     INPUT,
        sizeof(int),                     &ldak,   VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ssygst	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	itype,
		int	uplo,
		int	N,
		float *	A,
		int	LDA,
		float *	B,
		int	LDB,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		int	iinfo
	)

Definition at line 39 of file core_ssygst.c.

References CORE_ssygst_quark(), INOUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Submit CORE_ssygst_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A and B (N*N floats each)
     * are both INOUT.  The sequence and request pointers are themselves
     * passed as VALUE arguments (address of the pointer).
     *
     * The identifiers N, LDA and LDB follow the parameter names declared in
     * this function's signature.
     */
    QUARK_Insert_Task(quark, CORE_ssygst_quark, task_flags,
        sizeof(int),               &itype,     VALUE,
        sizeof(PLASMA_enum),       &uplo,      VALUE,
        sizeof(int),               &N,         VALUE,
        sizeof(float)*N*N,         A,          INOUT,
        sizeof(int),               &LDA,       VALUE,
        sizeof(float)*N*N,         B,          INOUT,
        sizeof(int),               &LDB,       VALUE,
        sizeof(PLASMA_sequence*),  &sequence,  VALUE,
        sizeof(PLASMA_request*),   &request,   VALUE,
        sizeof(int),               &iinfo,     VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ssymm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	uplo,
		int	m,
		int	n,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 46 of file core_ssymm.c.

References CORE_ssymm_quark(), DAG_CORE_SYMM, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_SYMM;

    /*
     * Submit CORE_ssymm_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A and B are INPUT and C is
     * INOUT, each sized as nb*nb floats.  nb is used only to size the
     * entries and is not forwarded as an argument.
     */
    QUARK_Insert_Task(quark, CORE_ssymm_quark, task_flags,
        sizeof(PLASMA_enum),  &side,   VALUE,
        sizeof(PLASMA_enum),  &uplo,   VALUE,
        sizeof(int),          &m,      VALUE,
        sizeof(int),          &n,      VALUE,
        sizeof(float),        &alpha,  VALUE,
        sizeof(float)*nb*nb,  A,       INPUT,
        sizeof(int),          &lda,    VALUE,
        sizeof(float)*nb*nb,  B,       INPUT,
        sizeof(int),          &ldb,    VALUE,
        sizeof(float),        &beta,   VALUE,
        sizeof(float)*nb*nb,  C,       INOUT,
        sizeof(int),          &ldc,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ssyr2k	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	trans,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	LDB,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 45 of file core_ssyr2k.c.

References CORE_ssyr2k_quark(), DAG_CORE_SYR2K, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_SYR2K;

    /*
     * Submit CORE_ssyr2k_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A and B are INPUT and C is
     * INOUT, each sized as nb*nb floats.  nb is used only to size the
     * entries and is not forwarded as an argument.
     *
     * The leading dimension of B is referenced as LDB, matching the
     * parameter name declared in this function's signature.
     */
    QUARK_Insert_Task(quark, CORE_ssyr2k_quark, task_flags,
        sizeof(PLASMA_enum),  &uplo,   VALUE,
        sizeof(PLASMA_enum),  &trans,  VALUE,
        sizeof(int),          &n,      VALUE,
        sizeof(int),          &k,      VALUE,
        sizeof(float),        &alpha,  VALUE,
        sizeof(float)*nb*nb,  A,       INPUT,
        sizeof(int),          &lda,    VALUE,
        sizeof(float)*nb*nb,  B,       INPUT,
        sizeof(int),          &LDB,    VALUE,
        sizeof(float),        &beta,   VALUE,
        sizeof(float)*nb*nb,  C,       INOUT,
        sizeof(int),          &ldc,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ssyrfb	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	uplo,
		int	n,
		int	k,
		int	ib,
		int	nb,
		float *	A,
		int	lda,
		float *	T,
		int	ldt,
		float *	C,
		int	ldc
	)

This kernel is only a temporary workaround; it will eventually be deleted and replaced by the one above (Piotr's task).

Definition at line 183 of file core_ssyrfb.c.

References CORE_ssyrfb_quark(), INOUT, INPUT, PlasmaUpper, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Submit CORE_ssyrfb_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.
     *
     * The region flags on A and C depend on uplo: PlasmaUpper selects
     * QUARK_REGION_U, anything else selects QUARK_REGION_L.  C additionally
     * carries QUARK_REGION_D in both cases.  A SCRATCH entry (NULL pointer)
     * of 2*nb*nb floats follows, and nb is passed a second time as the last
     * VALUE argument.
     */
    QUARK_Insert_Task(quark, CORE_ssyrfb_quark, task_flags,
        sizeof(PLASMA_enum),    &uplo,  VALUE,
        sizeof(int),            &n,     VALUE,
        sizeof(int),            &k,     VALUE,
        sizeof(int),            &ib,    VALUE,
        sizeof(int),            &nb,    VALUE,
        sizeof(float)*nb*nb,    A,
            uplo == PlasmaUpper ? INOUT|QUARK_REGION_U
                                : INOUT|QUARK_REGION_L,
        sizeof(int),            &lda,   VALUE,
        sizeof(float)*ib*nb,    T,      INPUT,
        sizeof(int),            &ldt,   VALUE,
        sizeof(float)*nb*nb,    C,
            uplo == PlasmaUpper ? INOUT|QUARK_REGION_D|QUARK_REGION_U
                                : INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),            &ldc,   VALUE,
        sizeof(float)*2*nb*nb,  NULL,   SCRATCH,
        sizeof(int),            &nb,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ssyrk	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	trans,
		int	n,
		int	k,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float	beta,
		float *	C,
		int	ldc
	)

Definition at line 44 of file core_ssyrk.c.

References CORE_ssyrk_quark(), DAG_CORE_SYRK, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_SYRK;

    /*
     * Submit CORE_ssyrk_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is INPUT and C is INOUT,
     * each sized as nb*nb floats.  nb is used only to size the entries and
     * is not forwarded as an argument.
     */
    QUARK_Insert_Task(quark, CORE_ssyrk_quark, task_flags,
        sizeof(PLASMA_enum),  &uplo,   VALUE,
        sizeof(PLASMA_enum),  &trans,  VALUE,
        sizeof(int),          &n,      VALUE,
        sizeof(int),          &k,      VALUE,
        sizeof(float),        &alpha,  VALUE,
        sizeof(float)*nb*nb,  A,       INPUT,
        sizeof(int),          &lda,    VALUE,
        sizeof(float),        &beta,   VALUE,
        sizeof(float)*nb*nb,  C,       INOUT,
        sizeof(int),          &ldc,    VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_strdalg	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	A,
		float *	C,
		float *	S,
		int	i,
		int	j,
		int	m,
		int	grsiz,
		int	BAND,
		int *	PCOL,
		int *	ACOL,
		int *	MCOL
	)

Definition at line 126 of file core_strdalg.c.

References CORE_strdalg_quark(), INPUT, LOCALITY, NODEP, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Submit CORE_strdalg_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.
     *
     * A, C and S are passed with NODEP.  PCOL and ACOL are INPUT and MCOL is
     * OUTPUT | LOCALITY, each registered with a size of one int.  The BAND
     * parameter is not forwarded to the task.
     *
     * The two float pointers are referenced as C and S, matching the
     * parameter names declared in this function's signature.
     */
    QUARK_Insert_Task(quark, CORE_strdalg_quark, task_flags,
        sizeof(int),          &uplo,   VALUE,
        sizeof(int),          &N,      VALUE,
        sizeof(int),          &NB,     VALUE,
        sizeof(PLASMA_desc),  A,       NODEP,
        sizeof(float),        C,       NODEP,
        sizeof(float),        S,       NODEP,
        sizeof(int),          &i,      VALUE,
        sizeof(int),          &j,      VALUE,
        sizeof(int),          &m,      VALUE,
        sizeof(int),          &grsiz,  VALUE,
        sizeof(int),          PCOL,    INPUT,
        sizeof(int),          ACOL,    INPUT,
        sizeof(int),          MCOL,    OUTPUT | LOCALITY,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_strmm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	uplo,
		int	transA,
		int	diag,
		int	m,
		int	n,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb
	)

Definition at line 47 of file core_strmm.c.

References CORE_strmm_quark(), DAG_CORE_TRMM, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_TRMM;

    /*
     * Submit CORE_strmm_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is INPUT and B is INOUT,
     * each sized as nb*nb floats.  nb is used only to size the entries and
     * is not forwarded as an argument.
     */
    QUARK_Insert_Task(quark, CORE_strmm_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &uplo,    VALUE,
        sizeof(PLASMA_enum),  &transA,  VALUE,
        sizeof(PLASMA_enum),  &diag,    VALUE,
        sizeof(int),          &m,       VALUE,
        sizeof(int),          &n,       VALUE,
        sizeof(float),        &alpha,   VALUE,
        sizeof(float)*nb*nb,  A,        INPUT,
        sizeof(int),          &lda,     VALUE,
        sizeof(float)*nb*nb,  B,        INOUT,
        sizeof(int),          &ldb,     VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_strmm_p2	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	uplo,
		int	transA,
		int	diag,
		int	m,
		int	n,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float **	B,
		int	ldb
	)

Definition at line 103 of file core_strmm.c.

References CORE_strmm_p2_quark(), DAG_CORE_TRMM, INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_TRMM;

    /*
     * Submit CORE_strmm_p2_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is INPUT with a size of
     * lda*nb floats.  B is a float** and is registered as INOUT with the
     * size of a single float pointer.
     */
    QUARK_Insert_Task(quark, CORE_strmm_p2_quark, task_flags,
        sizeof(PLASMA_enum),   &side,    VALUE,
        sizeof(PLASMA_enum),   &uplo,    VALUE,
        sizeof(PLASMA_enum),   &transA,  VALUE,
        sizeof(PLASMA_enum),   &diag,    VALUE,
        sizeof(int),           &m,       VALUE,
        sizeof(int),           &n,       VALUE,
        sizeof(float),         &alpha,   VALUE,
        sizeof(float)*lda*nb,  A,        INPUT,
        sizeof(int),           &lda,     VALUE,
        sizeof(float*),        B,        INOUT,
        sizeof(int),           &ldb,     VALUE,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_strsm	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	uplo,
		int	transA,
		int	diag,
		int	m,
		int	n,
		int	nb,
		float	alpha,
		float *	A,
		int	lda,
		float *	B,
		int	ldb
	)

Definition at line 46 of file core_strsm.c.

References CORE_strsm_quark(), DAG_CORE_TRSM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
    DAG_CORE_TRSM;

    /*
     * Submit CORE_strsm_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is INPUT and B is
     * INOUT | LOCALITY, each sized as nb*nb floats.  nb is used only to size
     * the entries and is not forwarded as an argument.
     */
    QUARK_Insert_Task(quark, CORE_strsm_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &uplo,    VALUE,
        sizeof(PLASMA_enum),  &transA,  VALUE,
        sizeof(PLASMA_enum),  &diag,    VALUE,
        sizeof(int),          &m,       VALUE,
        sizeof(int),          &n,       VALUE,
        sizeof(float),        &alpha,   VALUE,
        sizeof(float)*nb*nb,  A,        INPUT,
        sizeof(int),          &lda,     VALUE,
        sizeof(float)*nb*nb,  B,        INOUT | LOCALITY,
        sizeof(int),          &ldb,     VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_strtri	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	diag,
		int	n,
		int	nb,
		float *	A,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		int	iinfo
	)

Definition at line 40 of file core_strtri.c.

References CORE_strtri_quark(), INOUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Submit CORE_strtri_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A is INOUT with a size of
     * nb*nb floats.  The sequence and request pointers are themselves passed
     * as VALUE arguments (address of the pointer).
     */
    QUARK_Insert_Task(quark, CORE_strtri_quark, task_flags,
        sizeof(PLASMA_enum),       &uplo,      VALUE,
        sizeof(PLASMA_enum),       &diag,      VALUE,
        sizeof(int),               &n,         VALUE,
        sizeof(float)*nb*nb,       A,          INOUT,
        sizeof(int),               &lda,       VALUE,
        sizeof(PLASMA_sequence*),  &sequence,  VALUE,
        sizeof(PLASMA_request*),   &request,   VALUE,
        sizeof(int),               &iinfo,     VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stslqt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	T,
		int	ldt
	)

Definition at line 218 of file core_stslqt.c.

References CORE_stslqt_quark(), DAG_CORE_TSLQT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
    DAG_CORE_TSLQT;

    /*
     * Submit CORE_stslqt_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.
     *
     * A1 is INOUT restricted by QUARK_REGION_D | QUARK_REGION_L, A2 is
     * INOUT | LOCALITY and T is OUTPUT.  Two SCRATCH entries (NULL pointers)
     * close the list: one of nb floats and one of ib*nb floats.
     */
    QUARK_Insert_Task(quark, CORE_stslqt_quark, task_flags,
        sizeof(int),          &m,     VALUE,
        sizeof(int),          &n,     VALUE,
        sizeof(int),          &ib,    VALUE,
        sizeof(float)*nb*nb,  A1,     INOUT | QUARK_REGION_D | QUARK_REGION_L,
        sizeof(int),          &lda1,  VALUE,
        sizeof(float)*nb*nb,  A2,     INOUT | LOCALITY,
        sizeof(int),          &lda2,  VALUE,
        sizeof(float)*ib*nb,  T,      OUTPUT,
        sizeof(int),          &ldt,   VALUE,
        sizeof(float)*nb,     NULL,   SCRATCH,
        sizeof(float)*ib*nb,  NULL,   SCRATCH,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmlq	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 263 of file core_stsmlq.c.

References CORE_stsmlq_quark(), DAG_CORE_TSMLQ, INOUT, INPUT, LOCALITY, PlasmaLeft, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry: ib on the left side, nb otherwise. */
    int ldwork = (side == PlasmaLeft) ? ib : nb;

    DAG_CORE_TSMLQ;

    /*
     * Submit CORE_stsmlq_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A1 is INOUT, A2 is
     * INOUT | LOCALITY, V and T are INPUT.  A SCRATCH entry (NULL pointer)
     * of ib*nb floats is followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmlq_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &trans,   VALUE,
        sizeof(int),          &m1,      VALUE,
        sizeof(int),          &n1,      VALUE,
        sizeof(int),          &m2,      VALUE,
        sizeof(int),          &n2,      VALUE,
        sizeof(int),          &k,       VALUE,
        sizeof(int),          &ib,      VALUE,
        sizeof(float)*nb*nb,  A1,       INOUT,
        sizeof(int),          &lda1,    VALUE,
        sizeof(float)*nb*nb,  A2,       INOUT | LOCALITY,
        sizeof(int),          &lda2,    VALUE,
        sizeof(float)*nb*nb,  V,        INPUT,
        sizeof(int),          &ldv,     VALUE,
        sizeof(float)*ib*nb,  T,        INPUT,
        sizeof(int),          &ldt,     VALUE,
        sizeof(float)*ib*nb,  NULL,     SCRATCH,
        sizeof(int),          &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmlq_corner	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	A3,
		int	lda3,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 219 of file core_stsmlq_corner.c.

References CORE_stsmlq_corner_quark(), INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry; always nb here. */
    int ldwork = nb;

    /*
     * Submit CORE_stsmlq_corner_quark through QUARK_Insert_Task as (byte
     * size, address, flag) triples closed by a lone 0.  A1 and A3 are INOUT
     * restricted by QUARK_REGION_D | QUARK_REGION_U, A2 is plain INOUT, and
     * V and T are INPUT.  A SCRATCH entry (NULL pointer) of 4*nb*nb floats
     * is followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmlq_corner_quark, task_flags,
        sizeof(int),            &m1,      VALUE,
        sizeof(int),            &n1,      VALUE,
        sizeof(int),            &m2,      VALUE,
        sizeof(int),            &n2,      VALUE,
        sizeof(int),            &m3,      VALUE,
        sizeof(int),            &n3,      VALUE,
        sizeof(int),            &k,       VALUE,
        sizeof(int),            &ib,      VALUE,
        sizeof(int),            &nb,      VALUE,
        sizeof(float)*nb*nb,    A1,       INOUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),            &lda1,    VALUE,
        sizeof(float)*nb*nb,    A2,       INOUT,
        sizeof(int),            &lda2,    VALUE,
        sizeof(float)*nb*nb,    A3,       INOUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),            &lda3,    VALUE,
        sizeof(float)*nb*nb,    V,        INPUT,
        sizeof(int),            &ldv,     VALUE,
        sizeof(float)*ib*nb,    T,        INPUT,
        sizeof(int),            &ldt,     VALUE,
        sizeof(float)*4*nb*nb,  NULL,     SCRATCH,
        sizeof(int),            &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmlq_sytra1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 174 of file core_stsmlq_sytra1.c.

References CORE_stsmlq_sytra1_quark(), INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry: ib on the left side, nb otherwise. */
    int ldwork = (side == PlasmaLeft) ? ib : nb;

    /*
     * Submit CORE_stsmlq_sytra1_quark through QUARK_Insert_Task as (byte
     * size, address, flag) triples closed by a lone 0.  A1 is INOUT
     * restricted by QUARK_REGION_U | QUARK_REGION_D, A2 is plain INOUT, and
     * V and T are INPUT.  A SCRATCH entry (NULL pointer) of ib*nb floats is
     * followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmlq_sytra1_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &trans,   VALUE,
        sizeof(int),          &m1,      VALUE,
        sizeof(int),          &n1,      VALUE,
        sizeof(int),          &m2,      VALUE,
        sizeof(int),          &n2,      VALUE,
        sizeof(int),          &k,       VALUE,
        sizeof(int),          &ib,      VALUE,
        sizeof(float)*nb*nb,  A1,       INOUT|QUARK_REGION_U|QUARK_REGION_D,
        sizeof(int),          &lda1,    VALUE,
        sizeof(float)*nb*nb,  A2,       INOUT,
        sizeof(int),          &lda2,    VALUE,
        sizeof(float)*nb*nb,  V,        INPUT,
        sizeof(int),          &ldv,     VALUE,
        sizeof(float)*ib*nb,  T,        INPUT,
        sizeof(int),          &ldt,     VALUE,
        sizeof(float)*ib*nb,  NULL,     SCRATCH,
        sizeof(int),          &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmqr	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 258 of file core_stsmqr.c.

References CORE_stsmqr_quark(), DAG_CORE_TSMQR, INOUT, INPUT, LOCALITY, PlasmaLeft, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry: ib on the left side, nb otherwise. */
    int ldwork = (side == PlasmaLeft) ? ib : nb;

    DAG_CORE_TSMQR;

    /*
     * Submit CORE_stsmqr_quark through QUARK_Insert_Task as (byte size,
     * address, flag) triples closed by a lone 0.  A1 is INOUT, A2 is
     * INOUT | LOCALITY, V and T are INPUT.  A SCRATCH entry (NULL pointer)
     * of ib*nb floats is followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmqr_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &trans,   VALUE,
        sizeof(int),          &m1,      VALUE,
        sizeof(int),          &n1,      VALUE,
        sizeof(int),          &m2,      VALUE,
        sizeof(int),          &n2,      VALUE,
        sizeof(int),          &k,       VALUE,
        sizeof(int),          &ib,      VALUE,
        sizeof(float)*nb*nb,  A1,       INOUT,
        sizeof(int),          &lda1,    VALUE,
        sizeof(float)*nb*nb,  A2,       INOUT | LOCALITY,
        sizeof(int),          &lda2,    VALUE,
        sizeof(float)*nb*nb,  V,        INPUT,
        sizeof(int),          &ldv,     VALUE,
        sizeof(float)*ib*nb,  T,        INPUT,
        sizeof(int),          &ldt,     VALUE,
        sizeof(float)*ib*nb,  NULL,     SCRATCH,
        sizeof(int),          &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmqr_corner	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	A3,
		int	lda3,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 214 of file core_stsmqr_corner.c.

References CORE_stsmqr_corner_quark(), INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry; always nb here. */
    int ldwork = nb;

    /*
     * Submit CORE_stsmqr_corner_quark through QUARK_Insert_Task as (byte
     * size, address, flag) triples closed by a lone 0.  A1 and A3 are INOUT
     * restricted by QUARK_REGION_D | QUARK_REGION_L, A2 is plain INOUT, and
     * V and T are INPUT.  A SCRATCH entry (NULL pointer) of 4*nb*nb floats
     * is followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmqr_corner_quark, task_flags,
        sizeof(int),            &m1,      VALUE,
        sizeof(int),            &n1,      VALUE,
        sizeof(int),            &m2,      VALUE,
        sizeof(int),            &n2,      VALUE,
        sizeof(int),            &m3,      VALUE,
        sizeof(int),            &n3,      VALUE,
        sizeof(int),            &k,       VALUE,
        sizeof(int),            &ib,      VALUE,
        sizeof(int),            &nb,      VALUE,
        sizeof(float)*nb*nb,    A1,       INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),            &lda1,    VALUE,
        sizeof(float)*nb*nb,    A2,       INOUT,
        sizeof(int),            &lda2,    VALUE,
        sizeof(float)*nb*nb,    A3,       INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),            &lda3,    VALUE,
        sizeof(float)*nb*nb,    V,        INPUT,
        sizeof(int),            &ldv,     VALUE,
        sizeof(float)*ib*nb,    T,        INPUT,
        sizeof(int),            &ldt,     VALUE,
        sizeof(float)*4*nb*nb,  NULL,     SCRATCH,
        sizeof(int),            &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsmqr_sytra1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 173 of file core_stsmqr_sytra1.c.

References CORE_stsmqr_sytra1_quark(), INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
    /* Leading dimension passed with the scratch entry: ib on the left side, nb otherwise. */
    int ldwork = (side == PlasmaLeft) ? ib : nb;

    /*
     * Submit CORE_stsmqr_sytra1_quark through QUARK_Insert_Task as (byte
     * size, address, flag) triples closed by a lone 0.  A1 is INOUT
     * restricted by QUARK_REGION_L | QUARK_REGION_D, A2 is plain INOUT, and
     * V and T are INPUT.  A SCRATCH entry (NULL pointer) of ib*nb floats is
     * followed by ldwork as a VALUE.
     */
    QUARK_Insert_Task(quark, CORE_stsmqr_sytra1_quark, task_flags,
        sizeof(PLASMA_enum),  &side,    VALUE,
        sizeof(PLASMA_enum),  &trans,   VALUE,
        sizeof(int),          &m1,      VALUE,
        sizeof(int),          &n1,      VALUE,
        sizeof(int),          &m2,      VALUE,
        sizeof(int),          &n2,      VALUE,
        sizeof(int),          &k,       VALUE,
        sizeof(int),          &ib,      VALUE,
        sizeof(float)*nb*nb,  A1,       INOUT|QUARK_REGION_L|QUARK_REGION_D,
        sizeof(int),          &lda1,    VALUE,
        sizeof(float)*nb*nb,  A2,       INOUT,
        sizeof(int),          &lda2,    VALUE,
        sizeof(float)*nb*nb,  V,        INPUT,
        sizeof(int),          &ldv,     VALUE,
        sizeof(float)*ib*nb,  T,        INPUT,
        sizeof(int),          &ldt,     VALUE,
        sizeof(float)*ib*nb,  NULL,     SCRATCH,
        sizeof(int),          &ldwork,  VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsqrt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	T,
		int	ldt
	)

Definition at line 209 of file core_stsqrt.c.

References CORE_stsqrt_quark(), DAG_CORE_TSQRT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_stsqrt_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TSQRT;
    QUARK_Insert_Task(quark, CORE_stsqrt_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. Note that nb itself is not forwarded. */
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /*
         * A1 carries region flags.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_U presumably restrict the
         * dependency to the diagonal and upper parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    A1,            INOUT | QUARK_REGION_D | QUARK_REGION_U,
        sizeof(int),                        &lda1,  VALUE,
        /* A2 is the operand tagged LOCALITY. */
        sizeof(float)*nb*nb,    A2,            INOUT | LOCALITY,
        sizeof(int),                        &lda2,  VALUE,
        /* T is flagged OUTPUT only. */
        sizeof(float)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        /*
         * Two SCRATCH entries with no buffer supplied: nb floats, then ib*nb floats.
         * NOTE(review): presumably the kernel's TAU and WORK areas -- confirm
         * against CORE_stsqrt_quark.
         */
        sizeof(float)*nb,       NULL,          SCRATCH,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ststrf	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	U,
		int	ldu,
		float *	A,
		int	lda,
		float *	L,
		int	ldl,
		int *	IPIV,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request,
		PLASMA_bool	check_info,
		int	iinfo
	)

Definition at line 220 of file core_ststrf.c.

References CORE_ststrf_quark(), DAG_CORE_TSTRF, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_ststrf_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TSTRF;
    QUARK_Insert_Task(quark, CORE_ststrf_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. */
        sizeof(int),                        &m,             VALUE,
        sizeof(int),                        &n,             VALUE,
        sizeof(int),                        &ib,            VALUE,
        sizeof(int),                        &nb,            VALUE,
        /*
         * U carries region flags.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_U presumably restrict the
         * dependency to the diagonal and upper parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    U,                     INOUT | QUARK_REGION_D | QUARK_REGION_U,
        sizeof(int),                        &ldu,           VALUE,
        /* A is the operand tagged LOCALITY. */
        sizeof(float)*nb*nb,    A,                     INOUT | LOCALITY,
        sizeof(int),                        &lda,           VALUE,
        /* L and IPIV are flagged OUTPUT only; IPIV is declared as nb ints. */
        sizeof(float)*ib*nb,    L,                     OUTPUT,
        sizeof(int),                        &ldl,           VALUE,
        sizeof(int)*nb,                      IPIV,                  OUTPUT,
        /* SCRATCH entry of ib*nb floats with no buffer supplied. */
        sizeof(float)*ib*nb,    NULL,                  SCRATCH,
        /*
         * nb is passed a second time, directly after the scratch entry.
         * NOTE(review): presumably consumed as the scratch leading dimension --
         * confirm against CORE_ststrf_quark.
         */
        sizeof(int),                        &nb,            VALUE,
        /* The sequence/request pointers themselves are copied by VALUE. */
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sttlqt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	T,
		int	ldt
	)

Definition at line 244 of file core_sttlqt.c.

References CORE_sttlqt_quark(), DAG_CORE_TTLQT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_sttlqt_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TTLQT;
    QUARK_Insert_Task(quark, CORE_sttlqt_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. Note that nb itself is not forwarded. */
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /*
         * Both A1 and A2 carry the same region flags; A2 additionally has LOCALITY.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_L presumably restrict the
         * dependency to the diagonal and lower parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    A1,            INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(float)*nb*nb,    A2,            INOUT|QUARK_REGION_D|QUARK_REGION_L|LOCALITY,
        sizeof(int),                        &lda2,  VALUE,
        /* T is flagged OUTPUT only. */
        sizeof(float)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        /*
         * Two SCRATCH entries with no buffer supplied: nb floats, then ib*nb floats.
         * NOTE(review): presumably the kernel's TAU and WORK areas -- confirm
         * against CORE_sttlqt_quark.
         */
        sizeof(float)*nb,       NULL,          SCRATCH,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sttmlq	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 259 of file core_sttmlq.c.

References CORE_sttmlq_quark(), DAG_CORE_TTMLQ, INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_sttmlq_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* ldwork is ib when side equals PlasmaLeft, nb for any other side value. */
    int ldwork = side == PlasmaLeft ? ib : nb;
    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TTMLQ;
    QUARK_Insert_Task(quark, CORE_sttmlq_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. */
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /* A1 and A2 are both INOUT, with no region flags. */
        sizeof(float)*nb*nb,    A1,            INOUT,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(float)*nb*nb,    A2,            INOUT,
        sizeof(int),                        &lda2,  VALUE,
        /*
         * V is read-only and is the operand carrying region flags here.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_L presumably restrict the
         * dependency to the diagonal and lower parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    V,             INPUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),                        &ldv,   VALUE,
        sizeof(float)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        /* SCRATCH entry of ib*nb floats with no buffer supplied, followed by ldwork. */
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        sizeof(int),                        &ldwork,    VALUE,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sttmqr	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	V,
		int	ldv,
		float *	T,
		int	ldt
	)

Definition at line 251 of file core_sttmqr.c.

References CORE_sttmqr_quark(), DAG_CORE_TTMQR, INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_sttmqr_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* ldwork is ib when side equals PlasmaLeft, nb for any other side value. */
    int ldwork = side == PlasmaLeft ? ib : nb;
    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TTMQR;
    QUARK_Insert_Task(quark, CORE_sttmqr_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. */
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /* A1 and A2 are both INOUT, with no region flags. */
        sizeof(float)*nb*nb,    A1,            INOUT,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(float)*nb*nb,    A2,            INOUT,
        sizeof(int),                        &lda2,  VALUE,
        /*
         * V is read-only and is the operand carrying region flags here.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_U presumably restrict the
         * dependency to the diagonal and upper parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    V,             INPUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),                        &ldv,   VALUE,
        sizeof(float)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        /* SCRATCH entry of ib*nb floats with no buffer supplied, followed by ldwork. */
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        sizeof(int),                        &ldwork,    VALUE,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sttqrt	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m,
		int	n,
		int	ib,
		int	nb,
		float *	A1,
		int	lda1,
		float *	A2,
		int	lda2,
		float *	T,
		int	ldt
	)

Definition at line 244 of file core_sttqrt.c.

References CORE_sttqrt_quark(), DAG_CORE_TTQRT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Hands CORE_sttqrt_quark to QUARK_Insert_Task together with its
     * arguments. Each argument is written as a (size, address, flag) triple
     * and the list ends with a lone 0.
     */

    /* Macro defined outside this block. NOTE(review): presumably a DAG-tracing hook -- confirm. */
    DAG_CORE_TTQRT;
    QUARK_Insert_Task(quark, CORE_sttqrt_quark, task_flags,
        /* Scalar parameters, each flagged VALUE. Note that nb itself is not forwarded. */
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /*
         * Both A1 and A2 carry the same region flags; A2 additionally has LOCALITY.
         * NOTE(review): QUARK_REGION_D / QUARK_REGION_U presumably restrict the
         * dependency to the diagonal and upper parts -- confirm in QUARK docs.
         */
        sizeof(float)*nb*nb,    A1,            INOUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(float)*nb*nb,    A2,            INOUT|QUARK_REGION_D|QUARK_REGION_U|LOCALITY,
        sizeof(int),                        &lda2,  VALUE,
        /* T is flagged OUTPUT only. */
        sizeof(float)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        /*
         * Two SCRATCH entries with no buffer supplied: nb floats, then ib*nb floats.
         * NOTE(review): presumably the kernel's TAU and WORK areas -- confirm
         * against CORE_sttqrt_quark.
         */
        sizeof(float)*nb,       NULL,          SCRATCH,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        /* End of the argument list. */
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation

Purpose

Arguments

Purpose

Arguments

Purpose

Arguments

side trans size

T N x K

Arguments

Arguments

Further Details