PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sssssm.c File Reference
#include <cblas.h>
#include "common.h"
Include dependency graph for core_sssssm.c:

Go to the source code of this file.

Functions

int CORE_sssssm (int M1, int N1, int M2, int N2, int K, int IB, float *A1, int LDA1, float *A2, int LDA2, float *L1, int LDL1, float *L2, int LDL2, int *IPIV)
void QUARK_CORE_sssssm (Quark *quark, Quark_Task_Flags *task_flags, int m1, int n1, int m2, int n2, int k, int ib, int nb, float *A1, int lda1, float *A2, int lda2, float *L1, int ldl1, float *L2, int ldl2, int *IPIV)
void CORE_sssssm_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Jakub Kurzak
Date:
2010-11-15 s Tue Nov 22 14:35:18 2011

Definition in file core_sssssm.c.


Function Documentation

int CORE_sssssm ( int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  IB,
float *  A1,
int  LDA1,
float *  A2,
int  LDA2,
float *  L1,
int  LDL1,
float *  L2,
int  LDL2,
int *  IPIV 
)

CORE_ststrf computes an LU factorization of a complex matrix formed by an upper triangular M1-by-N1 tile U on top of a M2-by-N2 tile A (N1 == N2) using partial pivoting with row interchanges.

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0.
[in]N2The number of columns of the tile A2. N2 >= 0.
[in]KThe number of columns of the tiles L1 and L2. K >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of L.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of L.
[in]LDA2The leading dimension of the array A2. LDA2 >= max(1,M2).
[in]L1The IB-by-K lower triangular tile as returned by CORE_ststrf.
[in]LDL1The leading dimension of the array L1. LDL1 >= max(1,IB).
[in]L2The M2-by-N2 tile as returned by CORE_ststrf.
[in]LDL2The leading dimension of the array L2. LDL2 >= max(1,M2).
[in]IPIVas returned by CORE_ststrf.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if INFO = -k, the k-th argument had an illegal value

Definition at line 90 of file core_sssssm.c.

References cblas_sgemm(), cblas_sswap(), cblas_strsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, coreblas_error, max, min, and PLASMA_SUCCESS.

{
static float zone = 1.0;
static float mzone =-1.0;
int i, ii, sb;
int im, ip;
/* Check input arguments */
if (M1 < 0) {
coreblas_error(1, "Illegal value of M1");
return -1;
}
if (N1 < 0) {
coreblas_error(2, "Illegal value of N1");
return -2;
}
if (M2 < 0) {
coreblas_error(3, "Illegal value of M2");
return -3;
}
if (N2 < 0) {
coreblas_error(4, "Illegal value of N2");
return -4;
}
if (K < 0) {
coreblas_error(5, "Illegal value of K");
return -5;
}
if (IB < 0) {
coreblas_error(6, "Illegal value of IB");
return -6;
}
if (LDA1 < max(1,M1)) {
coreblas_error(8, "Illegal value of LDA1");
return -8;
}
if (LDA2 < max(1,M2)) {
coreblas_error(10, "Illegal value of LDA2");
return -10;
}
if (LDL1 < max(1,IB)) {
coreblas_error(12, "Illegal value of LDL1");
return -12;
}
if (LDL2 < max(1,M2)) {
coreblas_error(14, "Illegal value of LDL2");
return -14;
}
/* Quick return */
if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
ip = 0;
for(ii = 0; ii < K; ii += IB) {
sb = min(K-ii, IB);
for(i = 0; i < sb; i++) {
im = IPIV[ip]-1;
if (im != (ii+i)) {
im = im - M1;
cblas_sswap(N1, &A1[ii+i], LDA1, &A2[im], LDA2);
}
ip = ip + 1;
}
sb, N1, (zone),
&L1[LDL1*ii], LDL1,
&A1[ii], LDA1);
M2, N2, sb,
(mzone), &L2[LDL2*ii], LDL2,
&A1[ii], LDA1,
(zone), A2, LDA2);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_sssssm_quark ( Quark quark)

Definition at line 219 of file core_sssssm.c.

References CORE_sssssm(), IPIV, and quark_unpack_args_15.

{
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
float *A1;
int lda1;
float *A2;
int lda2;
float *L1;
int ldl1;
float *L2;
int ldl2;
int *IPIV;
quark_unpack_args_15(quark, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
CORE_sssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sssssm ( Quark quark,
Quark_Task_Flags task_flags,
int  m1,
int  n1,
int  m2,
int  n2,
int  k,
int  ib,
int  nb,
float *  A1,
int  lda1,
float *  A2,
int  lda2,
float *  L1,
int  ldl1,
float *  L2,
int  ldl2,
int *  IPIV 
)

Definition at line 184 of file core_sssssm.c.

References CORE_sssssm_quark(), DAG_CORE_SSSSM, INOUT, INPUT, LOCALITY, QUARK_Insert_Task(), and VALUE.

{
sizeof(int), &m1, VALUE,
sizeof(int), &n1, VALUE,
sizeof(int), &m2, VALUE,
sizeof(int), &n2, VALUE,
sizeof(int), &k, VALUE,
sizeof(int), &ib, VALUE,
sizeof(float)*nb*nb, A1, INOUT,
sizeof(int), &lda1, VALUE,
sizeof(float)*nb*nb, A2, INOUT | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(float)*ib*nb, L1, INPUT,
sizeof(int), &ldl1, VALUE,
sizeof(float)*ib*nb, L2, INPUT,
sizeof(int), &ldl2, VALUE,
sizeof(int)*nb, IPIV, INPUT,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: