PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_stsqrt.c File Reference
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_stsqrt.c:

Go to the source code of this file.

Macros

#define REAL

Functions

int CORE_stsqrt (int M, int N, int IB, float *A1, int LDA1, float *A2, int LDA2, float *T, int LDT, float *TAU, float *WORK)
void QUARK_CORE_stsqrt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, float *A1, int lda1, float *A2, int lda2, float *T, int ldt)
void CORE_stsqrt_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Jakub Kurzak
Date:
2010-11-15 s Tue Nov 22 14:35:17 2011

Definition in file core_stsqrt.c.


Macro Definition Documentation

#define REAL

Definition at line 20 of file core_stsqrt.c.


Function Documentation

int CORE_stsqrt ( int  M,
int  N,
int  IB,
float *  A1,
int  LDA1,
float *  A2,
int  LDA2,
float *  T,
int  LDT,
float *  TAU,
float *  WORK 
)

CORE_stsqrt computes a QR factorization of a rectangular matrix formed by coupling a complex N-by-N upper triangular tile A1 on top of a complex M-by-N tile A2:

| A1 | = Q * R | A2 |

Parameters:
[in]MThe number of columns of the tile A2. M >= 0.
[in]NThe number of rows of the tile A1. The number of columns of the tiles A1 and A2. N >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the N-by-N tile A1. On exit, the elements on and above the diagonal of the array contain the N-by-N upper trapezoidal tile R; the elements below the diagonal are not referenced.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,N).
[in,out]A2On entry, the M-by-N tile A2. On exit, all the elements with the array TAU, represent the unitary tile Q as a product of elementary reflectors (see Further Details).
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M).
[out]TThe IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[out]TAUThe scalar factors of the elementary reflectors (see Further Details).
[out]WORK
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 97 of file core_stsqrt.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sger(), cblas_strmv(), CblasColMajor, CORE_stsmqr(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaTrans, and PlasmaUpper.

{
static float zone = 1.0;
static float zzero = 0.0;
float alpha;
int i, ii, sb;
/* Check input arguments */
if (M < 0) {
coreblas_error(1, "Illegal value of M");
return -1;
}
if (N < 0) {
coreblas_error(2, "Illegal value of N");
return -2;
}
if (IB < 0) {
coreblas_error(3, "Illegal value of IB");
return -3;
}
if ((LDA2 < max(1,M)) && (M > 0)) {
coreblas_error(8, "Illegal value of LDA2");
return -8;
}
/* Quick return */
if ((M == 0) || (N == 0) || (IB == 0))
for(ii = 0; ii < N; ii += IB) {
sb = min(N-ii, IB);
for(i = 0; i < sb; i++) {
/*
* Generate elementary reflector H( II*IB+I ) to annihilate
* A( II*IB+I:M, II*IB+I )
*/
LAPACKE_slarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);
if (ii+i+1 < N) {
/*
* Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
*/
alpha = -(TAU[ii+i]);
sb-i-1,
&A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
WORK, 1);
#ifdef COMPLEX
LAPACKE_slacgv_work(sb-i-1, WORK, 1);
#endif
M, sb-i-1,
(zone), &A2[LDA2*(ii+i+1)], LDA2,
&A2[LDA2*(ii+i)], 1,
(zone), WORK, 1);
#ifdef COMPLEX
LAPACKE_slacgv_work(sb-i-1, WORK, 1 );
#endif
sb-i-1, (alpha),
WORK, 1,
&A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
LAPACKE_slacgv_work(sb-i-1, WORK, 1 );
#endif
CblasColMajor, M, sb-i-1, (alpha),
&A2[LDA2*(ii+i)], 1,
WORK, 1,
&A2[LDA2*(ii+i+1)], LDA2);
}
/*
* Calculate T
*/
alpha = -TAU[ii+i];
CblasColMajor, (CBLAS_TRANSPOSE)PlasmaTrans, M, i,
(alpha), &A2[LDA2*ii], LDA2,
&A2[LDA2*(ii+i)], 1,
(zzero), &T[LDT*(ii+i)], 1);
&T[LDT*ii], LDT,
&T[LDT*(ii+i)], 1);
T[LDT*(ii+i)+i] = TAU[ii+i];
}
if (N > ii+sb) {
PlasmaLeft, PlasmaTrans,
sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
&A1[LDA1*(ii+sb)+ii], LDA1,
&A2[LDA2*(ii+sb)], LDA2,
&A2[LDA2*ii], LDA2,
&T[LDT*ii], LDT,
WORK, sb);
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_stsqrt_quark ( Quark quark)

Definition at line 238 of file core_stsqrt.c.

References CORE_stsqrt(), quark_unpack_args_11, T, and TAU.

{
int m;
int n;
int ib;
float *A1;
int lda1;
float *A2;
int lda2;
float *T;
int ldt;
float *TAU;
float *WORK;
quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
CORE_stsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_stsqrt ( Quark quark,
Quark_Task_Flags task_flags,
int  m,
int  n,
int  ib,
int  nb,
float *  A1,
int  lda1,
float *  A2,
int  lda2,
float *  T,
int  ldt 
)

Definition at line 209 of file core_stsqrt.c.

References CORE_stsqrt_quark(), DAG_CORE_TSQRT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(float)*nb*nb, A1, INOUT | QUARK_REGION_D | QUARK_REGION_U,
sizeof(int), &lda1, VALUE,
sizeof(float)*nb*nb, A2, INOUT | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(float)*ib*nb, T, OUTPUT,
sizeof(int), &ldt, VALUE,
sizeof(float)*nb, NULL, SCRATCH,
sizeof(float)*ib*nb, NULL, SCRATCH,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: