PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_spemv.c File Reference
#include <cblas.h>
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_spemv.c:

Go to the source code of this file.

Functions

int CORE_spemv (int trans, int storev, int M, int N, int L, float ALPHA, float *A, int LDA, float *X, int INCX, float BETA, float *Y, int INCY, float *WORK)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Dulceneia Becker
Date:
2011-06-29 (generated for precision "s" on Tue Nov 22 14:35:24 2011)

Definition in file core_spemv.c.


Function Documentation

int CORE_spemv ( int  trans,
int  storev,
int  M,
int  N,
int  L,
float  ALPHA,
float *  A,
int  LDA,
float *  X,
int  INCX,
float  BETA,
float *  Y,
int  INCY,
float *  WORK 
)

SPEMV performs one of the matrix-vector operations

y = alpha*op( A )*x + beta*y

where op( A ) is one of

op( A ) = A or op( A ) = A**T,

alpha and beta are scalars, x and y are vectors and A is a pentagonal matrix (see further details).

Arguments

Parameters:
[in]storev
    @arg PlasmaColumnwise :  array A stored columnwise
    @arg PlasmaRowwise    :  array A stored rowwise
[in]trans
    @arg PlasmaNoTrans   :  y := alpha*A*x    + beta*y.
    @arg PlasmaTrans     :  y := alpha*A**T*x + beta*y.
[in] M : Number of rows of the matrix A. M must be at least zero.
[in] N : Number of columns of the matrix A. N must be at least zero.
[in] L : Order of the triangle within the matrix A (L specifies the shape of the matrix A; see further details). L must not exceed min(M, N).
[in] ALPHA : Scalar alpha.
[in] A : Array of size LDA-by-N. On entry, the leading M-by-N part of the array A must contain the matrix of coefficients.
[in] LDA : Leading dimension of array A. LDA must be at least max(1, M).
[in] X : On entry, the incremented array X must contain the vector x.
[in] INCX : Increment for the elements of X. INCX must be at least one.
[in] BETA : Scalar beta.
[in,out] Y : On entry, the incremented array Y must contain the vector y. On exit, Y holds the updated vector y.
[in] INCY : Increment for the elements of Y. INCY must be at least one.
[in] WORK : Workspace array of size at least L.

Further Details

              |     N    |
           _   ___________   _
              |          |
    A:        |          |
         M-L  |          |
              |          |  M
           _  |.....     |
              \    :     |
           L    \  :     |
           _      \:_____|  _

              |  L | N-L |

Returns:
Return values:
PLASMA_SUCCESS : successful exit
<0 : if -i, the i-th argument had an illegal value

Definition at line 118 of file core_spemv.c.

References cblas_saxpy(), cblas_scopy(), cblas_sgemv(), cblas_sscal(), cblas_strmv(), CblasColMajor, coreblas_error, L, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRowwise, PlasmaTrans, and PlasmaUpper.

{
/*
* y = alpha * op(A) * x + beta * y
*
* Body of CORE_spemv. It first validates the arguments, then handles the
* triangular part of the pentagonal matrix A through the WORK buffer and the
* rectangular part(s) directly, accumulating the result into Y.
*
* NOTE(review): this listing appears to have lost several source lines
* (condition headers, return statements and the first line of most BLAS
* calls). Each suspected gap is marked below with a NOTE(review); the
* suggested text is an assumption and must be confirmed against the original
* core_spemv.c before the listing is relied upon.
*/
int K;
/* Zero constant for the exact comparisons against ALPHA and BETA below. */
static float zzero = 0.0;
/* Check input arguments */
/* Each failing check reports the position of the offending argument in the
* parameter list and returns its negative. */
/* The third comparison repeats the second one, so it adds no extra test. */
if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans) && (trans != PlasmaTrans)) {
coreblas_error(1, "Illegal value of trans");
return -1;
}
/* NOTE(review): the `if` header of this check is missing from the listing;
* presumably it tests that storev is neither PlasmaColumnwise nor
* PlasmaRowwise - confirm. */
coreblas_error(2, "Illegal value of storev");
return -2;
}
/* NOTE(review): the second half of this condition is missing from the
* listing. Since the Rowwise / Trans branch below is rejected as not
* implemented, it presumably reads
* ((storev == PlasmaRowwise) && (trans == PlasmaNoTrans)) )) {
* - confirm. */
if (!( ((storev == PlasmaColumnwise) && (trans != PlasmaNoTrans)) ||
coreblas_error(2, "Illegal values of trans/storev");
return -2;
}
if (M < 0) {
coreblas_error(3, "Illegal value of M");
return -3;
}
if (N < 0) {
coreblas_error(4, "Illegal value of N");
return -4;
}
/* Only the upper bound of L is tested; a negative L passes this check. */
if (L > min(M ,N)) {
coreblas_error(5, "Illegal value of L");
return -5;
}
if (LDA < max(1,M)) {
coreblas_error(8, "Illegal value of LDA");
return -8;
}
/* Zero and negative increments are both rejected. */
if (INCX < 1) {
coreblas_error(10, "Illegal value of INCX");
return -10;
}
if (INCY < 1) {
coreblas_error(13, "Illegal value of INCY");
return -13;
}
/* Quick return */
/* NOTE(review): the statements controlled by the next two `if`s are missing
* from the listing; presumably each is `return PLASMA_SUCCESS;` - confirm.
* If so, the ALPHA == 0 && BETA == 0 case returns without writing Y, whereas
* the formula above would give y = 0 - verify that this is intended. */
if ((M == 0) || (N == 0))
if ((ALPHA == zzero) && (BETA == zzero))
/* If L < 2, there is no triangular part */
/* With L forced to 0 the L > 0 branches below are skipped and only the
* rectangular products run. */
if (L == 1) L = 0;
/* Columnwise */
/* NOTE(review): the opening `if (storev == PlasmaColumnwise) {` that pairs
* with the closing brace before the Rowwise `else` is missing from the
* listing - confirm. */
/*
* ______________
* | | | A1: A[ 0 ]
* | | | A2: A[ M-L ]
* | A1 | | A3: A[ (N-L) * LDA ]
* | | |
* |______| A3 |
* \ | |
* \ A2 | |
* \ | |
* \|_____|
*
*/
/* Columnwise / NoTrans */
if (trans == PlasmaNoTrans) {
coreblas_error(1, "The case PlasmaNoTrans / PlasmaColumnwise is not yet implemented");
return -1;
}
/* Columnwise / [Conj]Trans */
else {
/* L top rows of y */
if (L > 0) {
/* w = A_2' * x_2 */
/* NOTE(review): the heads of the next two calls are missing from the
* listing. From the argument lists they look like a cblas_scopy of the last
* L entries of x into WORK, followed by a cblas_strmv applying the
* triangular block at A[M-L] to WORK in place - confirm the call names and
* the uplo/trans/diag arguments. */
L, &X[INCX*(M-L)], INCX, WORK, 1);
L, &A[M-L], LDA, WORK, 1);
if (M > L) {
/* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
/* NOTE(review): call head missing; presumably a cblas_sgemv on the
* (M-L)-by-L block at A - confirm. */
M-L, L, (ALPHA), A, LDA,
X, INCX, (BETA), Y, INCY);
/* y_1 = y_1 + alpha * w */
cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
} else {
/* y_1 = y_1 + alpha * w */
/* No rectangular block above the triangle: scale and combine directly. */
if (BETA == zzero) {
/* BETA is zero, so Y is overwritten with alpha * w instead of being scaled. */
cblas_sscal(L, (ALPHA), WORK, 1);
cblas_scopy(L, WORK, 1, Y, INCY);
} else {
cblas_sscal(L, (BETA), Y, INCY);
cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
}
}
}
/* N-L bottom rows of Y */
if (N > L) {
K = N - L;
/* NOTE(review): call head missing; presumably a cblas_sgemv on the M-by-K
* block starting at column L of A, writing to Y from element L on -
* confirm. */
M, K, (ALPHA), &A[LDA*L], LDA,
X, INCX, (BETA), &Y[INCY*L], INCY);
}
}
}
/* Rowwise */
else {
/*
* --------------
* | | \ A1: A[ 0 ]
* | A1 | \ A2: A[ (N-L) * LDA ]
* | | A2 \ A3: A[ L ]
* |--------------------\
* | A3 |
* ----------------------
*
*/
/* Rowwise / NoTrans */
if (trans == PlasmaNoTrans) {
/* L top rows of A and y */
if (L > 0) {
/* w = A_2 * x_2 */
/* NOTE(review): the heads of the next two calls are missing from the
* listing. From the argument lists they look like a cblas_scopy of the last
* L entries of x into WORK, followed by a cblas_strmv applying the
* triangular block at A[LDA*(N-L)] to WORK in place - confirm the call
* names and the uplo/trans/diag arguments. */
L, &X[INCX*(N-L)], INCX, WORK, 1);
L, &A[LDA*(N-L)], LDA, WORK, 1);
if (N > L) {
/* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
/* NOTE(review): the line naming the called function is missing;
* presumably `cblas_sgemv(` - confirm. */
CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
L, N-L, (ALPHA), A, LDA,
X, INCX, (BETA), Y, INCY);
/* y_1 = y_1 + alpha * w */
cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
} else {
/* y_1 = y_1 + alpha * w */
/* No rectangular block beside the triangle: scale and combine directly. */
if (BETA == zzero) {
/* BETA is zero, so Y is overwritten with alpha * w instead of being scaled. */
cblas_sscal(L, (ALPHA), WORK, 1);
cblas_scopy(L, WORK, 1, Y, INCY);
} else {
cblas_sscal(L, (BETA), Y, INCY);
cblas_saxpy(L, (ALPHA), WORK, 1, Y, INCY);
}
}
}
/* M-L bottom rows of Y */
if (M > L) {
/* NOTE(review): the line naming the called function is missing;
* presumably `cblas_sgemv(` on the (M-L)-by-N block starting at row L of A
* - confirm. */
CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
M-L, N, (ALPHA), &A[L], LDA,
X, INCX, (BETA), &Y[INCY*L], INCY);
}
}
/* Rowwise / [Conj]Trans */
else {
coreblas_error(1, "The case Plasma[Conj]Trans / PlasmaRowwise is not yet implemented");
return -1;
}
}
/* NOTE(review): no return statement is visible on the success path although
* the function returns int; presumably `return PLASMA_SUCCESS;` was lost
* here - confirm. */
}

Here is the call graph for this function:

Here is the caller graph for this function: