#include <inttypes.h>

Include dependency graph for tile.h:

This graph shows which files directly or indirectly include this file:

Macros
#define	ELTADDR(A, type, m, n) (type *)plasma_geteltaddr(A, m, n)
#define	ELTLDD(A, k) ( ( (((k)-1)/(A).mb) + (A).i/(A).mb) < (A).lm1 ? (A).mb : (A).lm%(A).mb )
#define	BLKADDR(A, type, m, n) (type *)plasma_getaddr(A, m, n)
#define	BLKLDD(A, k) ( ( (k) + (A).i/(A).mb) < (A).lm1 ? (A).mb : (A).lm%(A).mb )

Functions
void	plasma_pztile_to_lapack (plasma_context_t *plasma)
void	plasma_pctile_to_lapack (plasma_context_t *plasma)
void	plasma_pdtile_to_lapack (plasma_context_t *plasma)
void	plasma_pstile_to_lapack (plasma_context_t *plasma)
void	plasma_pzlapack_to_tile (plasma_context_t *plasma)
void	plasma_pclapack_to_tile (plasma_context_t *plasma)
void	plasma_pdlapack_to_tile (plasma_context_t *plasma)
void	plasma_pslapack_to_tile (plasma_context_t *plasma)
void	plasma_pztile_zero (plasma_context_t *plasma)
void	plasma_pctile_zero (plasma_context_t *plasma)
void	plasma_pdtile_zero (plasma_context_t *plasma)
void	plasma_pstile_zero (plasma_context_t *plasma)
void	plasma_pztile_to_lapack_quark (PLASMA_desc, PLASMA_Complex64_t *, int, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pctile_to_lapack_quark (PLASMA_desc, PLASMA_Complex32_t *, int, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pdtile_to_lapack_quark (PLASMA_desc, double *, int, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pstile_to_lapack_quark (PLASMA_desc, float *, int, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pzlapack_to_tile_quark (PLASMA_Complex64_t *, int, PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pclapack_to_tile_quark (PLASMA_Complex32_t *, int, PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pdlapack_to_tile_quark (double *, int, PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pslapack_to_tile_quark (float *, int, PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pztile_zero_quark (PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pctile_zero_quark (PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pdtile_zero_quark (PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)
void	plasma_pstile_zero_quark (PLASMA_desc, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version: 2.4.5

Author: Jakub Kurzak

Date: 2010-11-15

Definition in file tile.h.

Macro Definition Documentation

#define BLKADDR	(	A,
		type,
		m,
		n
	)	(type *)plasma_getaddr(A, m, n)

Definition at line 25 of file tile.h.

#define BLKLDD	(	A,
		k
	)	( ( (k) + (A).i/(A).mb) < (A).lm1 ? (A).mb : (A).lm%(A).mb )

Definition at line 26 of file tile.h.

#define ELTADDR	(	A,
		type,
		m,
		n
	)	(type *)plasma_geteltaddr(A, m, n)

Definition at line 23 of file tile.h.

#define ELTLDD	(	A,
		k
	)	( ( (((k)-1)/(A).mb) + (A).i/(A).mb) < (A).lm1 ? (A).mb : (A).lm%(A).mb )

Definition at line 24 of file tile.h.

Function Documentation

void plasma_pclapack_to_tile ( plasma_context_t * plasma )

Conversion from LAPACK F77 matrix layout to tile layout - static scheduling

Definition at line 29 of file pctile.c.

References A, ABDL, AF77, BLKLDD, CORE_clacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    PLASMA_Complex32_t *Af77;
    int lda;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    PLASMA_Complex32_t *f77;
    PLASMA_Complex32_t *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(Af77, lda, A, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y1 = m == 0 ? A.i%A.mb : 0;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy f77 -> bdl. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_clacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(f77[X1*lda+Y1]), lda,
            &(bdl[X1*ldt+Y1]), ldt);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pclapack_to_tile_quark	(	PLASMA_Complex32_t *	Af77,
		int	lda,
		PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling

Definition at line 88 of file pctile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_clacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    PLASMA_Complex32_t *f77;
    PLASMA_Complex32_t *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy f77 -> bdl. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_clacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(f77[X1*lda+Y1]), lda,
                &(bdl[X1*ldt+Y1]), ldt);
        }
    }
}

Here is the call graph for this function:

void plasma_pctile_to_lapack ( plasma_context_t * plasma )

Conversion from tile layout to LAPACK F77 matrix layout - static scheduling

Definition at line 128 of file pctile.c.

References A, ABDL, AF77, BLKLDD, CORE_clacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    PLASMA_desc A;
    PLASMA_Complex32_t *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    PLASMA_Complex32_t *f77;
    PLASMA_Complex32_t *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(A, Af77, lda, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.mb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy bdl -> f77. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_clacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(bdl[X1*ldt+Y1]), ldt,
            &(f77[X1*lda+Y1]), lda);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pctile_to_lapack_quark	(	PLASMA_desc	A,
		PLASMA_Complex32_t *	Af77,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from tile layout to LAPACK F77 matrix layout - dynamic scheduling

Definition at line 187 of file pctile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_clacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    PLASMA_Complex32_t *f77;
    PLASMA_Complex32_t *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy bdl -> f77. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_clacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(bdl[X1*ldt+Y1]), ldt,
                &(f77[X1*lda+Y1]), lda);
        }
    }
}

Here is the call graph for this function:

void plasma_pctile_zero ( plasma_context_t * plasma )

Zeroes a submatrix in tile layout - static scheduling

Definition at line 227 of file pctile.c.

References A, ABDL, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_3, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_3(). */
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_Complex32_t *bdl;
    int col, row;
    int X1, X2;
    int Y1, Y2;
    int m, n, ldt;
    int next_m, next_n;

    plasma_unpack_args_3(A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* First tile (m, n): PLASMA_RANK tiles in, walking down each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        m -= A.mt;
        n++;
    }

    while (n < A.nt) {
        /* Work out the following tile, PLASMA_SIZE tiles further on. */
        next_n = n;
        next_m = m;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_m -= A.mt;
            next_n++;
        }

        /* Column range [X1, X2) inside the tile. */
        if (n == 0)
            X1 = A.j%A.nb;
        else
            X1 = 0;
        if (n == A.nt-1)
            X2 = (A.j+A.n-1)%A.nb+1;
        else
            X2 = A.nb;

        /* Row range [Y1, Y2) inside the tile. */
        if (m == 0)
            Y1 = A.i%A.mb;
        else
            Y1 = 0;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;
        else
            Y2 = A.mb;

        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Clear the selected entries, column by column (stride ldt). */
        for (col = X1; col < X2; col++) {
            for (row = Y1; row < Y2; row++) {
                bdl[ldt*col+row] = 0.0;
            }
        }

        n = next_n;
        m = next_m;
    }
}

Here is the caller graph for this function:

void plasma_pctile_zero_quark	(	PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Zeroes a submatrix in tile layout - dynamic scheduling

Definition at line 281 of file pctile.c.

References ABDL, BLKLDD, plasma_desc_t::bsiz, CORE_ctile_zero_quark(), plasma_desc_t::i, plasma_desc_t::j, LOCALITY, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, OUTPUT, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, and VALUE.

{
    PLASMA_Complex32_t *bdl;
    plasma_context_t *plasma;
    int X1, X2;
    int Y1, Y2;
    int m, n, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One CORE_ctile_zero_quark task per tile (m, n). */
    for (m = 0; m < A.mt; m++) {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++) {
            /* Column range [X1, X2) inside the tile. */
            if (n == 0)
                X1 = A.j%A.nb;
            else
                X1 = 0;
            if (n == A.nt-1)
                X2 = (A.j+A.n-1)%A.nb+1;
            else
                X2 = A.nb;

            /* Row range [Y1, Y2) inside the tile. */
            if (m == 0)
                Y1 = A.i%A.mb;
            else
                Y1 = 0;
            if (m == A.mt-1)
                Y2 = (A.i+A.m-1)%A.mb+1;
            else
                Y2 = A.mb;

            bdl = ABDL(m, n);

            /* NOTE(review): the VALUE arguments are addresses of locals that
               change on the next iteration; presumably QUARK copies them at
               insertion time - confirm against the QUARK documentation. */
            QUARK_Insert_Task(plasma->quark, CORE_ctile_zero_quark, &task_flags,
                sizeof(int),                       &X1,  VALUE,
                sizeof(int),                       &X2,  VALUE,
                sizeof(int),                       &Y1,  VALUE,
                sizeof(int),                       &Y2,  VALUE,
                sizeof(PLASMA_Complex32_t)*A.bsiz, bdl,      OUTPUT | LOCALITY,
                sizeof(int),                       &ldt, VALUE,
                0);
        }
    }
}

Here is the call graph for this function:

void plasma_pdlapack_to_tile ( plasma_context_t * plasma )

Conversion from LAPACK F77 matrix layout to tile layout - static scheduling

Definition at line 29 of file pdtile.c.

References A, ABDL, AF77, BLKLDD, CORE_dlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    double *Af77;
    int lda;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    double *f77;
    double *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(Af77, lda, A, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y1 = m == 0 ? A.i%A.mb : 0;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy f77 -> bdl. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_dlacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(f77[X1*lda+Y1]), lda,
            &(bdl[X1*ldt+Y1]), ldt);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pdlapack_to_tile_quark	(	double *	Af77,
		int	lda,
		PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling

Definition at line 88 of file pdtile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_dlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    double *f77;
    double *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy f77 -> bdl. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_dlacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(f77[X1*lda+Y1]), lda,
                &(bdl[X1*ldt+Y1]), ldt);
        }
    }
}

Here is the call graph for this function:

void plasma_pdtile_to_lapack ( plasma_context_t * plasma )

Conversion from tile layout to LAPACK F77 matrix layout - static scheduling

Definition at line 128 of file pdtile.c.

References A, ABDL, AF77, BLKLDD, CORE_dlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    PLASMA_desc A;
    double *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    double *f77;
    double *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(A, Af77, lda, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.mb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy bdl -> f77. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_dlacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(bdl[X1*ldt+Y1]), ldt,
            &(f77[X1*lda+Y1]), lda);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pdtile_to_lapack_quark	(	PLASMA_desc	A,
		double *	Af77,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from tile layout to LAPACK F77 matrix layout - dynamic scheduling

Definition at line 187 of file pdtile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_dlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    double *f77;
    double *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy bdl -> f77. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_dlacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(bdl[X1*ldt+Y1]), ldt,
                &(f77[X1*lda+Y1]), lda);
        }
    }
}

Here is the call graph for this function:

void plasma_pdtile_zero ( plasma_context_t * plasma )

Zeroes a submatrix in tile layout - static scheduling

Definition at line 227 of file pdtile.c.

References A, ABDL, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_3, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_3(). */
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    double *bdl;
    int col, row;
    int X1, X2;
    int Y1, Y2;
    int m, n, ldt;
    int next_m, next_n;

    plasma_unpack_args_3(A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* First tile (m, n): PLASMA_RANK tiles in, walking down each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        m -= A.mt;
        n++;
    }

    while (n < A.nt) {
        /* Work out the following tile, PLASMA_SIZE tiles further on. */
        next_n = n;
        next_m = m;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_m -= A.mt;
            next_n++;
        }

        /* Column range [X1, X2) inside the tile. */
        if (n == 0)
            X1 = A.j%A.nb;
        else
            X1 = 0;
        if (n == A.nt-1)
            X2 = (A.j+A.n-1)%A.nb+1;
        else
            X2 = A.nb;

        /* Row range [Y1, Y2) inside the tile. */
        if (m == 0)
            Y1 = A.i%A.mb;
        else
            Y1 = 0;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;
        else
            Y2 = A.mb;

        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Clear the selected entries, column by column (stride ldt). */
        for (col = X1; col < X2; col++) {
            for (row = Y1; row < Y2; row++) {
                bdl[ldt*col+row] = 0.0;
            }
        }

        n = next_n;
        m = next_m;
    }
}

Here is the caller graph for this function:

void plasma_pdtile_zero_quark	(	PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Zeroes a submatrix in tile layout - dynamic scheduling

Definition at line 281 of file pdtile.c.

References ABDL, BLKLDD, plasma_desc_t::bsiz, CORE_dtile_zero_quark(), plasma_desc_t::i, plasma_desc_t::j, LOCALITY, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, OUTPUT, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, and VALUE.

{
    double *bdl;
    plasma_context_t *plasma;
    int X1, X2;
    int Y1, Y2;
    int m, n, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One CORE_dtile_zero_quark task per tile (m, n). */
    for (m = 0; m < A.mt; m++) {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++) {
            /* Column range [X1, X2) inside the tile. */
            if (n == 0)
                X1 = A.j%A.nb;
            else
                X1 = 0;
            if (n == A.nt-1)
                X2 = (A.j+A.n-1)%A.nb+1;
            else
                X2 = A.nb;

            /* Row range [Y1, Y2) inside the tile. */
            if (m == 0)
                Y1 = A.i%A.mb;
            else
                Y1 = 0;
            if (m == A.mt-1)
                Y2 = (A.i+A.m-1)%A.mb+1;
            else
                Y2 = A.mb;

            bdl = ABDL(m, n);

            /* NOTE(review): the VALUE arguments are addresses of locals that
               change on the next iteration; presumably QUARK copies them at
               insertion time - confirm against the QUARK documentation. */
            QUARK_Insert_Task(plasma->quark, CORE_dtile_zero_quark, &task_flags,
                sizeof(int),                       &X1,  VALUE,
                sizeof(int),                       &X2,  VALUE,
                sizeof(int),                       &Y1,  VALUE,
                sizeof(int),                       &Y2,  VALUE,
                sizeof(double)*A.bsiz, bdl,      OUTPUT | LOCALITY,
                sizeof(int),                       &ldt, VALUE,
                0);
        }
    }
}

Here is the call graph for this function:

void plasma_pslapack_to_tile ( plasma_context_t * plasma )

Conversion from LAPACK F77 matrix layout to tile layout - static scheduling

Definition at line 29 of file pstile.c.

References A, ABDL, AF77, BLKLDD, CORE_slacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    float *Af77;
    int lda;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    float *f77;
    float *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(Af77, lda, A, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y1 = m == 0 ? A.i%A.mb : 0;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy f77 -> bdl. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_slacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(f77[X1*lda+Y1]), lda,
            &(bdl[X1*ldt+Y1]), ldt);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pslapack_to_tile_quark	(	float *	Af77,
		int	lda,
		PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling

Definition at line 88 of file pstile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_slacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    float *f77;
    float *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy f77 -> bdl. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_slacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(f77[X1*lda+Y1]), lda,
                &(bdl[X1*ldt+Y1]), ldt);
        }
    }
}

Here is the call graph for this function:

void plasma_pstile_to_lapack ( plasma_context_t * plasma )

Conversion from tile layout to LAPACK F77 matrix layout - static scheduling

Definition at line 128 of file pstile.c.

References A, ABDL, AF77, BLKLDD, CORE_slacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_5(). */
    PLASMA_desc A;
    float *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    float *f77;
    float *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(A, Af77, lda, sequence, request);
    /* Do nothing when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile (m, n) handled here: start PLASMA_RANK tiles in,
       counting down the A.mt rows of each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* The tile handled after this one lies PLASMA_SIZE tiles further on,
           in the same column-by-column order. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Columns [X1, X2) and rows [Y1, Y2) of tile (m, n) to copy; only the
           first and last tile row/column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.mb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Copy bdl -> f77. The tile operand is passed with leading dimension
           ldt, so its column offset must be X1*ldt, not X1*lda (the tile_zero
           routines index the same storage as bdl[ldt*x+y]). */
        CORE_slacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(bdl[X1*ldt+Y1]), ldt,
            &(f77[X1*lda+Y1]), lda);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pstile_to_lapack_quark	(	PLASMA_desc	A,
		float *	Af77,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from tile layout to LAPACK F77 matrix layout - dynamic scheduling

Definition at line 187 of file pstile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_slacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /* Per-tile working pointers: f77 from AF77(m, n), bdl from ABDL(m, n). */
    float *f77;
    float *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    /* Insert no tasks when the sequence is not in the PLASMA_SUCCESS state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Attach the sequence's QUARK sequence to the flags used for every task below. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* One copy task per tile (m, n). */
    for (m = 0; m < A.mt; m++)
    {
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Columns [X1, X2) and rows [Y1, Y2) of the tile to copy; only the
               first and last tile row/column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Copy bdl -> f77. The tile operand is passed with leading
               dimension ldt, so its column offset must be X1*ldt, not X1*lda
               (the tile_zero routines index the same storage as bdl[ldt*x+y]). */
            QUARK_CORE_slacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(bdl[X1*ldt+Y1]), ldt,
                &(f77[X1*lda+Y1]), lda);
        }
    }
}

Here is the call graph for this function:

void plasma_pstile_zero ( plasma_context_t * plasma )

Zeroes a submatrix in tile layout - static scheduling

Definition at line 227 of file pstile.c.

References A, ABDL, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_3, and plasma_sequence_t::status.

{
    /* Arguments retrieved through plasma_unpack_args_3(). */
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    float *bdl;
    int col, row;
    int X1, X2;
    int Y1, Y2;
    int m, n, ldt;
    int next_m, next_n;

    plasma_unpack_args_3(A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* First tile (m, n): PLASMA_RANK tiles in, walking down each tile column. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        m -= A.mt;
        n++;
    }

    while (n < A.nt) {
        /* Work out the following tile, PLASMA_SIZE tiles further on. */
        next_n = n;
        next_m = m;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_m -= A.mt;
            next_n++;
        }

        /* Column range [X1, X2) inside the tile. */
        if (n == 0)
            X1 = A.j%A.nb;
        else
            X1 = 0;
        if (n == A.nt-1)
            X2 = (A.j+A.n-1)%A.nb+1;
        else
            X2 = A.nb;

        /* Row range [Y1, Y2) inside the tile. */
        if (m == 0)
            Y1 = A.i%A.mb;
        else
            Y1 = 0;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;
        else
            Y2 = A.mb;

        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Clear the selected entries, column by column (stride ldt). */
        for (col = X1; col < X2; col++) {
            for (row = Y1; row < Y2; row++) {
                bdl[ldt*col+row] = 0.0;
            }
        }

        n = next_n;
        m = next_m;
    }
}

Here is the caller graph for this function:

void plasma_pstile_zero_quark	(	PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Zeroes a submatrix in tile layout - dynamic scheduling

Definition at line 281 of file pstile.c.

References ABDL, BLKLDD, plasma_desc_t::bsiz, CORE_stile_zero_quark(), plasma_desc_t::i, plasma_desc_t::j, LOCALITY, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, OUTPUT, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, and VALUE.

{
    /*
     * Dynamic-scheduling variant: queues one CORE_stile_zero_quark task per
     * tile of A, tagged with the caller's QUARK sequence. Each task receives
     * the column range [X1, X2), the row range [Y1, Y2), the tile pointer and
     * the tile leading dimension.
     */
    float *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++) {
        ldt = BLKLDD(A, m);

        /* Row bounds depend only on the tile row, so compute them once. */
        Y1 = 0;
        Y2 = A.mb;
        if (m == 0)
            Y1 = A.i%A.mb;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;

        for (n = 0; n < A.nt; n++) {
            /* Column bounds: trim only the first and last tile column. */
            X1 = 0;
            X2 = A.nb;
            if (n == 0)
                X1 = A.j%A.nb;
            if (n == A.nt-1)
                X2 = (A.j+A.n-1)%A.nb+1;

            bdl = ABDL(m, n);
            QUARK_Insert_Task(plasma->quark, CORE_stile_zero_quark, &task_flags,
                sizeof(int),                       &X1,  VALUE,
                sizeof(int),                       &X2,  VALUE,
                sizeof(int),                       &Y1,  VALUE,
                sizeof(int),                       &Y2,  VALUE,
                sizeof(float)*A.bsiz, bdl,      OUTPUT | LOCALITY,
                sizeof(int),                       &ldt, VALUE,
                0);
        }
    }
}

Here is the call graph for this function:

void plasma_pzlapack_to_tile ( plasma_context_t * plasma )

Conversion from LAPACK F77 matrix layout to tile layout - static scheduling

Definition at line 29 of file pztile.c.

References A, ABDL, AF77, BLKLDD, CORE_zlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /*
     * Static-scheduling worker: copies the submatrix described by A from the
     * LAPACK-layout array Af77 (leading dimension lda) into tile storage.
     * Tiles are visited in linear order (index = n*A.mt + m): this thread
     * starts at tile PLASMA_RANK and then advances by PLASMA_SIZE tiles.
     */
    PLASMA_Complex64_t *Af77;
    int lda;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_Complex64_t *f77;
    PLASMA_Complex64_t *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(Af77, lda, A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Fold the starting rank into a (tile row m, tile column n) pair. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* Compute the tile this thread handles after the current one. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Column range [X1, X2) and row range [Y1, Y2) inside this tile;
         * only the first/last tile row and column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y1 = m == 0 ? A.i%A.mb : 0;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Each buffer is offset with its own leading dimension: lda for the
         * LAPACK array, ldt for the tile. Using lda on the tile side would
         * address the wrong element whenever X1 != 0 and lda != ldt. */
        CORE_zlacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(f77[X1*lda+Y1]), lda,
            &(bdl[X1*ldt+Y1]), ldt);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzlapack_to_tile_quark	(	PLASMA_Complex64_t *	Af77,
		int	lda,
		PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling

Definition at line 88 of file pztile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Dynamic-scheduling variant: queues one QUARK_CORE_zlacpy task per tile
     * of A, copying from the LAPACK-layout array Af77 (leading dimension lda)
     * into tile storage. Tasks are tagged with the caller's QUARK sequence.
     */
    PLASMA_Complex64_t *f77;
    PLASMA_Complex64_t *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++)
    {
        /* The tile leading dimension depends only on the tile row. */
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Column range [X1, X2) and row range [Y1, Y2) inside this tile;
             * only the first/last tile row and column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Each buffer is offset with its own leading dimension: lda for
             * the LAPACK array, ldt for the tile. Using lda on the tile side
             * would address the wrong element whenever X1 != 0 and
             * lda != ldt. */
            QUARK_CORE_zlacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(f77[X1*lda+Y1]), lda,
                &(bdl[X1*ldt+Y1]), ldt);
        }
    }
}

Here is the call graph for this function:

void plasma_pztile_to_lapack ( plasma_context_t * plasma )

Internal routines - static scheduling

Conversion from tile layout to LAPACK F77 matrix layout - static scheduling

Definition at line 128 of file pztile.c.

References A, ABDL, AF77, BLKLDD, CORE_zlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
    /*
     * Static-scheduling worker: copies the submatrix described by A from tile
     * storage back into the LAPACK-layout array Af77 (leading dimension lda).
     * Tiles are visited in linear order (index = n*A.mt + m): this thread
     * starts at tile PLASMA_RANK and then advances by PLASMA_SIZE tiles.
     */
    PLASMA_desc A;
    PLASMA_Complex64_t *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_Complex64_t *f77;
    PLASMA_Complex64_t *bdl;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_5(A, Af77, lda, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Fold the starting rank into a (tile row m, tile column n) pair. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* Compute the tile this thread handles after the current one. */
        next_m = m;
        next_n = n;
        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        /* Column range [X1, X2) and row range [Y1, Y2) inside this tile;
         * only the first/last tile row and column are trimmed. */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.mb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

        f77 = AF77(m, n);
        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Each buffer is offset with its own leading dimension: ldt for the
         * tile, lda for the LAPACK array. Using lda on the tile side would
         * read the wrong element whenever X1 != 0 and lda != ldt. */
        CORE_zlacpy(
            PlasmaUpperLower, (Y2-Y1), (X2-X1),
            &(bdl[X1*ldt+Y1]), ldt,
            &(f77[X1*lda+Y1]), lda);

        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pztile_to_lapack_quark	(	PLASMA_desc	A,
		PLASMA_Complex64_t *	Af77,
		int	lda,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Internal routines - dynamic scheduling

Conversion from tile layout to LAPACK F77 matrix layout - dynamic scheduling

Definition at line 187 of file pztile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Dynamic-scheduling variant: queues one QUARK_CORE_zlacpy task per tile
     * of A, copying from tile storage back into the LAPACK-layout array Af77
     * (leading dimension lda). Tasks are tagged with the caller's QUARK
     * sequence.
     */
    PLASMA_Complex64_t *f77;
    PLASMA_Complex64_t *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++)
    {
        /* The tile leading dimension depends only on the tile row. */
        ldt = BLKLDD(A, m);
        for (n = 0; n < A.nt; n++)
        {
            /* Column range [X1, X2) and row range [Y1, Y2) inside this tile;
             * only the first/last tile row and column are trimmed. */
            X1 = n == 0 ? A.j%A.nb : 0;
            Y1 = m == 0 ? A.i%A.mb : 0;
            X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;

            f77 = AF77(m, n);
            bdl = ABDL(m, n);

            /* Each buffer is offset with its own leading dimension: ldt for
             * the tile, lda for the LAPACK array. Using lda on the tile side
             * would read the wrong element whenever X1 != 0 and
             * lda != ldt. */
            QUARK_CORE_zlacpy(
                plasma->quark, &task_flags,
                PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
                &(bdl[X1*ldt+Y1]), ldt,
                &(f77[X1*lda+Y1]), lda);
        }
    }
}

Here is the call graph for this function:

void plasma_pztile_zero ( plasma_context_t * plasma )

Zeroes a submatrix in tile layout - static scheduling

Definition at line 227 of file pztile.c.

References A, ABDL, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_3, and plasma_sequence_t::status.

{
    /*
     * Static-scheduling worker that zeroes the submatrix described by A.
     * Tiles are visited in linear order (index = n*A.mt + m): this thread
     * starts at tile PLASMA_RANK and then advances by PLASMA_SIZE tiles.
     */
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_Complex64_t *bdl;
    int col, row;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    int next_m;
    int next_n;

    plasma_unpack_args_3(A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Fold the starting rank into a (tile row m, tile column n) pair. */
    for (n = 0, m = PLASMA_RANK; m >= A.mt && n < A.nt; n++)
        m -= A.mt;

    for (; n < A.nt; m = next_m, n = next_n) {
        /* Compute the tile this thread handles after the current one. */
        next_n = n;
        for (next_m = m + PLASMA_SIZE; next_m >= A.mt && next_n < A.nt; next_n++)
            next_m -= A.mt;

        /* Default to the whole tile, then trim the first/last tile
         * row and column to the submatrix boundaries. */
        X1 = 0;
        Y1 = 0;
        X2 = A.nb;
        Y2 = A.mb;
        if (n == 0)
            X1 = A.j%A.nb;
        if (m == 0)
            Y1 = A.i%A.mb;
        if (n == A.nt-1)
            X2 = (A.j+A.n-1)%A.nb+1;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;

        bdl = ABDL(m, n);
        ldt = BLKLDD(A, m);

        /* Element (row, col) of the tile lives at bdl[ldt*col + row]. */
        for (col = X1; col < X2; col++) {
            for (row = Y1; row < Y2; row++) {
                bdl[ldt*col+row] = 0.0;
            }
        }
    }
}

Here is the caller graph for this function:

void plasma_pztile_zero_quark	(	PLASMA_desc	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

Zeroes a submatrix in tile layout - dynamic scheduling

Definition at line 281 of file pztile.c.

References ABDL, BLKLDD, plasma_desc_t::bsiz, CORE_ztile_zero_quark(), plasma_desc_t::i, plasma_desc_t::j, LOCALITY, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, OUTPUT, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, and VALUE.

{
    /*
     * Dynamic-scheduling variant: queues one CORE_ztile_zero_quark task per
     * tile of A, tagged with the caller's QUARK sequence. Each task receives
     * the column range [X1, X2), the row range [Y1, Y2), the tile pointer and
     * the tile leading dimension.
     */
    PLASMA_Complex64_t *bdl;
    plasma_context_t *plasma;
    int X1, Y1;
    int X2, Y2;
    int n, m, ldt;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++) {
        ldt = BLKLDD(A, m);

        /* Row bounds depend only on the tile row, so compute them once. */
        Y1 = 0;
        Y2 = A.mb;
        if (m == 0)
            Y1 = A.i%A.mb;
        if (m == A.mt-1)
            Y2 = (A.i+A.m-1)%A.mb+1;

        for (n = 0; n < A.nt; n++) {
            /* Column bounds: trim only the first and last tile column. */
            X1 = 0;
            X2 = A.nb;
            if (n == 0)
                X1 = A.j%A.nb;
            if (n == A.nt-1)
                X2 = (A.j+A.n-1)%A.nb+1;

            bdl = ABDL(m, n);
            QUARK_Insert_Task(plasma->quark, CORE_ztile_zero_quark, &task_flags,
                sizeof(int),                       &X1,  VALUE,
                sizeof(int),                       &X2,  VALUE,
                sizeof(int),                       &Y1,  VALUE,
                sizeof(int),                       &Y2,  VALUE,
                sizeof(PLASMA_Complex64_t)*A.bsiz, bdl,      OUTPUT | LOCALITY,
                sizeof(int),                       &ldt, VALUE,
                0);
        }
    }
}

Here is the call graph for this function:

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation