Functions
void	magma_cgetmatrix_1D_col_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloatComplex_const_ptr const *dA, magma_int_t ldda, magmaFloatComplex *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

void	magma_cgetmatrix_1D_row_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloatComplex_const_ptr const *dA, magma_int_t ldda, magmaFloatComplex *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

void	magma_dgetmatrix_1D_col_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDouble_const_ptr const *dA, magma_int_t ldda, double *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

void	magma_dgetmatrix_1D_row_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDouble_const_ptr const *dA, magma_int_t ldda, double *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

void	magma_sgetmatrix_1D_col_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloat_const_ptr const *dA, magma_int_t ldda, float *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

void	magma_sgetmatrix_1D_row_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloat_const_ptr const *dA, magma_int_t ldda, float *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

void	magma_zgetmatrix_1D_col_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

void	magma_zgetmatrix_1D_row_bcyclic (magma_int_t ngpu, magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magma_queue_t queues[])
	Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

Detailed Description

Function Documentation

◆ magma_cgetmatrix_1D_col_bcyclic()

void magma_cgetmatrix_1D_col_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaFloatComplex_const_ptr const *	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,nlocal), where nlocal is the number of columns assigned to each GPU.
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= m.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_cgetmatrix_1D_row_bcyclic()

void magma_cgetmatrix_1D_row_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaFloatComplex_const_ptr const *	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed. ngpu > 0.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,n).
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= (1 + m/(nb*ngpu))*nb.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_dgetmatrix_1D_col_bcyclic()

void magma_dgetmatrix_1D_col_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaDouble_const_ptr const *	dA,
		magma_int_t	ldda,
		double *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,nlocal), where nlocal is the number of columns assigned to each GPU.
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= m.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_dgetmatrix_1D_row_bcyclic()

void magma_dgetmatrix_1D_row_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaDouble_const_ptr const *	dA,
		magma_int_t	ldda,
		double *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed. ngpu > 0.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,n).
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= (1 + m/(nb*ngpu))*nb.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_sgetmatrix_1D_col_bcyclic()

void magma_sgetmatrix_1D_col_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaFloat_const_ptr const *	dA,
		magma_int_t	ldda,
		float *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,nlocal), where nlocal is the number of columns assigned to each GPU.
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= m.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_sgetmatrix_1D_row_bcyclic()

void magma_sgetmatrix_1D_row_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaFloat_const_ptr const *	dA,
		magma_int_t	ldda,
		float *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed. ngpu > 0.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,n).
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= (1 + m/(nb*ngpu))*nb.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_zgetmatrix_1D_col_bcyclic()

void magma_zgetmatrix_1D_col_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaDoubleComplex_const_ptr const *	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D column block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,nlocal), where nlocal is the number of columns assigned to each GPU.
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= m.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

◆ magma_zgetmatrix_1D_row_bcyclic()

void magma_zgetmatrix_1D_row_bcyclic	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	nb,
		magmaDoubleComplex_const_ptr const *	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	hA,
		magma_int_t	lda,
		magma_queue_t	queues[] )

Copy matrix dA, which is distributed 1D row block cyclic over multiple GPUs, to hA on CPU host.

Parameters

[in]	ngpu	Number of GPUs over which dA is distributed. ngpu > 0.
[in]	m	Number of rows of matrix hA. m >= 0.
[in]	n	Number of columns of matrix hA. n >= 0.
[in]	nb	Block size. nb > 0.
[in]	dA	Array of ngpu pointers, one per GPU, that store the distributed m-by-n matrix A on the GPUs, each of dimension (ldda,n).
[in]	ldda	Leading dimension of each matrix dA on each GPU. ldda >= (1 + m/(nb*ngpu))*nb.
[out]	hA	The m-by-n matrix A on the CPU, of dimension (lda,n).
[in]	lda	Leading dimension of matrix hA. lda >= m.
[in]	queues	Array of dimension (ngpu), with one queue per GPU.

Functions

Detailed Description

Function Documentation

◆ magma_cgetmatrix_1D_col_bcyclic()

◆ magma_cgetmatrix_1D_row_bcyclic()

◆ magma_dgetmatrix_1D_col_bcyclic()

◆ magma_dgetmatrix_1D_row_bcyclic()

◆ magma_sgetmatrix_1D_col_bcyclic()

◆ magma_sgetmatrix_1D_row_bcyclic()

◆ magma_zgetmatrix_1D_col_bcyclic()

◆ magma_zgetmatrix_1D_row_bcyclic()