Functions
magma_int_t	magma_cpotrf_expert (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t *info, magma_queue_t *queues)
	CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

magma_int_t	magma_cpotrf3_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magmaFloatComplex_ptr d_lA[], magma_int_t ldda, magmaFloatComplex_ptr d_lP[], magma_int_t lddp, magmaFloatComplex *A, magma_int_t lda, magma_int_t h, magma_queue_t queues[][3], magma_event_t events[][5], magma_int_t *info)
	CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_cpotrf_expert_gpu_work (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *info, magma_mode_t mode, magma_int_t nb, magma_int_t recnb, void *host_work, magma_int_t *lwork_host, void *device_work, magma_int_t *lwork_device, magma_event_t events[2], magma_queue_t queues[2])
	CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_cpotrf_expert_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *info, magma_int_t nb, magma_mode_t mode)
	wrapper around magma_cpotrf_expert_gpu_work to hide workspace, event, and queue management

magma_int_t	magma_cpotrf_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_cpotrf_expert_gpu with mode = MagmaHybrid.

magma_int_t	magma_cpotrf_native (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_cpotrf_expert_gpu with mode = MagmaNative.

magma_int_t	magma_cpotrf_m (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, magma_int_t *info)
	CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

magma_int_t	magma_cpotrf_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaFloatComplex_ptr d_lA[], magma_int_t ldda, magma_int_t *info)
	CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_cpotrf_rectile_native (magma_uplo_t uplo, magma_int_t n, magma_int_t recnb, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t gbstep, magma_int_t *dinfo, magma_int_t *info, magma_queue_t queue)
	CPOTRF_RECTILE computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_dpotrf_expert (magma_uplo_t uplo, magma_int_t n, double *A, magma_int_t lda, double *dA, magma_int_t ldda, magma_int_t *info, magma_queue_t *queues)
	DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

magma_int_t	magma_dpotrf3_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magmaDouble_ptr d_lA[], magma_int_t ldda, magmaDouble_ptr d_lP[], magma_int_t lddp, double *A, magma_int_t lda, magma_int_t h, magma_queue_t queues[][3], magma_event_t events[][5], magma_int_t *info)
	DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_dpotrf_expert_gpu_work (magma_uplo_t uplo, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *info, magma_mode_t mode, magma_int_t nb, magma_int_t recnb, void *host_work, magma_int_t *lwork_host, void *device_work, magma_int_t *lwork_device, magma_event_t events[2], magma_queue_t queues[2])
	DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_dpotrf_expert_gpu (magma_uplo_t uplo, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *info, magma_int_t nb, magma_mode_t mode)
	wrapper around magma_dpotrf_expert_gpu_work to hide workspace, event, and queue management

magma_int_t	magma_dpotrf_gpu (magma_uplo_t uplo, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_dpotrf_expert_gpu with mode = MagmaHybrid.

magma_int_t	magma_dpotrf_native (magma_uplo_t uplo, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_dpotrf_expert_gpu with mode = MagmaNative.

magma_int_t	magma_dpotrf_m (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
	DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

magma_int_t	magma_dpotrf_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaDouble_ptr d_lA[], magma_int_t ldda, magma_int_t *info)
	DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_dpotrf_rectile_native (magma_uplo_t uplo, magma_int_t n, magma_int_t recnb, double *dA, magma_int_t ldda, magma_int_t gbstep, magma_int_t *dinfo, magma_int_t *info, magma_queue_t queue)
	DPOTRF_RECTILE computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_shpotrf_LL_expert_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t nb, magma_int_t recnb, magma_int_t *info, magma_mode_t mode)
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_shpotrf_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_shpotrf_LL_expert_gpu with mode = MagmaHybrid.

magma_int_t	magma_shpotrf_native (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_shpotrf_LL_expert_gpu with mode = MagmaNative.

magma_int_t	magma_spotrf_expert (magma_uplo_t uplo, magma_int_t n, float *A, magma_int_t lda, float *dA, magma_int_t ldda, magma_int_t *info, magma_queue_t *queues)
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

magma_int_t	magma_spotrf3_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magmaFloat_ptr d_lA[], magma_int_t ldda, magmaFloat_ptr d_lP[], magma_int_t lddp, float *A, magma_int_t lda, magma_int_t h, magma_queue_t queues[][3], magma_event_t events[][5], magma_int_t *info)
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_spotrf_expert_gpu_work (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info, magma_mode_t mode, magma_int_t nb, magma_int_t recnb, void *host_work, magma_int_t *lwork_host, void *device_work, magma_int_t *lwork_device, magma_event_t events[2], magma_queue_t queues[2])
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_spotrf_expert_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info, magma_int_t nb, magma_mode_t mode)
	wrapper around magma_spotrf_expert_gpu_work to hide workspace, event, and queue management

magma_int_t	magma_spotrf_gpu (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_spotrf_expert_gpu with mode = MagmaHybrid.

magma_int_t	magma_spotrf_native (magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_spotrf_expert_gpu with mode = MagmaNative.

magma_int_t	magma_spotrf_m (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

magma_int_t	magma_spotrf_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaFloat_ptr d_lA[], magma_int_t ldda, magma_int_t *info)
	SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_spotrf_rectile_native (magma_uplo_t uplo, magma_int_t n, magma_int_t recnb, float *dA, magma_int_t ldda, magma_int_t gbstep, magma_int_t *dinfo, magma_int_t *info, magma_queue_t queue)
	SPOTRF_RECTILE computes the Cholesky factorization of a real symmetric positive definite matrix dA.

magma_int_t	magma_zpotrf_expert (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info, magma_queue_t *queues)
	ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

magma_int_t	magma_zpotrf3_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magmaDoubleComplex_ptr d_lA[], magma_int_t ldda, magmaDoubleComplex_ptr d_lP[], magma_int_t lddp, magmaDoubleComplex *A, magma_int_t lda, magma_int_t h, magma_queue_t queues[][3], magma_event_t events[][5], magma_int_t *info)
	ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_zpotrf_expert_gpu_work (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info, magma_mode_t mode, magma_int_t nb, magma_int_t recnb, void *host_work, magma_int_t *lwork_host, void *device_work, magma_int_t *lwork_device, magma_event_t events[2], magma_queue_t queues[2])
	ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_zpotrf_expert_gpu (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info, magma_int_t nb, magma_mode_t mode)
	wrapper around magma_zpotrf_expert_gpu_work to hide workspace, event, and queue management

magma_int_t	magma_zpotrf_gpu (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_zpotrf_expert_gpu with mode = MagmaHybrid.

magma_int_t	magma_zpotrf_native (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info)
	magma_zpotrf_expert_gpu with mode = MagmaNative.

magma_int_t	magma_zpotrf_m (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
	ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

magma_int_t	magma_zpotrf_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex_ptr d_lA[], magma_int_t ldda, magma_int_t *info)
	ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

magma_int_t	magma_zpotrf_rectile_native (magma_uplo_t uplo, magma_int_t n, magma_int_t recnb, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t gbstep, magma_int_t *dinfo, magma_int_t *info, magma_queue_t queue)
	ZPOTRF_RECTILE computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

Detailed Description

Function Documentation

◆ magma_cpotrf_expert()

magma_int_t magma_cpotrf_expert	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magmaFloatComplex *	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_queue_t *	queues )

CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

The factorization has the form A = U**H * U, if uplo = MagmaUpper, or A = L * L**H, if uplo = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

This uses multiple queues to overlap communication and computation.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	COMPLEX array, dimension (LDA,N) On entry, the Hermitian matrix A. If uplo = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_cpotrf3_mgpu()

magma_int_t magma_cpotrf3_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	off_i,
		magma_int_t	off_j,
		magma_int_t	nb,
		magmaFloatComplex_ptr	d_lA[],
		magma_int_t	ldda,
		magmaFloatComplex_ptr	d_lP[],
		magma_int_t	lddp,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magma_int_t	h,
		magma_queue_t	queues[][3],
		magma_event_t	events[][5],
		magma_int_t *	info )

CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

Auxiliary subroutine for cpotrf2_ooc. It is a multi-GPU interface to compute the Cholesky factorization of a "rectangular" matrix.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	m	INTEGER The number of rows of the submatrix to be factorized.
[in]	n	INTEGER The number of columns of the submatrix to be factorized.
[in]	off_i	INTEGER The first row index of the submatrix to be factorized.
[in]	off_j	INTEGER The first column index of the submatrix to be factorized.
[in]	nb	INTEGER The block size used for the factorization and distribution.
[in,out]	d_lA	COMPLEX array of pointers on the GPU, dimension (ngpu). On entry, the Hermitian matrix dA distributed over GPU. (d_lA[d] points to the local matrix on d-th GPU). If UPLO = MagmaLower or MagmaUpper, it respectively uses a 1D block column or row cyclic format (with the block size nb), and each local matrix is stored by column. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in,out]	d_lP	COMPLEX array of pointers on the GPU, dimension (ngpu). d_lP[d] points to workspace of size h*lddp*nb on d-th GPU.
[in]	lddp	INTEGER The leading dimension of the array dP. LDDP >= max(1,N).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in,out]	A	COMPLEX array on the CPU, dimension (LDA,H*NB) On exit, the panel is copied back to the CPU
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[in]	h	INTEGER It specifies the size of the CPU workspace, A.
[in]	queues	magma_queue_t queues is of dimension (ngpu,3) and contains the queues used for the partial factorization.
[in]	events	magma_event_t events is of dimension(ngpu,5) and contains the events used for the partial factorization.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_cpotrf_expert_gpu_work()

magma_int_t magma_cpotrf_expert_gpu_work	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		magma_int_t	recnb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_event_t	events[2],
		magma_queue_t	queues[2] )

CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS. This algorithm uses left-looking Cholesky factorization

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored (hybrid mode only); = MagmaLower: Lower triangle of dA is stored (hybrid & native modes).
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the Hermitian matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	mode	magma_mode_t = MagmaNative: Factorize dA using GPU only mode. = MagmaHybrid: Factorize dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_cpotrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in]	recnb	INTEGER The blocking size used during the recursive panel factorization (0 < recnb <= nb); Users with no specific preference of recnb can set it to a fixed value of 64 or 128.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork_host[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	events	magma_event_t array of size two created/destroyed by the user outside the routine Used to manage inter-stream dependencies
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

◆ magma_cpotrf_expert_gpu()

magma_int_t magma_cpotrf_expert_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_int_t	nb,
		magma_mode_t	mode )

wrapper around magma_cpotrf_expert_gpu_work to hide workspace, event, and queue management

See also: magma_cpotrf_expert_gpu_work

◆ magma_cpotrf_gpu()

magma_int_t magma_cpotrf_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_cpotrf_expert_gpu with mode = MagmaHybrid.

Computation is hybrid, part on CPU (panels), part on GPU (matrix updates).

See also: magma_cpotrf_expert_gpu

◆ magma_cpotrf_native()

magma_int_t magma_cpotrf_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_cpotrf_expert_gpu with mode = MagmaNative.

Computation is done only on the GPU, not on the CPU.

See also: magma_cpotrf_expert_gpu

◆ magma_cpotrf_m()

magma_int_t magma_cpotrf_m	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magma_int_t *	info )

CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. The matrix A may exceed the GPU memory.

The factorization has the form A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	COMPLEX array, dimension (LDA,N) On entry, the Hermitian matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_cpotrf_mgpu()

magma_int_t magma_cpotrf_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloatComplex_ptr	d_lA[],
		magma_int_t	ldda,
		magma_int_t *	info )

CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	d_lA	COMPLEX array of pointers on the GPU, dimension (ngpu) On entry, the Hermitian matrix dA distributed over GPUs (d_lA[d] points to the local matrix on the d-th GPU). It is distributed in 1D block column or row cyclic (with the block size of nb) if UPLO = MagmaUpper or MagmaLower, respectively. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array d_lA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_cpotrf_rectile_native()

magma_int_t magma_cpotrf_rectile_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magma_int_t	recnb,
		magmaFloatComplex *	dA,
		magma_int_t	ldda,
		magma_int_t	gbstep,
		magma_int_t *	dinfo,
		magma_int_t *	info,
		magma_queue_t	queue )

CPOTRF_RECTILE computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored. (Not currently supported) = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in]	recnb	INTEGER The blocking size at which recursion stops.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the Hermitian matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in]	gbstep	INTEGER Internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[out]	info	INTEGER, stored on the CPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine.

◆ magma_dpotrf_expert()

magma_int_t magma_dpotrf_expert	(	magma_uplo_t	uplo,
		magma_int_t	n,
		double *	A,
		magma_int_t	lda,
		double *	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_queue_t *	queues )

DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

The factorization has the form A = U**H * U, if uplo = MagmaUpper, or A = L * L**H, if uplo = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

This uses multiple queues to overlap communication and computation.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If uplo = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_dpotrf3_mgpu()

magma_int_t magma_dpotrf3_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	off_i,
		magma_int_t	off_j,
		magma_int_t	nb,
		magmaDouble_ptr	d_lA[],
		magma_int_t	ldda,
		magmaDouble_ptr	d_lP[],
		magma_int_t	lddp,
		double *	A,
		magma_int_t	lda,
		magma_int_t	h,
		magma_queue_t	queues[][3],
		magma_event_t	events[][5],
		magma_int_t *	info )

DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

Auxiliary subroutine for dpotrf2_ooc. It is a multi-GPU interface to compute the Cholesky factorization of a "rectangular" matrix.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	m	INTEGER The number of rows of the submatrix to be factorized.
[in]	n	INTEGER The number of columns of the submatrix to be factorized.
[in]	off_i	INTEGER The first row index of the submatrix to be factorized.
[in]	off_j	INTEGER The first column index of the submatrix to be factorized.
[in]	nb	INTEGER The block size used for the factorization and distribution.
[in,out]	d_lA	DOUBLE PRECISION array of pointers on the GPU, dimension (ngpu). On entry, the symmetric matrix dA distributed over GPU. (d_lA[d] points to the local matrix on d-th GPU). If UPLO = MagmaLower or MagmaUpper, it respectively uses a 1D block column or row cyclic format (with the block size nb), and each local matrix is stored by column. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in,out]	d_lP	DOUBLE PRECISION array of pointers on the GPU, dimension (ngpu). d_lP[d] points to workspace of size h*lddp*nb on d-th GPU.
[in]	lddp	INTEGER The leading dimension of the array dP. LDDP >= max(1,N).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in,out]	A	DOUBLE PRECISION array on the CPU, dimension (LDA,H*NB) On exit, the panel is copied back to the CPU
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[in]	h	INTEGER It specifies the size of the CPU workspace, A.
[in]	queues	magma_queue_t queues is of dimension (ngpu,3) and contains the queues used for the partial factorization.
[in]	events	magma_event_t events is of dimension(ngpu,5) and contains the events used for the partial factorization.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_dpotrf_expert_gpu_work()

magma_int_t magma_dpotrf_expert_gpu_work	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		magma_int_t	recnb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_event_t	events[2],
		magma_queue_t	queues[2] )

DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS. This algorithm uses left-looking Cholesky factorization

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored (hybrid mode only); = MagmaLower: Lower triangle of dA is stored (hybrid & native modes).
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	mode	magma_mode_t = MagmaNative: Factorize dA using GPU only mode. = MagmaHybrid: Factorize dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_dpotrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in]	recnb	INTEGER The blocking size used during the recursive panel factorization (0 < recnb <= nb); Users with no specific preference of recnb can set it to a fixed value of 64 or 128.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork_host[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	events	magma_event_t array of size two created/destroyed by the user outside the routine Used to manage inter-stream dependencies
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

◆ magma_dpotrf_expert_gpu()

magma_int_t magma_dpotrf_expert_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_int_t	nb,
		magma_mode_t	mode )

wrapper around magma_dpotrf_expert_gpu_work to hide workspace, event, and queue management

See also: magma_dpotrf_expert_gpu_work

◆ magma_dpotrf_gpu()

magma_int_t magma_dpotrf_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_dpotrf_expert_gpu with mode = MagmaHybrid.

Computation is hybrid, part on CPU (panels), part on GPU (matrix updates).

See also: magma_dpotrf_expert_gpu

◆ magma_dpotrf_native()

magma_int_t magma_dpotrf_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_dpotrf_expert_gpu with mode = MagmaNative.

Computation is done only on the GPU, not on the CPU.

See also: magma_dpotrf_expert_gpu

◆ magma_dpotrf_m()

magma_int_t magma_dpotrf_m	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		double *	A,
		magma_int_t	lda,
		magma_int_t *	info )

DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. The matrix A may exceed the GPU memory.

The factorization has the form A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_dpotrf_mgpu()

magma_int_t magma_dpotrf_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDouble_ptr	d_lA[],
		magma_int_t	ldda,
		magma_int_t *	info )

DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	d_lA	DOUBLE PRECISION array of pointers on the GPU, dimension (ngpu) On entry, the symmetric matrix dA distributed over GPUs (d_lA[d] points to the local matrix on the d-th GPU). It is distributed in 1D block column or row cyclic (with the block size of nb) if UPLO = MagmaUpper or MagmaLower, respectively. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array d_lA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_dpotrf_rectile_native()

magma_int_t magma_dpotrf_rectile_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magma_int_t	recnb,
		double *	dA,
		magma_int_t	ldda,
		magma_int_t	gbstep,
		magma_int_t *	dinfo,
		magma_int_t *	info,
		magma_queue_t	queue )

DPOTRF_RECTILE computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored. (Not currently supported) = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in]	recnb	INTEGER The blocking size at which recursion stops.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in]	gbstep	INTEGER Internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[out]	info	INTEGER, stored on the CPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine.

◆ magma_shpotrf_LL_expert_gpu()

magma_int_t magma_shpotrf_LL_expert_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t	nb,
		magma_int_t	recnb,
		magma_int_t *	info,
		magma_mode_t	mode )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

This factorization uses half precision during the trailing matrix updates, thus its accuracy is not up to FP32 precision. It is used in mixed precision solvers exploiting half precision.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	mode	magma_mode_t = MagmaNative: Factorize dA using GPU only mode (only uplo=MagmaLower is available); = MagmaHybrid: Factorize dA using Hybrid (CPU/GPU) mode.

◆ magma_shpotrf_gpu()

magma_int_t magma_shpotrf_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_shpotrf_LL_expert_gpu with mode = MagmaHybrid.

Computation is hybrid, part on CPU (panels), part on GPU (matrix updates).

See also: magma_shpotrf_LL_expert_gpu

◆ magma_shpotrf_native()

magma_int_t magma_shpotrf_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_shpotrf_LL_expert_gpu with mode = MagmaNative.

Computation is done only on the GPU, not on the CPU.

See also: magma_shpotrf_LL_expert_gpu

◆ magma_spotrf_expert()

magma_int_t magma_spotrf_expert	(	magma_uplo_t	uplo,
		magma_int_t	n,
		float *	A,
		magma_int_t	lda,
		float *	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_queue_t *	queues )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

The factorization has the form A = U**H * U, if uplo = MagmaUpper, or A = L * L**H, if uplo = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

This uses multiple queues to overlap communication and computation.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If uplo = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_spotrf3_mgpu()

magma_int_t magma_spotrf3_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	off_i,
		magma_int_t	off_j,
		magma_int_t	nb,
		magmaFloat_ptr	d_lA[],
		magma_int_t	ldda,
		magmaFloat_ptr	d_lP[],
		magma_int_t	lddp,
		float *	A,
		magma_int_t	lda,
		magma_int_t	h,
		magma_queue_t	queues[][3],
		magma_event_t	events[][5],
		magma_int_t *	info )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

Auxiliary subroutine for spotrf2_ooc. It is multiple gpu interface to compute Cholesky of a "rectangular" matrix.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	m	INTEGER The number of rows of the submatrix to be factorized.
[in]	n	INTEGER The number of columns of the submatrix to be factorized.
[in]	off_i	INTEGER The first row index of the submatrix to be factorized.
[in]	off_j	INTEGER The first column index of the submatrix to be factorized.
[in]	nb	INTEGER The block size used for the factorization and distribution.
[in,out]	d_lA	REAL array of pointers on the GPU, dimension (ngpu). On entry, the symmetric matrix dA distributed over GPU. (d_lA[d] points to the local matrix on d-th GPU). If UPLO = MagmaLower or MagmaUpper, it respectively uses a 1D block column or row cyclic format (with the block size nb), and each local matrix is stored by column. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in,out]	d_lP	REAL array of pointers on the GPU, dimension (ngpu). d_lP[d] points to workspace of size h*lddp*nb on d-th GPU.
[in]	lddp	INTEGER The leading dimension of the array dP. LDDP >= max(1,N).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in,out]	A	REAL array on the CPU, dimension (LDA,H*NB) On exit, the panel is copied back to the CPU
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[in]	h	INTEGER It specifies the size of the CPU workspace, A.
[in]	queues	magma_queue_t queues is of dimension (ngpu,3) and contains the queues used for the partial factorization.
[in]	events	magma_event_t events is of dimension(ngpu,5) and contains the events used for the partial factorization.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_spotrf_expert_gpu_work()

magma_int_t magma_spotrf_expert_gpu_work	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		magma_int_t	recnb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_event_t	events[2],
		magma_queue_t	queues[2] )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS. This algorithm uses left-looking Cholesky factorization

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored (hybrid mode only); = MagmaLower: Lower triangle of dA is stored (hybrid & native modes).
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	mode	magma_mode_t = MagmaNative: Factorize dA using GPU only mode. = MagmaHybrid: Factorize dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_spotrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in]	recnb	INTEGER The blocking size used during the recursive panel factorization (0 < recnb <= nb); Users with no specific preference of recnb can set it to a fixed value of 64 or 128.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork_host[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	events	magma_event_t array of size two created/destroyed by the user outside the routine Used to manage inter-stream dependencies
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

◆ magma_spotrf_expert_gpu()

magma_int_t magma_spotrf_expert_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_int_t	nb,
		magma_mode_t	mode )

wrapper around magma_spotrf_expert_gpu_work to hide workspace, event, and queue management

See also: magma_spotrf_expert_gpu_work

◆ magma_spotrf_gpu()

magma_int_t magma_spotrf_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_spotrf_expert_gpu with mode = MagmaHybrid.

Computation is hybrid, part on CPU (panels), part on GPU (matrix updates).

See also: magma_spotrf_expert_gpu

◆ magma_spotrf_native()

magma_int_t magma_spotrf_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_spotrf_expert_gpu with mode = MagmaNative.

Computation is done only on the GPU, not on the CPU.

See also: magma_spotrf_expert_gpu

◆ magma_spotrf_m()

magma_int_t magma_spotrf_m	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		float *	A,
		magma_int_t	lda,
		magma_int_t *	info )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. The matrix A may exceed the GPU memory.

The factorization has the form A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_spotrf_mgpu()

magma_int_t magma_spotrf_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaFloat_ptr	d_lA[],
		magma_int_t	ldda,
		magma_int_t *	info )

SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	d_lA	REAL array of pointers on the GPU, dimension (ngpu) On entry, the symmetric matrix dA distributed over GPUs (d_lA[d] points to the local matrix on the d-th GPU). It is distributed in 1D block column or row cyclic (with the block size of nb) if UPLO = MagmaUpper or MagmaLower, respectively. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array d_lA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_spotrf_rectile_native()

magma_int_t magma_spotrf_rectile_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magma_int_t	recnb,
		float *	dA,
		magma_int_t	ldda,
		magma_int_t	gbstep,
		magma_int_t *	dinfo,
		magma_int_t *	info,
		magma_queue_t	queue )

SPOTRF_RECTILE computes the Cholesky factorization of a real symmetric positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored. (Not currently supported) = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in]	recnb	INTEGER The blocking size at which recursion stops.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in]	gbstep	INTEGER Internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[out]	info	INTEGER, stored on the CPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine.

◆ magma_zpotrf_expert()

magma_int_t magma_zpotrf_expert	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magmaDoubleComplex *	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_queue_t *	queues )

ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

The factorization has the form A = U**H * U, if uplo = MagmaUpper, or A = L * L**H, if uplo = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

This uses multiple queues to overlap communication and computation.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	COMPLEX_16 array, dimension (LDA,N) On entry, the Hermitian matrix A. If uplo = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_zpotrf3_mgpu()

magma_int_t magma_zpotrf3_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	off_i,
		magma_int_t	off_j,
		magma_int_t	nb,
		magmaDoubleComplex_ptr	d_lA[],
		magma_int_t	ldda,
		magmaDoubleComplex_ptr	d_lP[],
		magma_int_t	lddp,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magma_int_t	h,
		magma_queue_t	queues[][3],
		magma_event_t	events[][5],
		magma_int_t *	info )

ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

Auxiliary subroutine for zpotrf2_ooc. It is multiple gpu interface to compute Cholesky of a "rectangular" matrix.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	m	INTEGER The number of rows of the submatrix to be factorized.
[in]	n	INTEGER The number of columns of the submatrix to be factorized.
[in]	off_i	INTEGER The first row index of the submatrix to be factorized.
[in]	off_j	INTEGER The first column index of the submatrix to be factorized.
[in]	nb	INTEGER The block size used for the factorization and distribution.
[in,out]	d_lA	COMPLEX_16 array of pointers on the GPU, dimension (ngpu). On entry, the Hermitian matrix dA distributed over the GPUs (d_lA[d] points to the local matrix on the d-th GPU). If UPLO = MagmaLower or MagmaUpper, it respectively uses a 1D block column or row cyclic format (with the block size nb), and each local matrix is stored by column. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in,out]	d_lP	COMPLEX_16 array of pointers on the GPU, dimension (ngpu). d_lP[d] points to workspace of size h*lddp*nb on the d-th GPU.
[in]	lddp	INTEGER The leading dimension of the array dP. LDDP >= max(1,N).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in,out]	A	COMPLEX_16 array on the CPU, dimension (LDA,H*NB) On exit, the panel is copied back to the CPU
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[in]	h	INTEGER It specifies the size of the CPU workspace, A.
[in]	queues	magma_queue_t queues is of dimension (ngpu,3) and contains the queues used for the partial factorization.
[in]	events	magma_event_t events is of dimension(ngpu,5) and contains the events used for the partial factorization.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_zpotrf_expert_gpu_work()

magma_int_t magma_zpotrf_expert_gpu_work	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		magma_int_t	recnb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_event_t	events[2],
		magma_queue_t	queues[2] )

ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS. This algorithm uses left-looking Cholesky factorization

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored (hybrid mode only); = MagmaLower: Lower triangle of dA is stored (hybrid & native modes).
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the Hermitian matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	mode	magma_mode_t = MagmaNative: Factorize dA using GPU only mode. = MagmaHybrid: Factorize dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_zpotrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in]	recnb	INTEGER The blocking size used during the recursive panel factorization (0 < recnb <= nb); Users with no specific preference of recnb can set it to a fixed value of 64 or 128.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork_host[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	events	magma_event_t array of size two created/destroyed by the user outside the routine Used to manage inter-stream dependencies
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

◆ magma_zpotrf_expert_gpu()

magma_int_t magma_zpotrf_expert_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info,
		magma_int_t	nb,
		magma_mode_t	mode )

wrapper around magma_zpotrf_expert_gpu_work to hide workspace, event, and queue management

See also: magma_zpotrf_expert_gpu_work

◆ magma_zpotrf_gpu()

magma_int_t magma_zpotrf_gpu	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_zpotrf_expert_gpu with mode = MagmaHybrid.

Computation is hybrid, part on CPU (panels), part on GPU (matrix updates).

See also: magma_zpotrf_expert_gpu

◆ magma_zpotrf_native()

magma_int_t magma_zpotrf_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	info )

magma_zpotrf_expert_gpu with mode = MagmaNative.

Computation is done only on the GPU, not on the CPU.

See also: magma_zpotrf_expert_gpu

◆ magma_zpotrf_m()

magma_int_t magma_zpotrf_m	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magma_int_t *	info )

ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. The matrix A may exceed the GPU memory.

The factorization has the form A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of A is stored; = MagmaLower: Lower triangle of A is stored.
[in]	n	INTEGER The order of the matrix A. N >= 0.
[in,out]	A	COMPLEX_16 array, dimension (LDA,N) On entry, the Hermitian matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H * U or A = L * L**H. Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as a memory allocation failure. > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_zpotrf_mgpu()

magma_int_t magma_zpotrf_mgpu	(	magma_int_t	ngpu,
		magma_uplo_t	uplo,
		magma_int_t	n,
		magmaDoubleComplex_ptr	d_lA[],
		magma_int_t	ldda,
		magma_int_t *	info )

ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored; = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in,out]	d_lA	COMPLEX_16 array of pointers on the GPU, dimension (ngpu) On entry, the Hermitian matrix dA distributed over GPUs (d_lA[d] points to the local matrix on the d-th GPU). It is distributed in 1D block column or row cyclic (with the block size of nb) if UPLO = MagmaUpper or MagmaLower, respectively. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array d_lA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.

◆ magma_zpotrf_rectile_native()

magma_int_t magma_zpotrf_rectile_native	(	magma_uplo_t	uplo,
		magma_int_t	n,
		magma_int_t	recnb,
		magmaDoubleComplex *	dA,
		magma_int_t	ldda,
		magma_int_t	gbstep,
		magma_int_t *	dinfo,
		magma_int_t *	info,
		magma_queue_t	queue )

ZPOTRF_RECTILE computes the Cholesky factorization of a complex Hermitian positive definite matrix dA.

The factorization has the form dA = U**H * U, if UPLO = MagmaUpper, or dA = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is lower triangular.

This is the block version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	uplo	magma_uplo_t = MagmaUpper: Upper triangle of dA is stored. (Not currently supported) = MagmaLower: Lower triangle of dA is stored.
[in]	n	INTEGER The order of the matrix dA. N >= 0.
[in]	recnb	INTEGER The blocking size at which recursion stops.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the Hermitian matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H * U or dA = L * L**H.
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,N). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[in]	gbstep	INTEGER Internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[out]	info	INTEGER, stored on the CPU. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine.

Functions

Detailed Description

Function Documentation

◆ magma_cpotrf_expert()

◆ magma_cpotrf3_mgpu()

◆ magma_cpotrf_expert_gpu_work()

◆ magma_cpotrf_expert_gpu()

◆ magma_cpotrf_gpu()

◆ magma_cpotrf_native()

◆ magma_cpotrf_m()

◆ magma_cpotrf_mgpu()

◆ magma_cpotrf_rectile_native()

◆ magma_dpotrf_expert()

◆ magma_dpotrf3_mgpu()

◆ magma_dpotrf_expert_gpu_work()

◆ magma_dpotrf_expert_gpu()

◆ magma_dpotrf_gpu()

◆ magma_dpotrf_native()

◆ magma_dpotrf_m()

◆ magma_dpotrf_mgpu()

◆ magma_dpotrf_rectile_native()

◆ magma_shpotrf_LL_expert_gpu()

◆ magma_shpotrf_gpu()

◆ magma_shpotrf_native()

◆ magma_spotrf_expert()

◆ magma_spotrf3_mgpu()

◆ magma_spotrf_expert_gpu_work()

◆ magma_spotrf_expert_gpu()

◆ magma_spotrf_gpu()

◆ magma_spotrf_native()

◆ magma_spotrf_m()

◆ magma_spotrf_mgpu()

◆ magma_spotrf_rectile_native()

◆ magma_zpotrf_expert()

◆ magma_zpotrf3_mgpu()

◆ magma_zpotrf_expert_gpu_work()

◆ magma_zpotrf_expert_gpu()

◆ magma_zpotrf_gpu()

◆ magma_zpotrf_native()

◆ magma_zpotrf_m()

◆ magma_zpotrf_mgpu()

◆ magma_zpotrf_rectile_native()