Functions
magma_int_t	magma_cgeqrf (magma_int_t m, magma_int_t n, magmaFloatComplex A, magma_int_t lda, magmaFloatComplex tau, magmaFloatComplex work, magma_int_t lwork, magma_int_t info)
	CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf2_gpu (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex tau, magma_int_t info)
	CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex tau, magmaFloatComplex_ptr dT, magma_int_t info)
	CGEQRF3 computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf_expert_gpu_work (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex tau, magmaFloatComplex_ptr dT, magma_int_t info, magma_mode_t mode, magma_int_t nb, void host_work, magma_int_t lwork_host, void device_work, magma_int_t lwork_device, magma_queue_t queues[2])
	CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf_gpu (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex tau, magmaFloatComplex_ptr dT, magma_int_t info)
	CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf_m (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaFloatComplex A, magma_int_t lda, magmaFloatComplex tau, magmaFloatComplex work, magma_int_t lwork, magma_int_t info)
	CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R using multiple GPUs.

magma_int_t	magma_cgeqrf2_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dlA[], magma_int_t ldda, magmaFloatComplex tau, magma_int_t info)
	CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_cgeqrf_ooc (magma_int_t m, magma_int_t n, magmaFloatComplex A, magma_int_t lda, magmaFloatComplex tau, magmaFloatComplex work, magma_int_t lwork, magma_int_t info)
	CGEQRF_OOC computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf (magma_int_t m, magma_int_t n, double A, magma_int_t lda, double tau, double work, magma_int_t lwork, magma_int_t info)
	DGEQRF computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf2_gpu (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, double tau, magma_int_t info)
	DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, double tau, magmaDouble_ptr dT, magma_int_t info)
	DGEQRF3 computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf_expert_gpu_work (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, double tau, magmaDouble_ptr dT, magma_int_t info, magma_mode_t mode, magma_int_t nb, void host_work, magma_int_t lwork_host, void device_work, magma_int_t lwork_device, magma_queue_t queues[2])
	DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf_gpu (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, double tau, magmaDouble_ptr dT, magma_int_t info)
	DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf_m (magma_int_t ngpu, magma_int_t m, magma_int_t n, double A, magma_int_t lda, double tau, double work, magma_int_t lwork, magma_int_t info)
	DGEQRF computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R using multiple GPUs.

magma_int_t	magma_dgeqrf2_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaDouble_ptr dlA[], magma_int_t ldda, double tau, magma_int_t info)
	DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_dgeqrf_ooc (magma_int_t m, magma_int_t n, double A, magma_int_t lda, double tau, double work, magma_int_t lwork, magma_int_t info)
	DGEQRF_OOC computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf (magma_int_t m, magma_int_t n, float A, magma_int_t lda, float tau, float work, magma_int_t lwork, magma_int_t info)
	SGEQRF computes a QR factorization of a REAL M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf2_gpu (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, float tau, magma_int_t info)
	SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, float tau, magmaFloat_ptr dT, magma_int_t info)
	SGEQRF3 computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf_expert_gpu_work (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, float tau, magmaFloat_ptr dT, magma_int_t info, magma_mode_t mode, magma_int_t nb, void host_work, magma_int_t lwork_host, void device_work, magma_int_t lwork_device, magma_queue_t queues[2])
	SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf_gpu (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, float tau, magmaFloat_ptr dT, magma_int_t info)
	SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf_m (magma_int_t ngpu, magma_int_t m, magma_int_t n, float A, magma_int_t lda, float tau, float work, magma_int_t lwork, magma_int_t info)
	SGEQRF computes a QR factorization of a REAL M-by-N matrix A: A = Q * R using multiple GPUs.

magma_int_t	magma_sgeqrf2_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaFloat_ptr dlA[], magma_int_t ldda, float tau, magma_int_t info)
	SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

magma_int_t	magma_sgeqrf_ooc (magma_int_t m, magma_int_t n, float A, magma_int_t lda, float tau, float work, magma_int_t lwork, magma_int_t info)
	SGEQRF_OOC computes a QR factorization of a REAL M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf (magma_int_t m, magma_int_t n, magmaDoubleComplex A, magma_int_t lda, magmaDoubleComplex tau, magmaDoubleComplex work, magma_int_t lwork, magma_int_t info)
	ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf2_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex tau, magma_int_t info)
	ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex tau, magmaDoubleComplex_ptr dT, magma_int_t info)
	ZGEQRF3 computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf_expert_gpu_work (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex tau, magmaDoubleComplex_ptr dT, magma_int_t info, magma_mode_t mode, magma_int_t nb, void host_work, magma_int_t lwork_host, void device_work, magma_int_t lwork_device, magma_queue_t queues[2])
	ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex tau, magmaDoubleComplex_ptr dT, magma_int_t info)
	ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf_m (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaDoubleComplex A, magma_int_t lda, magmaDoubleComplex tau, magmaDoubleComplex work, magma_int_t lwork, magma_int_t info)
	ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R using multiple GPUs.

magma_int_t	magma_zgeqrf2_mgpu (magma_int_t ngpu, magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dlA[], magma_int_t ldda, magmaDoubleComplex tau, magma_int_t info)
	ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

magma_int_t	magma_zgeqrf_ooc (magma_int_t m, magma_int_t n, magmaDoubleComplex A, magma_int_t lda, magmaDoubleComplex tau, magmaDoubleComplex work, magma_int_t lwork, magma_int_t info)
	ZGEQRF_OOC computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R.

Detailed Description

Function Documentation

◆ magma_cgeqrf()

magma_int_t magma_cgeqrf	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magmaFloatComplex *	tau,
		magmaFloatComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

This uses 2 queues to overlap communication and computation.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf2_gpu()

magma_int_t magma_cgeqrf2_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	tau,
		magma_int_t *	info )

CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version has LAPACK-complaint arguments.

Other versions (magma_cgeqrf_gpu and magma_cgeqrf3_gpu) store the intermediate T matrices.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf3_gpu()

magma_int_t magma_cgeqrf3_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	tau,
		magmaFloatComplex_ptr	dT,
		magma_int_t *	info )

CGEQRF3 computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf_expert_gpu_work()

magma_int_t magma_cgeqrf_expert_gpu_work	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	tau,
		magmaFloatComplex_ptr	dT,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_queue_t	queues[2] )

CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
[in]	mode	magma_mode_t Only mode = MagmaHybrid is currently supported, factorizes dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_cgeqrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf_gpu()

magma_int_t magma_cgeqrf_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex *	tau,
		magmaFloatComplex_ptr	dT,
		magma_int_t *	info )

CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf_m()

magma_int_t magma_cgeqrf_m	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magmaFloatComplex *	tau,
		magmaFloatComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R using multiple GPUs.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf2_mgpu()

magma_int_t magma_cgeqrf2_mgpu	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dlA[],
		magma_int_t	ldda,
		magmaFloatComplex *	tau,
		magma_int_t *	info )

CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This is a GPU interface of the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dlA	COMPLEX array of pointers on the GPU, dimension (ngpu). On entry, the M-by-N matrix A distributed over GPUs (d_lA[d] points to the local matrix on d-th GPU). It uses 1D block column cyclic format with the block size of nb, and each local matrix is stored by column. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_cgeqrf_ooc()

magma_int_t magma_cgeqrf_ooc	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex *	A,
		magma_int_t	lda,
		magmaFloatComplex *	tau,
		magmaFloatComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

CGEQRF_OOC computes a QR factorization of a COMPLEX M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. This is an out-of-core (ooc) version that is similar to magma_cgeqrf but the difference is that this version can use a GPU even if the matrix does not fit into the GPU memory at once.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf()

magma_int_t magma_dgeqrf	(	magma_int_t	m,
		magma_int_t	n,
		double *	A,
		magma_int_t	lda,
		double *	tau,
		double *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

DGEQRF computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

This uses 2 queues to overlap communication and computation.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf2_gpu()

magma_int_t magma_dgeqrf2_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		double *	tau,
		magma_int_t *	info )

DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version has LAPACK-complaint arguments.

Other versions (magma_dgeqrf_gpu and magma_dgeqrf3_gpu) store the intermediate T matrices.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf3_gpu()

magma_int_t magma_dgeqrf3_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		double *	tau,
		magmaDouble_ptr	dT,
		magma_int_t *	info )

DGEQRF3 computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) DOUBLE PRECISION array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf_expert_gpu_work()

magma_int_t magma_dgeqrf_expert_gpu_work	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		double *	tau,
		magmaDouble_ptr	dT,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_queue_t	queues[2] )

DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) DOUBLE PRECISION array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
[in]	mode	magma_mode_t Only mode = MagmaHybrid is currently supported, factorizes dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_dgeqrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf_gpu()

magma_int_t magma_dgeqrf_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		double *	tau,
		magmaDouble_ptr	dT,
		magma_int_t *	info )

DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) DOUBLE PRECISION array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf_m()

magma_int_t magma_dgeqrf_m	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		double *	A,
		magma_int_t	lda,
		double *	tau,
		double *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

DGEQRF computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R using multiple GPUs.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf2_mgpu()

magma_int_t magma_dgeqrf2_mgpu	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dlA[],
		magma_int_t	ldda,
		double *	tau,
		magma_int_t *	info )

DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This is a GPU interface of the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dlA	DOUBLE PRECISION array of pointers on the GPU, dimension (ngpu). On entry, the M-by-N matrix A distributed over GPUs (d_lA[d] points to the local matrix on d-th GPU). It uses 1D block column cyclic format with the block size of nb, and each local matrix is stored by column. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_dgeqrf_ooc()

magma_int_t magma_dgeqrf_ooc	(	magma_int_t	m,
		magma_int_t	n,
		double *	A,
		magma_int_t	lda,
		double *	tau,
		double *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

DGEQRF_OOC computes a QR factorization of a DOUBLE PRECISION M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. This is an out-of-core (ooc) version that is similar to magma_dgeqrf but the difference is that this version can use a GPU even if the matrix does not fit into the GPU memory at once.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_dgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf()

magma_int_t magma_sgeqrf	(	magma_int_t	m,
		magma_int_t	n,
		float *	A,
		magma_int_t	lda,
		float *	tau,
		float *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

SGEQRF computes a QR factorization of a REAL M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

This uses 2 queues to overlap communication and computation.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf2_gpu()

magma_int_t magma_sgeqrf2_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		float *	tau,
		magma_int_t *	info )

SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version has LAPACK-complaint arguments.

Other versions (magma_sgeqrf_gpu and magma_sgeqrf3_gpu) store the intermediate T matrices.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf3_gpu()

magma_int_t magma_sgeqrf3_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		float *	tau,
		magmaFloat_ptr	dT,
		magma_int_t *	info )

SGEQRF3 computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) REAL array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf_expert_gpu_work()

magma_int_t magma_sgeqrf_expert_gpu_work	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		float *	tau,
		magmaFloat_ptr	dT,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_queue_t	queues[2] )

SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) REAL array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
[in]	mode	magma_mode_t Only mode = MagmaHybrid is currently supported, factorizes dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_sgeqrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf_gpu()

magma_int_t magma_sgeqrf_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		float *	tau,
		magmaFloat_ptr	dT,
		magma_int_t *	info )

SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	REAL array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) REAL array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf_m()

magma_int_t magma_sgeqrf_m	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		float *	A,
		magma_int_t	lda,
		float *	tau,
		float *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

SGEQRF computes a QR factorization of a REAL M-by-N matrix A: A = Q * R using multiple GPUs.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf2_mgpu()

magma_int_t magma_sgeqrf2_mgpu	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dlA[],
		magma_int_t	ldda,
		float *	tau,
		magma_int_t *	info )

SGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R.

This is a GPU interface of the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dlA	REAL array of pointers on the GPU, dimension (ngpu). On entry, the M-by-N matrix A distributed over GPUs (d_lA[d] points to the local matrix on d-th GPU). It uses 1D block column cyclic format with the block size of nb, and each local matrix is stored by column. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_sgeqrf_ooc()

magma_int_t magma_sgeqrf_ooc	(	magma_int_t	m,
		magma_int_t	n,
		float *	A,
		magma_int_t	lda,
		float *	tau,
		float *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

SGEQRF_OOC computes a QR factorization of a REAL M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. This is an out-of-core (ooc) version that is similar to magma_sgeqrf but the difference is that this version can use a GPU even if the matrix does not fit into the GPU memory at once.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_sgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf()

magma_int_t magma_zgeqrf	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

This uses 2 queues to overlap communication and computation.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX_16 array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf2_gpu()

magma_int_t magma_zgeqrf2_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	tau,
		magma_int_t *	info )

ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version has LAPACK-complaint arguments.

Other versions (magma_zgeqrf_gpu and magma_zgeqrf3_gpu) store the intermediate T matrices.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf3_gpu()

magma_int_t magma_zgeqrf3_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex_ptr	dT,
		magma_int_t *	info )

ZGEQRF3 computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX_16 array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf_expert_gpu_work()

magma_int_t magma_zgeqrf_expert_gpu_work	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex_ptr	dT,
		magma_int_t *	info,
		magma_mode_t	mode,
		magma_int_t	nb,
		void *	host_work,
		magma_int_t *	lwork_host,
		void *	device_work,
		magma_int_t *	lwork_device,
		magma_queue_t	queues[2] )

ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX_16 array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
[in]	mode	magma_mode_t Only mode = MagmaHybrid is currently supported, factorizes dA using Hybrid (CPU/GPU) mode.
[in]	nb	INTEGER The blocking size used during the factorization. nb > 0; Users with no specific preference of nb can call magma_get_zgeqrf_nb() to get the value of nb as determined by MAGMA's internal tuning.
[in,out]	host_work	Workspace, allocated on host (CPU) memory. For faster CPU-GPU communication, user can allocate it as pinned memory using magma_malloc_pinned()
[in,out]	lwork_host	INTEGER pointer The size of the workspace (host_work) in bytes lwork_host[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_host. The workspace itself is not referenced, and no factorization is performed.

lwork[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_host.

Parameters

[in,out]	device_work	Workspace, allocated on device (GPU) memory.
[in,out]	lwork_device	INTEGER pointer The size of the workspace (device_work) in bytes lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no factorization is performed. lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[in]	queues	magma_queue_t array of size two created/destroyed by the user outside the routine Used for concurrent kernel execution, if possible

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf_gpu()

magma_int_t magma_zgeqrf_gpu	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex_ptr	dT,
		magma_int_t *	info )

ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA	COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	dT	(workspace) COMPLEX_16 array on the GPU, dimension (2MIN(M, N) + ceil(N/32)32 )NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). It starts with a MIN(M,N)NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores inverses of the diagonal blocks of the R matrix. The rest of the array is used as workspace.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v^H

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf_m()

magma_int_t magma_zgeqrf_m	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R using multiple GPUs.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX_16 array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf2_mgpu()

magma_int_t magma_zgeqrf2_mgpu	(	magma_int_t	ngpu,
		magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dlA[],
		magma_int_t	ldda,
		magmaDoubleComplex *	tau,
		magma_int_t *	info )

ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R.

This is a GPU interface of the routine.

Parameters

[in]	ngpu	INTEGER Number of GPUs to use. ngpu > 0.
[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dlA	COMPLEX_16 array of pointers on the GPU, dimension (ngpu). On entry, the M-by-N matrix A distributed over GPUs (d_lA[d] points to the local matrix on d-th GPU). It uses 1D block column cyclic format with the block size of nb, and each local matrix is stored by column. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details).
[in]	ldda	INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16.
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

◆ magma_zgeqrf_ooc()

magma_int_t magma_zgeqrf_ooc	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex *	A,
		magma_int_t	lda,
		magmaDoubleComplex *	tau,
		magmaDoubleComplex *	work,
		magma_int_t	lwork,
		magma_int_t *	info )

ZGEQRF_OOC computes a QR factorization of a COMPLEX_16 M-by-N matrix A: A = Q * R.

This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. This is an out-of-core (ooc) version that is similar to magma_zgeqrf but the difference is that this version can use a GPU even if the matrix does not fit into the GPU memory at once.

Parameters

[in]	m	INTEGER The number of rows of the matrix A. M >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	COMPLEX_16 array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lda	INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	tau	COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details).
[out]	work	(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned.
[in]	lwork	INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_zgeqrf_nb( M, N ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued.
[out]	info	INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.

Further Details

The matrix Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(m,n).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

Functions

Detailed Description

Function Documentation

◆ magma_cgeqrf()

Further Details

◆ magma_cgeqrf2_gpu()

Further Details

◆ magma_cgeqrf3_gpu()

Further Details

◆ magma_cgeqrf_expert_gpu_work()

Further Details

◆ magma_cgeqrf_gpu()

Further Details

◆ magma_cgeqrf_m()

Further Details

◆ magma_cgeqrf2_mgpu()

Further Details

◆ magma_cgeqrf_ooc()

Further Details

◆ magma_dgeqrf()

Further Details

◆ magma_dgeqrf2_gpu()

Further Details

◆ magma_dgeqrf3_gpu()

Further Details

◆ magma_dgeqrf_expert_gpu_work()

Further Details

◆ magma_dgeqrf_gpu()

Further Details

◆ magma_dgeqrf_m()

Further Details

◆ magma_dgeqrf2_mgpu()

Further Details

◆ magma_dgeqrf_ooc()

Further Details

◆ magma_sgeqrf()

Further Details

◆ magma_sgeqrf2_gpu()

Further Details

◆ magma_sgeqrf3_gpu()

Further Details

◆ magma_sgeqrf_expert_gpu_work()

Further Details

◆ magma_sgeqrf_gpu()

Further Details

◆ magma_sgeqrf_m()

Further Details

◆ magma_sgeqrf2_mgpu()

Further Details

◆ magma_sgeqrf_ooc()

Further Details

◆ magma_zgeqrf()

Further Details

◆ magma_zgeqrf2_gpu()

Further Details

◆ magma_zgeqrf3_gpu()

Further Details

◆ magma_zgeqrf_expert_gpu_work()

Further Details

◆ magma_zgeqrf_gpu()

Further Details

◆ magma_zgeqrf_m()

Further Details

◆ magma_zgeqrf2_mgpu()

Further Details

◆ magma_zgeqrf_ooc()

Further Details