MAGMA 2.10.0
Matrix Algebra for GPU and Multicore Architectures
Loading...
Searching...
No Matches
or/unmqr: Multiply by Q from QR factorization

Functions

magma_int_t magma_cunmqr (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *tau, magmaFloatComplex *C, magma_int_t ldc, magmaFloatComplex *work, magma_int_t lwork, magma_int_t *info)
 CUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunmqr2_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex_ptr dC, magma_int_t lddc, const magmaFloatComplex *wA, magma_int_t ldwa, magma_int_t *info)
 CUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunmqr_2stage_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dC, magma_int_t lddc, magmaFloatComplex_ptr dT, magma_int_t nb, magma_int_t *info)
 CUNMQR_GPU overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunmqr_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dtau_array, magmaFloatComplex **dC_array, magma_int_t lddc, void *device_work, int64_t *lwork_device, magma_int_t *dinfo_array, magma_int_t batchCount, magma_queue_t queue)
 CUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunmqr_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex const *tau, magmaFloatComplex_ptr dC, magma_int_t lddc, magmaFloatComplex *hwork, magma_int_t lwork, magmaFloatComplex_ptr dT, magma_int_t nb, magma_int_t *info)
 CUNMQR_GPU overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunmqr_m (magma_int_t ngpu, magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *tau, magmaFloatComplex *C, magma_int_t ldc, magmaFloatComplex *work, magma_int_t lwork, magma_int_t *info)
 CUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_dormqr (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, double *A, magma_int_t lda, double *tau, double *C, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 DORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_dormqr2_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDouble_ptr dA, magma_int_t ldda, double *tau, magmaDouble_ptr dC, magma_int_t lddc, const double *wA, magma_int_t ldwa, magma_int_t *info)
 DORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_dormqr_2stage_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dC, magma_int_t lddc, magmaDouble_ptr dT, magma_int_t nb, magma_int_t *info)
 DORMQR_GPU overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_dormqr_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, double **dA_array, magma_int_t ldda, double **dtau_array, double **dC_array, magma_int_t lddc, void *device_work, int64_t *lwork_device, magma_int_t *dinfo_array, magma_int_t batchCount, magma_queue_t queue)
 DORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_dormqr_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDouble_const_ptr dA, magma_int_t ldda, double const *tau, magmaDouble_ptr dC, magma_int_t lddc, double *hwork, magma_int_t lwork, magmaDouble_ptr dT, magma_int_t nb, magma_int_t *info)
 DORMQR_GPU overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_dormqr_m (magma_int_t ngpu, magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, double *A, magma_int_t lda, double *tau, double *C, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 DORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, float *A, magma_int_t lda, float *tau, float *C, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 SORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr2_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloat_ptr dA, magma_int_t ldda, float *tau, magmaFloat_ptr dC, magma_int_t lddc, const float *wA, magma_int_t ldwa, magma_int_t *info)
 SORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr_2stage_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloat_ptr dA, magma_int_t ldda, magmaFloat_ptr dC, magma_int_t lddc, magmaFloat_ptr dT, magma_int_t nb, magma_int_t *info)
 SORMQR_GPU overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, float **dA_array, magma_int_t ldda, float **dtau_array, float **dC_array, magma_int_t lddc, void *device_work, int64_t *lwork_device, magma_int_t *dinfo_array, magma_int_t batchCount, magma_queue_t queue)
 SORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloat_const_ptr dA, magma_int_t ldda, float const *tau, magmaFloat_ptr dC, magma_int_t lddc, float *hwork, magma_int_t lwork, magmaFloat_ptr dT, magma_int_t nb, magma_int_t *info)
 SORMQR_GPU overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_sormqr_m (magma_int_t ngpu, magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, float *A, magma_int_t lda, float *tau, float *C, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 SORMQR overwrites the general real M-by-N matrix C with.
 
magma_int_t magma_zunmqr (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *C, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 ZUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_zunmqr2_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex_ptr dC, magma_int_t lddc, const magmaDoubleComplex *wA, magma_int_t ldwa, magma_int_t *info)
 ZUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_zunmqr_2stage_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dC, magma_int_t lddc, magmaDoubleComplex_ptr dT, magma_int_t nb, magma_int_t *info)
 ZUNMQR_GPU overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_zunmqr_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dtau_array, magmaDoubleComplex **dC_array, magma_int_t lddc, void *device_work, int64_t *lwork_device, magma_int_t *dinfo_array, magma_int_t batchCount, magma_queue_t queue)
 ZUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_zunmqr_gpu (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex const *tau, magmaDoubleComplex_ptr dC, magma_int_t lddc, magmaDoubleComplex *hwork, magma_int_t lwork, magmaDoubleComplex_ptr dT, magma_int_t nb, magma_int_t *info)
 ZUNMQR_GPU overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_zunmqr_m (magma_int_t ngpu, magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *C, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 ZUNMQR overwrites the general complex M-by-N matrix C with.
 
magma_int_t magma_cunm2r_reg_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaFloatComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaFloatComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 CUNM2R overwrites the general complex m-by-n matrix C with.
 
magma_int_t magma_cunm2r_reg_medium_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaFloatComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaFloatComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 CUNM2R overwrites the general complex m-by-n matrix C with.
 
magma_int_t magma_cunm2r_reg_tall_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaFloatComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaFloatComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 CUNM2R overwrites the general complex m-by-n matrix C with.
 
magma_int_t magma_dorm2r_reg_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, double **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, double **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 DORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_dorm2r_reg_medium_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, double **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, double **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 DORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_dorm2r_reg_tall_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, double **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, double **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 DORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_sorm2r_reg_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, float **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, float **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 SORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_sorm2r_reg_medium_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, float **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, float **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 SORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_sorm2r_reg_tall_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, float **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, float **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 SORM2R overwrites the general real m-by-n matrix C with.
 
magma_int_t magma_zunm2r_reg_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaDoubleComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaDoubleComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 ZUNM2R overwrites the general complex m-by-n matrix C with.
 
magma_int_t magma_zunm2r_reg_medium_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaDoubleComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaDoubleComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 ZUNM2R overwrites the general complex m-by-n matrix C with.
 
magma_int_t magma_zunm2r_reg_tall_batched (magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaDoubleComplex **dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv, magmaDoubleComplex **dtau_array, magma_int_t taui, magma_int_t check_launch_only, magma_int_t batchCount, magma_queue_t queue)
 ZUNM2R overwrites the general complex m-by-n matrix C with.
 

Detailed Description

Function Documentation

◆ magma_cunmqr()

magma_int_t magma_cunmqr ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex * A,
magma_int_t lda,
magmaFloatComplex * tau,
magmaFloatComplex * C,
magma_int_t ldc,
magmaFloatComplex * work,
magma_int_t lwork,
magma_int_t * info )

CUNMQR overwrites the general complex M-by-N matrix C with.

                          SIDE = MagmaLeft   SIDE = MagmaRight
TRANS = MagmaNoTrans:     Q * C              C * Q
TRANS = Magma_ConjTrans:  Q**H * C           C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: COMPLEX array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauCOMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF.
[in,out] C: COMPLEX array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out] work: (workspace) COMPLEX array, dimension (MAX(1,LWORK)). On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in] lwork: INTEGER. The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance, LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_cunmqr2_gpu()

magma_int_t magma_cunmqr2_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex_ptr dA,
magma_int_t ldda,
magmaFloatComplex * tau,
magmaFloatComplex_ptr dC,
magma_int_t lddc,
const magmaFloatComplex * wA,
magma_int_t ldwa,
magma_int_t * info )

CUNMQR overwrites the general complex M-by-N matrix C with.

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in,out] dA: COMPLEX array on the GPU, dimension (LDDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument dA. The diagonal and the upper part are destroyed; the reflectors are not modified.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauCOMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF.
[in,out]dCCOMPLEX array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array dC. LDDC >= max(1,M).
[in] wA: COMPLEX array, dimension (LDWA,M) if SIDE = MagmaLeft, or (LDWA,N) if SIDE = MagmaRight. The vectors which define the elementary reflectors, as returned by CHETRD_GPU. (A copy of the upper or lower part of dA, on the host.)
[in]ldwaINTEGER The leading dimension of the array wA. If SIDE = MagmaLeft, LDWA >= max(1,M); if SIDE = MagmaRight, LDWA >= max(1,N).
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_cunmqr_2stage_gpu()

magma_int_t magma_cunmqr_2stage_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex_ptr dA,
magma_int_t ldda,
magmaFloatComplex_ptr dC,
magma_int_t lddc,
magmaFloatComplex_ptr dT,
magma_int_t nb,
magma_int_t * info )

CUNMQR_GPU overwrites the general complex M-by-N matrix C with.

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] dA: COMPLEX array on the GPU, dimension (LDDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument dA. dA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array DA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in,out]dCCOMPLEX array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[in]dTCOMPLEX array on the GPU that is the output (the 9th argument) of magma_cgeqrf_gpu.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_cgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_cunmqr_batched()

magma_int_t magma_cunmqr_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex ** dA_array,
magma_int_t ldda,
magmaFloatComplex ** dtau_array,
magmaFloatComplex ** dC_array,
magma_int_t lddc,
void * device_work,
int64_t * lwork_device,
magma_int_t * dinfo_array,
magma_int_t batchCount,
magma_queue_t queue )

CUNMQR overwrites the general complex M-by-N matrix C with.

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

  • Only SIDE = MagmaLeft is currently supported
  • This is the batch version of the routine
Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right (not currently supported).
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dA_arrayArray of pointers, dimension (batchCount) Each is a COMPLEX array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument dA.
[in]lddaINTEGER The leading dimension of each array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in] dtau_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array, dimension (K). TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF.
[in,out]dC_arrayArray of pointers, dimension (batchCount) Each is a COMPLEX array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of each array DC. LDDC >= max(1,M).
[in,out]device_workWorkspace, allocated on device (GPU) memory.
[in,out] lwork_device: INTEGER pointer. The size of the workspace (device_work) in bytes:
  • lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no computation is performed.
  • lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[out] dinfo_array: INTEGER array on GPU memory. Each entry is one of:
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_cunmqr_gpu()

magma_int_t magma_cunmqr_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex_const_ptr dA,
magma_int_t ldda,
magmaFloatComplex const * tau,
magmaFloatComplex_ptr dC,
magma_int_t lddc,
magmaFloatComplex * hwork,
magma_int_t lwork,
magmaFloatComplex_ptr dT,
magma_int_t nb,
magma_int_t * info )

CUNMQR_GPU overwrites the general complex M-by-N matrix C with.

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dACOMPLEX array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument dA. dA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauCOMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF.
[in,out]dCCOMPLEX array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[out] hwork: (workspace) COMPLEX array, dimension (MAX(1,LWORK)).
Currently, cgetrs_gpu assumes that on exit, hwork contains the last block of A and C. This will change and should not be relied on!
[in]lworkINTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+NB) + N*NB if SIDE = MagmaLeft, and LWORK >= (N-K+NB)*(M+NB) + M*NB if SIDE = MagmaRight, where NB is the given blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA.
[in,out]dTCOMPLEX array on the GPU that is the output (the 9th argument) of magma_cgeqrf_gpu. Part used as workspace.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_cgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_cunmqr_m()

magma_int_t magma_cunmqr_m ( magma_int_t ngpu,
magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloatComplex * A,
magma_int_t lda,
magmaFloatComplex * tau,
magmaFloatComplex * C,
magma_int_t ldc,
magmaFloatComplex * work,
magma_int_t lwork,
magma_int_t * info )

CUNMQR overwrites the general complex M-by-N matrix C with.

                            SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:       Q * C               C * Q
TRANS = Magma_ConjTrans:    Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]ngpuINTEGER Number of GPUs to use. ngpu > 0.
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: COMPLEX array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF in the first k columns of its array argument A.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauCOMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF.
[in,out] C: COMPLEX array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_dormqr()

magma_int_t magma_dormqr ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
double * A,
magma_int_t lda,
double * tau,
double * C,
magma_int_t ldc,
double * work,
magma_int_t lwork,
magma_int_t * info )

DORMQR overwrites the general real M-by-N matrix C with.

                          SIDE = MagmaLeft   SIDE = MagmaRight
TRANS = MagmaNoTrans:     Q * C              C * Q
TRANS = MagmaTrans:       Q**H * C           C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Transpose, apply Q**H (Q is real, so Q**H is the same as Q**T).
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: DOUBLE PRECISION array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauDOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF.
[in,out] C: DOUBLE PRECISION array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance if SIDE = MagmaLeft, LWORK >= N*NB; if SIDE = MagmaRight, LWORK >= M*NB, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_dormqr2_gpu()

magma_int_t magma_dormqr2_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDouble_ptr dA,
magma_int_t ldda,
double * tau,
magmaDouble_ptr dC,
magma_int_t lddc,
const double * wA,
magma_int_t ldwa,
magma_int_t * info )

DORMQR overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in,out]dADOUBLE PRECISION array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument dA. The diagonal and the upper part are destroyed, the reflectors are not modified.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauDOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF.
[in,out]dCDOUBLE PRECISION array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array dC. LDDC >= max(1,M).
[in] wA: DOUBLE PRECISION array, dimension (LDWA,M) if SIDE = MagmaLeft, or (LDWA,N) if SIDE = MagmaRight. The vectors which define the elementary reflectors, as returned by DSYTRD_GPU. (A copy of the upper or lower part of dA, on the host.)
[in]ldwaINTEGER The leading dimension of the array wA. If SIDE = MagmaLeft, LDWA >= max(1,M); if SIDE = MagmaRight, LDWA >= max(1,N).
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_dormqr_2stage_gpu()

magma_int_t magma_dormqr_2stage_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDouble_ptr dA,
magma_int_t ldda,
magmaDouble_ptr dC,
magma_int_t lddc,
magmaDouble_ptr dT,
magma_int_t nb,
magma_int_t * info )

DORMQR_GPU overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dADOUBLE PRECISION array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument DA. DA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array DA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in,out]dCDOUBLE PRECISION array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[in]dTDOUBLE PRECISION array on the GPU that is the output (the 9th argument) of magma_dgeqrf_gpu.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_dgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_dormqr_batched()

magma_int_t magma_dormqr_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
double ** dA_array,
magma_int_t ldda,
double ** dtau_array,
double ** dC_array,
magma_int_t lddc,
void * device_work,
int64_t * lwork_device,
magma_int_t * dinfo_array,
magma_int_t batchCount,
magma_queue_t queue )

DORMQR overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

  • Only SIDE = MagmaLeft is currently supported
  • This is the batch version of the routine
Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right (not currently supported).
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dA_arrayArray of pointers, dimension (batchCount) Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument dA.
[in]lddaINTEGER The leading dimension of each array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in] dtau_array: Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array, dimension (K). TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF.
[in,out]dC_arrayArray of pointers, dimension (batchCount) Each is a DOUBLE PRECISION array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of each array DC. LDDC >= max(1,M).
[in,out]device_workWorkspace, allocated on device (GPU) memory.
[in,out]lwork_deviceINTEGER pointer The size of the workspace (device_work) in bytes
  • lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no computation is performed.
  • lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[out]dinfo_arrayINTEGER array on GPU memory. Each entry is either,
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_dormqr_gpu()

magma_int_t magma_dormqr_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDouble_const_ptr dA,
magma_int_t ldda,
double const * tau,
magmaDouble_ptr dC,
magma_int_t lddc,
double * hwork,
magma_int_t lwork,
magmaDouble_ptr dT,
magma_int_t nb,
magma_int_t * info )

DORMQR_GPU overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dADOUBLE PRECISION array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument dA. dA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauDOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF.
[in,out]dCDOUBLE PRECISION array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[out] hwork: (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)).
Currently, dgeqrs_gpu assumes that on exit, hwork contains the last block of A and C. This will change and should not be relied on!
[in]lworkINTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+NB) + N*NB if SIDE = MagmaLeft, and LWORK >= (N-K+NB)*(M+NB) + M*NB if SIDE = MagmaRight, where NB is the given blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA.
[in,out]dTDOUBLE PRECISION array on the GPU that is the output (the 9th argument) of magma_dgeqrf_gpu. Part used as workspace.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_dgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_dormqr_m()

magma_int_t magma_dormqr_m ( magma_int_t ngpu,
magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
double * A,
magma_int_t lda,
double * tau,
double * C,
magma_int_t ldc,
double * work,
magma_int_t lwork,
magma_int_t * info )

DORMQR overwrites the general real M-by-N matrix C with:

                            SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:       Q * C               C * Q
TRANS = MagmaTrans:         Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]ngpuINTEGER Number of GPUs to use. ngpu > 0.
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: DOUBLE PRECISION array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF in the first k columns of its array argument A.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauDOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF.
[in,out] C: DOUBLE PRECISION array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_sormqr()

magma_int_t magma_sormqr ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
float * A,
magma_int_t lda,
float * tau,
float * C,
magma_int_t ldc,
float * work,
magma_int_t lwork,
magma_int_t * info )

SORMQR overwrites the general real M-by-N matrix C with:

                          SIDE = MagmaLeft   SIDE = MagmaRight
TRANS = MagmaNoTrans:     Q * C              C * Q
TRANS = MagmaTrans:       Q**H * C           C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: REAL array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauREAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF.
[in,out] C: REAL array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance if SIDE = MagmaLeft, LWORK >= N*NB; if SIDE = MagmaRight, LWORK >= M*NB, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_sormqr2_gpu()

magma_int_t magma_sormqr2_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloat_ptr dA,
magma_int_t ldda,
float * tau,
magmaFloat_ptr dC,
magma_int_t lddc,
const float * wA,
magma_int_t ldwa,
magma_int_t * info )

SORMQR overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in,out]dAREAL array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument dA. The diagonal and the upper part are destroyed, the reflectors are not modified.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauREAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF.
[in,out]dCREAL array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array dC. LDDC >= max(1,M).
[in] wA: REAL array, dimension (LDWA,M) if SIDE = MagmaLeft, or (LDWA,N) if SIDE = MagmaRight. The vectors which define the elementary reflectors, as returned by SSYTRD_GPU. (A copy of the upper or lower part of dA, on the host.)
[in]ldwaINTEGER The leading dimension of the array wA. If SIDE = MagmaLeft, LDWA >= max(1,M); if SIDE = MagmaRight, LDWA >= max(1,N).
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_sormqr_2stage_gpu()

magma_int_t magma_sormqr_2stage_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloat_ptr dA,
magma_int_t ldda,
magmaFloat_ptr dC,
magma_int_t lddc,
magmaFloat_ptr dT,
magma_int_t nb,
magma_int_t * info )

SORMQR_GPU overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dAREAL array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument DA. DA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array DA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in,out]dCREAL array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[in]dTREAL array on the GPU that is the output (the 9th argument) of magma_sgeqrf_gpu.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_sgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_sormqr_batched()

magma_int_t magma_sormqr_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
float ** dA_array,
magma_int_t ldda,
float ** dtau_array,
float ** dC_array,
magma_int_t lddc,
void * device_work,
int64_t * lwork_device,
magma_int_t * dinfo_array,
magma_int_t batchCount,
magma_queue_t queue )

SORMQR overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

  • Only SIDE = MagmaLeft is currently supported
  • This is the batch version of the routine
Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right (not currently supported).
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dA_arrayArray of pointers, dimension (batchCount) Each is a REAL array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument dA.
[in]lddaINTEGER The leading dimension of each array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in] dtau_array: Array of pointers, dimension (batchCount). Each is a REAL array, dimension (K). TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF.
[in,out]dC_arrayArray of pointers, dimension (batchCount) Each is a REAL array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of each array DC. LDDC >= max(1,M).
[in,out]device_workWorkspace, allocated on device (GPU) memory.
[in,out]lwork_deviceINTEGER pointer The size of the workspace (device_work) in bytes
  • lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no computation is performed.
  • lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[out]dinfo_arrayINTEGER array on GPU memory. Each entry is either,
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_sormqr_gpu()

magma_int_t magma_sormqr_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaFloat_const_ptr dA,
magma_int_t ldda,
float const * tau,
magmaFloat_ptr dC,
magma_int_t lddc,
float * hwork,
magma_int_t lwork,
magmaFloat_ptr dT,
magma_int_t nb,
magma_int_t * info )

SORMQR_GPU overwrites the general real M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = MagmaTrans:        Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dAREAL array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument dA. dA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauREAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF.
[in,out]dCREAL array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[out] hwork: (workspace) REAL array, dimension (MAX(1,LWORK)).
Currently, sgeqrs_gpu assumes that on exit, hwork contains the last block of A and C. This will change and should not be relied on!
[in]lworkINTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+NB) + N*NB if SIDE = MagmaLeft, and LWORK >= (N-K+NB)*(M+NB) + M*NB if SIDE = MagmaRight, where NB is the given blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA.
[in,out]dTREAL array on the GPU that is the output (the 9th argument) of magma_sgeqrf_gpu. Part used as workspace.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_sgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_sormqr_m()

magma_int_t magma_sormqr_m ( magma_int_t ngpu,
magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
float * A,
magma_int_t lda,
float * tau,
float * C,
magma_int_t ldc,
float * work,
magma_int_t lwork,
magma_int_t * info )

SORMQR overwrites the general real M-by-N matrix C with:

                            SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:       Q * C               C * Q
TRANS = MagmaTrans:         Q**H * C            C * Q**H

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]ngpuINTEGER Number of GPUs to use. ngpu > 0.
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = MagmaTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: REAL array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGEQRF in the first k columns of its array argument A.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauREAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF.
[in,out] C: REAL array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_zunmqr()

magma_int_t magma_zunmqr ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex * A,
magma_int_t lda,
magmaDoubleComplex * tau,
magmaDoubleComplex * C,
magma_int_t ldc,
magmaDoubleComplex * work,
magma_int_t lwork,
magma_int_t * info )

ZUNMQR overwrites the general complex M-by-N matrix C with:

                          SIDE = MagmaLeft   SIDE = MagmaRight
TRANS = MagmaNoTrans:     Q * C              C * Q
TRANS = Magma_ConjTrans:  Q**H * C           C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in] A: COMPLEX_16 array, dimension (LDA,K). The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauCOMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF.
[in,out] C: COMPLEX_16 array, dimension (LDC,N). On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance if SIDE = MagmaLeft, LWORK >= N*NB; if SIDE = MagmaRight, LWORK >= M*NB, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_zunmqr2_gpu()

magma_int_t magma_zunmqr2_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex_ptr dA,
magma_int_t ldda,
magmaDoubleComplex * tau,
magmaDoubleComplex_ptr dC,
magma_int_t lddc,
const magmaDoubleComplex * wA,
magma_int_t ldwa,
magma_int_t * info )

ZUNMQR overwrites the general complex M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in,out]dACOMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument dA. The diagonal and the upper part are destroyed; the reflectors are not modified.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauCOMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF.
[in,out]dCCOMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array dC. LDDC >= max(1,M).
[in]wACOMPLEX_16 array, dimension (LDWA,M) if SIDE = MagmaLeft, or (LDWA,N) if SIDE = MagmaRight. The vectors which define the elementary reflectors, as returned by ZHETRD_GPU. (A copy of the upper or lower part of dA, on the host.)
[in]ldwaINTEGER The leading dimension of the array wA. If SIDE = MagmaLeft, LDWA >= max(1,M); if SIDE = MagmaRight, LDWA >= max(1,N).
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_zunmqr_2stage_gpu()

magma_int_t magma_zunmqr_2stage_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex_ptr dA,
magma_int_t ldda,
magmaDoubleComplex_ptr dC,
magma_int_t lddc,
magmaDoubleComplex_ptr dT,
magma_int_t nb,
magma_int_t * info )

ZUNMQR_GPU overwrites the general complex M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dACOMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument DA. DA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array DA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in,out]dCCOMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[in]dTCOMPLEX_16 array on the GPU that is the output (the 9th argument) of magma_zgeqrf_gpu.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_zgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_zunmqr_batched()

magma_int_t magma_zunmqr_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex ** dA_array,
magma_int_t ldda,
magmaDoubleComplex ** dtau_array,
magmaDoubleComplex ** dC_array,
magma_int_t lddc,
void * device_work,
int64_t * lwork_device,
magma_int_t * dinfo_array,
magma_int_t batchCount,
magma_queue_t queue )

ZUNMQR overwrites the general complex M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

  • Only SIDE = MagmaLeft is currently supported
  • This is the batch version of the routine
Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right (not currently supported).
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dA_arrayArray of pointers, dimension (batchCount) Each is a COMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument dA.
[in]lddaINTEGER The leading dimension of each array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]dtau_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array, dimension (K). TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF.
[in,out]dC_arrayArray of pointers, dimension (batchCount) Each is a COMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of each array DC. LDDC >= max(1,M).
[in,out]device_workWorkspace, allocated on device (GPU) memory.
[in,out]lwork_deviceINTEGER pointer The size of the workspace (device_work) in bytes
  • lwork_device[0] < 0: a workspace query is assumed, the routine calculates the required amount of workspace and returns it in lwork_device. The workspace itself is not referenced, and no computation is performed.
  • lwork_device[0] >= 0: the routine assumes that the user has provided a workspace with the size in lwork_device.
[out]dinfo_arrayINTEGER array on GPU memory. Each entry is either,
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_zunmqr_gpu()

magma_int_t magma_zunmqr_gpu ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex_const_ptr dA,
magma_int_t ldda,
magmaDoubleComplex const * tau,
magmaDoubleComplex_ptr dC,
magma_int_t lddc,
magmaDoubleComplex * hwork,
magma_int_t lwork,
magmaDoubleComplex_ptr dT,
magma_int_t nb,
magma_int_t * info )

ZUNMQR_GPU overwrites the general complex M-by-N matrix C with:

                           SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:      Q * C               C * Q
TRANS = Magma_ConjTrans:   Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]dACOMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument dA. dA is modified by the routine but restored on exit.
[in]lddaINTEGER The leading dimension of the array dA. If SIDE = MagmaLeft, LDDA >= max(1,M); if SIDE = MagmaRight, LDDA >= max(1,N).
[in]tauCOMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF.
[in,out]dCCOMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by (Q*C) or (Q**H * C) or (C * Q**H) or (C*Q).
[in]lddcINTEGER The leading dimension of the array DC. LDDC >= max(1,M).
[out]hwork(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK))
Currently, zgeqrs_gpu assumes that on exit, hwork contains the last block of A and C. This will change and should not be relied on!
[in]lworkINTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+NB) + N*NB if SIDE = MagmaLeft, and LWORK >= (N-K+NB)*(M+NB) + M*NB if SIDE = MagmaRight, where NB is the given blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA.
[in,out]dTCOMPLEX_16 array on the GPU that is the output (the 9th argument) of magma_zgeqrf_gpu. Part used as workspace.
[in]nbINTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_zgeqrf_gpu.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_zunmqr_m()

magma_int_t magma_zunmqr_m ( magma_int_t ngpu,
magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t k,
magmaDoubleComplex * A,
magma_int_t lda,
magmaDoubleComplex * tau,
magmaDoubleComplex * C,
magma_int_t ldc,
magmaDoubleComplex * work,
magma_int_t lwork,
magma_int_t * info )

ZUNMQR overwrites the general complex M-by-N matrix C with:

                            SIDE = MagmaLeft    SIDE = MagmaRight
TRANS = MagmaNoTrans:       Q * C               C * Q
TRANS = Magma_ConjTrans:    Q**H * C            C * Q**H

where Q is a complex unitary matrix defined as the product of k elementary reflectors

  Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order M if SIDE = MagmaLeft and of order N if SIDE = MagmaRight.

Parameters
[in]ngpuINTEGER Number of GPUs to use. ngpu > 0.
[in]sidemagma_side_t
  • = MagmaLeft: apply Q or Q**H from the Left;
  • = MagmaRight: apply Q or Q**H from the Right.
[in]transmagma_trans_t
  • = MagmaNoTrans: No transpose, apply Q;
  • = Magma_ConjTrans: Conjugate transpose, apply Q**H.
[in]mINTEGER The number of rows of the matrix C. M >= 0.
[in]nINTEGER The number of columns of the matrix C. N >= 0.
[in]kINTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = MagmaLeft, M >= K >= 0; if SIDE = MagmaRight, N >= K >= 0.
[in]ACOMPLEX_16 array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument A.
[in]ldaINTEGER The leading dimension of the array A. If SIDE = MagmaLeft, LDA >= max(1,M); if SIDE = MagmaRight, LDA >= max(1,N).
[in]tauCOMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF.
[in,out]CCOMPLEX_16 array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
[in]ldcINTEGER The leading dimension of the array C. LDC >= max(1,M).
[out]work(workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
[in]lworkINTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]infoINTEGER
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value

◆ magma_cunm2r_reg_batched()

magma_int_t magma_cunm2r_reg_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t k,
magmaFloatComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaFloatComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaFloatComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

CUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of CUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_cunm2r_reg_medium_batched()

magma_int_t magma_cunm2r_reg_medium_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
magmaFloatComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaFloatComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaFloatComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

CUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of CUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_cunm2r_reg_tall_batched()

magma_int_t magma_cunm2r_reg_tall_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
magmaFloatComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaFloatComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaFloatComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

CUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of CUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_dorm2r_reg_batched()

magma_int_t magma_dorm2r_reg_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t k,
double ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
double ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
double ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

DORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of DORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_dorm2r_reg_medium_batched()

magma_int_t magma_dorm2r_reg_medium_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
double ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
double ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
double ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

DORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of DORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_dorm2r_reg_tall_batched()

magma_int_t magma_dorm2r_reg_tall_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
double ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
double ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
double ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

DORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by DGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of DORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_sorm2r_reg_batched()

magma_int_t magma_sorm2r_reg_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t k,
float ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
float ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
float ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

SORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of SORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_sorm2r_reg_medium_batched()

magma_int_t magma_sorm2r_reg_medium_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
float ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
float ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
float ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

SORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of SORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_sorm2r_reg_tall_batched()

magma_int_t magma_sorm2r_reg_tall_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
float ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
float ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
float ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

SORM2R overwrites the general real m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**T * C if SIDE = MagmaLeft and TRANS = 'T', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**T if SIDE = MagmaRight and TRANS = 'T',

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by SGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of SORM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_zunm2r_reg_batched()

magma_int_t magma_zunm2r_reg_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t k,
magmaDoubleComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaDoubleComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaDoubleComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

ZUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of ZUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_zunm2r_reg_medium_batched()

magma_int_t magma_zunm2r_reg_medium_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
magmaDoubleComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaDoubleComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaDoubleComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

ZUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of ZUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported

◆ magma_zunm2r_reg_tall_batched()

magma_int_t magma_zunm2r_reg_tall_batched ( magma_side_t side,
magma_trans_t trans,
magma_int_t m,
magma_int_t n,
magma_int_t nb,
magma_int_t ib,
magmaDoubleComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t ldda,
magmaDoubleComplex ** dV_array,
magma_int_t Vi,
magma_int_t Vj,
magma_int_t lddv,
magmaDoubleComplex ** dtau_array,
magma_int_t taui,
magma_int_t check_launch_only,
magma_int_t batchCount,
magma_queue_t queue )

ZUNM2R overwrites the general complex m-by-n matrix C with

Q * C if SIDE = MagmaLeft and TRANS = 'N', or Q**H * C if SIDE = MagmaLeft and TRANS = 'C', or

C * Q if SIDE = MagmaRight and TRANS = 'N', or C * Q**H if SIDE = MagmaRight and TRANS = 'C',

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by ZGEQRF. Q is of order m if SIDE = MagmaLeft and of order n if SIDE = MagmaRight.

  • This is an internal batch implementation of ZUNM2R
  • The implementation uses register blocking
  • Only SIDE = MagmaLeft is currently supported