MAGMA prototypes
magma_ functions return a magma_int_t error code.
magmablas_ functions are void unless noted.
Triangular factorization (LU, Cholesky)
magma_[sdcz] getrf ( m, n, A, lda, ipiv, &info ); magma_[sdcz] potrf ( uplo, n, A, lda, &info );
QR, QL, LQ factorization
magma_[sdcz] geqrf ( m, n, A, lda, tau, work, lwork, &info ); magma_[sdcz] geqlf ( m, n, A, lda, tau, work, lwork, &info ); magma_[sdcz] gelqf ( m, n, A, lda, tau, work, lwork, &info );
multiply by Q after factorization
magma_[sd ] ormqr ( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, &info ); magma_[ cz] unmqr ( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, &info ); magma_[sd ] ormql ( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, &info ); magma_[ cz] unmql ( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, &info ); magma_[sd ] ormtr ( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, &info ); magma_[ cz] unmtr ( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, &info );
generate Q after factorization
magma_[sd ] orgqr ( m, n, k, A, lda, tau, dwork, nb, &info ); magma_[ cz] ungqr ( m, n, k, A, lda, tau, dwork, nb, &info );
SVD
magma_[sdcz] gesvd ( jobu, jobvt, m, n, A, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, &info );
non-symmetric eigenvalue
magma_[sdcz] geev ( jobvl, jobvr, n, A, lda, w, vl, ldvl, vr, ldvr, work, lwork, rwork, &info );
symmetric eigenvalue (divide-and-conquer)
magma_[sd ] syevd ( jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info ); magma_[ cz] heevd ( jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info );
generalized symmetric eigenvalue (divide-and-conquer)
magma_[sd ] sygvd ( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, rwork, lrwork, iwork, liwork, &info ); magma_[ cz] hegvd ( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, rwork, lrwork, iwork, liwork, &info );
// auxiliary routines
magma_[d ] sygst ( itype, uplo, n, A, lda, B, ldb, &info ); magma_[ cz] hegst ( itype, uplo, n, A, lda, B, ldb, &info );
tridiagonal reduction
magma_[sd ] sytrd ( uplo, n, A, lda, d, e, tau, work, lwork, &info ); magma_[ cz] hetrd ( uplo, n, A, lda, d, e, tau, work, lwork, &info );
// auxiliary routines
magma_[sdcz] latrd ( uplo, n, nb, A, lda, e, tau, w, ldw, da, ldda, dw, lddw ); magma_[sdcz] latrd2 ( uplo, n, nb, A, lda, e, tau, w, ldw, da, ldda, dw, lddw, dwork, ldwork );
bidiagonal reduction
magma_[sdcz] gebrd ( m, n, A, lda, d, e, tauq, taup, work, lwork, &info );
Hessenberg reduction
magma_[sdcz] gehrd ( n, ilo, ihi, A, lda, tau, work, lwork, d_T, &info ); magma_[sdcz] gehrd2 ( n, ilo, ihi, A, lda, tau, work, lwork, &info );
// auxiliary routines
magma_[sdcz] lahr2 ( m, n, nb, da, dv, A, lda, tau, t, ldt, y, ldy ); magma_[sdcz] lahru ( m, n, nb, A, lda, da, y, v, t, dwork );
generate Q after Hessenberg reduction (gehrd)
magma_[sd ] orghr ( n, ilo, ihi, A, lda, tau, dT, nb, &info ); magma_[ cz] unghr ( n, ilo, ihi, A, lda, tau, dT, nb, &info );
auxiliary functions
double cpu_gpu_[sdcz] diff( m, n, a, lda, da, ldda ); [sdcz] zero_32x32_block( foo, foo ); [sdcz] zero_nbxnb_block( foo, foo, foo ); magmablas_[sdcz] inplace_transpose( foo, foo, foo ); magmablas_[sdcz] permute_long ( foo, foo, foo, foo, foo ); magmablas_[sdcz] permute_long2 ( foo, foo, foo, foo, foo ); magmablas_[sdcz] transpose ( foo, foo, foo, foo, foo, foo ); magmablas_[sdcz] transpose2 ( foo, foo, foo, foo, foo, foo );
LAPACK auxiliary functions
magmablas_[sdcz] lacpy ( uplo, m, n, A, lda, B, ldb ); magmablas_[sdcz] lascl ( type, kl, ku, cfrom, cto, m, n, A, lda, info ); magmablas_[sdcz] laset ( m, n, A, lda ); magmablas_[sdcz] laswp ( n, dAT, lda, i1, i2, ipiv, inci ); magmablas_[sdcz] laswpx ( n, dAT, ldx, ldy, i1, i2, ipiv, inci );
// matrix norm
double magmablas_[sdcz] lange ( norm, m, n, A, lda, work ); double magmablas_[ cz] lanhe ( norm, uplo, n, A, lda, work ); double magmablas_[sdcz] lansy ( norm, uplo, n, A, lda, work );
Level 1 BLAS
magmablas_[sdcz] swap ( n, dA1, lda1, dA2, lda2 ); magmablas_[sdcz] swapblk ( storev, n, dA1, lda1, dA2, lda2, i1, i2, ipiv, inci, offset ); magmablas_[sdcz] swapdblk ( n, nb, dA1, ldda1, inca1, dA2, ldda2, inca2 );
Level 2 BLAS
magmablas_[sdcz] gemv ( trans, m, n, alpha, A, lda, X, incX, beta, Y, incY ); magmablas_[ cz] hemv ( uplo, n, alpha, A, lda, X, incX, beta, Y, incY ); magmablas_[ cz] symv ( uplo, n, alpha, A, lda, X, incX, beta, Y, incY ); magmablas_[sd ] symv ( uplo, n, alpha, A, lda, X, incX, beta, Y, incY );
Level 3 BLAS
magmablas_[sdcz] gemm ( transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[ cz] hemm ( side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[ cz] symm ( side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[sd ] symm ( side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[sdcz] syrk ( uplo, trans, n, k, alpha, A, lda, beta, C, ldc ); magmablas_[ cz] herk ( uplo, trans, n, k, alpha, A, lda, beta, C, ldc ); magmablas_[sdcz] syr2k ( uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[ cz] her2k ( uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); magmablas_[sdcz] trmm ( side, uplo, trans, d, m, n, alpha, A, lda, B, ldb ); magmablas_[sdcz] trsm ( side, uplo, trans, d, m, n, alpha, A, lda, B, ldb );
Workspace interface
magmablasw_[sdcz] symv( uplo, n, alpha, A, lda, x, incx, beta, y, incy, dwork );
Mixed precision
magmablas_{ds,zc} axpycp ( foo, foo, foo, foo, foo ); // 2 versions?
magmablas_{ds,zc} laswp ( foo, foo, foo, foo, foo, foo, foo );
Conversion
magmablas_dlag2s ( m, n, A, lda, SA, ldsa, info ); magmablas_slag2d ( m, n, SA, ldsa, A, lda, info ); magmablas_dlat2s ( uplo, n, A, lda, SA, ldsa, info ); magmablas_zlag2c ( m, n, A, lda, SA, ldsa, info ); magmablas_clag2z ( m, n, SA, ldsa, A, lda, info ); magmablas_zlat2c ( uplo, n, A, lda, SA, ldsa, info );