/*
    -- MAGMA (version 2.10.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date February 2026

       @author Azzam Haidar
       @author Tingxing Dong

       @generated from include/magma_zbatched.h, normal z -> d, Thu Feb 19 19:24:07 2026
*/

#ifndef MAGMA_DBATCHED_H
#define MAGMA_DBATCHED_H

#include "magma_types.h"

#define MAGMA_REAL

#ifdef __cplusplus
extern "C" {
#endif
  /*
   *  local auxiliary routines
   */
/*
 * Helpers that populate `output_array`, an array of double pointers.
 * NOTE(review): judging by the parameter names, magma_dset_pointer derives
 * every entry from the single base pointer `input` (stepping by
 * `batch_offset`), while magma_ddisplace_pointers shifts each entry of
 * `input_array` by (row, column) using leading dimension `lda` -- confirm
 * against the implementation.
 */
void magma_dset_pointer(
    double **output_array, double *input, magma_int_t lda,
    magma_int_t row, magma_int_t column, magma_int_t batch_offset,
    magma_int_t batchCount, magma_queue_t queue);

void magma_ddisplace_pointers(
    double **output_array, double **input_array, magma_int_t lda,
    magma_int_t row, magma_int_t column,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Shape-based GEMM recommendation queries; both take the transpose modes
 * and the (m, n, k) problem size and return a magma_int_t.
 * NOTE(review): the names suggest a nonzero result means "prefer the vendor
 * batched / streamed GEMM" -- confirm the return convention.
 */
magma_int_t magma_drecommend_cublas_gemm_batched(
    magma_trans_t transa, magma_trans_t transb,
    magma_int_t m, magma_int_t n, magma_int_t k);

magma_int_t magma_drecommend_cublas_gemm_stream(
    magma_trans_t transa, magma_trans_t transb,
    magma_int_t m, magma_int_t n, magma_int_t k);

/*
 * Tuning-parameter queries for the batched double-precision routines.
 * Only the signatures are visible in this header; the returned values and
 * the meaning of the output pointers (nb, recnb, threads) are defined by
 * the implementation.
 */
void magma_get_dpotrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb);

/*
 * Declared with `(void)` so that C translation units get a real prototype:
 * before C23 an empty list `()` declares a function with unspecified
 * arguments, which disables argument checking at call sites.
 */
magma_int_t magma_get_dpotrf_batched_crossover(void);

void magma_get_dgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb);
magma_int_t magma_get_dgetrf_batched_ntcol(magma_int_t m, magma_int_t n);
magma_int_t magma_get_dgemm_batched_ntcol(magma_int_t n);
magma_int_t magma_get_dgemm_batched_smallsq_limit(magma_int_t n);
magma_int_t magma_get_dgeqrf_batched_nb(magma_int_t m);
magma_int_t magma_use_dgeqrf_batched_fused_update(magma_int_t m, magma_int_t n, magma_int_t batchCount);
magma_int_t magma_get_dgeqr2_fused_sm_batched_nthreads(magma_int_t m, magma_int_t n);
magma_int_t magma_get_dgeqrf_batched_ntcol(magma_int_t m, magma_int_t n);
magma_int_t magma_get_dgetri_batched_ntcol(magma_int_t m, magma_int_t n);
magma_int_t magma_get_dtrsm_batched_stop_nb(magma_side_t side, magma_int_t m, magma_int_t n);
magma_int_t magma_dorm2r_batched_kernel_sm_size(magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb);
magma_int_t magma_get_dsyevj_batched_small_nthreads(magma_int_t n);
void        magma_get_dgbtrf_batched_params(magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t *nb, magma_int_t *threads);
/* NOTE(review): `bool` requires <stdbool.h> when this header is compiled as
 * C (pre-C23) -- confirm that magma_types.h makes it available. */
bool        magma_dgesvj_batched_use_fused( magma_vec_t jobu, magma_vec_t jobv, magma_int_t m, magma_int_t n );
magma_int_t magma_get_dgesvj_batched_nb( magma_int_t m, magma_int_t n );

#ifdef MAGMA_REAL
/*
 * Batched sort; the declaration is only visible when MAGMA_REAL is defined.
 * `dx_array` is read-only (const-qualified), `dy_array` and `dindex_array`
 * are writable.
 * NOTE(review): presumably dy receives the sorted values and dindex the
 * permutation -- confirm against the implementation.
 */
magma_int_t magmablas_dsort_batched(
    magma_sort_t sort, magma_int_t n,
    double const * const *dx_array, magma_int_t incx,
    double **dy_array, magma_int_t incy,
    magma_int_t **dindex_array,
    magma_int_t batchCount, magma_queue_t queue);
#endif

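  /*
   *  LAPACK batched routines
   *  (none are declared directly under this heading; the batched
   *  factorizations and solvers follow the BLAS declarations below)
   */

  /*
   *  BLAS batched routines
   */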
/*
 * Batched GEMM cores. Besides the usual GEMM arguments, each operand
 * carries a pair of integer offsets: (Ai, Aj), (Bi, Bj), (Ci, Cj).
 * A and B are const-qualified; C is writable.
 * NOTE(review): the offsets are presumably (row, column) displacements
 * applied to every pointer in the corresponding array -- confirm.
 */
void magmablas_dgemm_batched_core(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double const * const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

void magma_dgemm_batched_core(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double const * const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched GEMM entry points without per-operand offsets. The two
 * declarations share an identical parameter list and differ only in their
 * `magma_` / `magmablas_` prefix.
 * NOTE(review): how the two variants differ in dispatch is not visible
 * from this header -- consult the implementation.
 */
void magma_dgemm_batched(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t ldda,
    double const * const *dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dgemm_batched(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t ldda,
    double const * const *dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Two further batched GEMM variants:
 *  - _strided takes one base pointer per operand plus a stride
 *    (strideA / strideB / strideC) instead of an array of pointers;
 *  - _smallsq takes pointer arrays with per-operand (i, j) offsets.
 * NOTE(review): strides are presumably counted in elements between
 * consecutive matrices, and _smallsq presumably targets small square
 * problems -- confirm both.
 */
void magmablas_dgemm_batched_strided(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const *dA, magma_int_t ldda, magma_int_t strideA,
    double const *dB, magma_int_t lddb, magma_int_t strideB,
    double beta,
    double *dC, magma_int_t lddc, magma_int_t strideC,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dgemm_batched_smallsq(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    double const * const *dB_array, magma_int_t bi, magma_int_t bj, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t ci, magma_int_t cj, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched SYRK core with per-operand (i, j) offsets. It takes two
 * const-qualified input arrays (dA_array, dB_array) and a writable
 * dC_array.
 * This prototype was previously declared twice in a row with identical
 * signatures; a single declaration is kept so that builds using
 * -Wredundant-decls stay quiet.
 * NOTE(review): the role of the separate B operand in a rank-k update is
 * not visible from this header -- confirm against the implementation.
 */
void
magmablas_dsyrk_batched_core(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha,
    double const * const * dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    double const * const * dB_array, magma_int_t bi, magma_int_t bj, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t ci, magma_int_t cj, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched SYRK entry points. Both take a const-qualified dA_array and a
 * writable dC_array and share the same parameter list; they differ only in
 * the `magmablas_` / `magma_` prefix.
 * magmablas_dsyrk_batched was previously declared twice in this run with
 * identical signatures; the redundant copy is dropped.
 */
void
magmablas_dsyrk_batched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha,
    double const * const * dA_array, magma_int_t ldda,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue );

void
magma_dsyrk_batched(
    magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const * dA_array, magma_int_t ldda,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched SYR2K: two const-qualified input arrays (A, B) and a writable
 * C array, each with its own leading dimension.
 */
void magmablas_dsyr2k_batched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t ldda,
    double const * const *dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dsyevj "small_sm" declarations, in pointer-array and strided
 * form. The `expert` variants additionally expose batch_mask, num_sweeps,
 * heevj_tol, sort_flag, max_sweeps and nthreads.
 * NOTE(review): "syevj" presumably denotes a Jacobi-type symmetric
 * eigensolver with dW receiving the eigenvalues, and "small_sm" a
 * shared-memory kernel for small matrices -- confirm both.
 */
magma_int_t magma_dsyevj_batched_expert_small_sm(
    magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t ldda,
    double **dW_array, magma_int_t *info_array,
    int *batch_mask, int *num_sweeps,
    double heevj_tol, magma_int_t sort_flag, magma_int_t max_sweeps,
    magma_int_t nthreads, magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dsyevj_batched_strided_expert_small_sm(
    magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n,
    double *dA, magma_int_t ldda, magma_int_t strideA,
    double *dW, magma_int_t strideW,
    magma_int_t *info_array, int *batch_mask, int *num_sweeps,
    double heevj_tol, magma_int_t sort_flag, magma_int_t max_sweeps,
    magma_int_t nthreads, magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dsyevj_batched_small_sm(
    magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t ldda, double **dW_array,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dsyevj_batched_strided_small_sm(
    magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n,
    double *dA, magma_int_t ldda, magma_int_t strideA,
    double *dW, magma_int_t strideW,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched SYR2K.
 * NOTE(review): an identical prototype for magmablas_dsyr2k_batched is
 * already declared earlier in this header, next to the SYRK routines; this
 * repeat is harmless but redundant and could be dropped at the generator
 * level.
 */
void magmablas_dsyr2k_batched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha,
    double const * const *dA_array, magma_int_t ldda,
    double const * const *dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dtrtri_diag: reads the const-qualified dA_array and writes
 * dinvA_array.
 * NOTE(review): presumably inverts the diagonal blocks of each triangular
 * matrix, with `resetozero` asking for dinvA to be cleared first --
 * confirm against the implementation.
 */
void magmablas_dtrtri_diag_batched(
    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
    double const * const *dA_array, magma_int_t ldda,
    double **dinvA_array,
    magma_int_t resetozero,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched TRSM declarations. The _small and _recursive variants accept
 * (Ai, Aj) / (Bi, Bj) offsets; magmablas_dtrsm_batched takes only the
 * pointer arrays and leading dimensions.
 * NOTE(review): B is presumably overwritten with the solution, as in
 * reference TRSM -- confirm.
 */
void magmablas_dtrsm_small_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrsm_recursive_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrsm_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * "inv" flavour of batched TRSM. The _work and _outofplace variants share
 * one parameter list: a separate X operand, a dinvA workspace with its
 * length, four caller-provided "displ" pointer arrays, and the `flag` and
 * `resetozero` switches.
 * NOTE(review): the meaning of `flag` and of the displ arrays (presumably
 * scratch pointer arrays for displaced sub-matrices) is not visible here
 * -- confirm against the implementation.
 */
void magmablas_dtrsm_inv_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrsm_inv_work_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t flag, magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double **dX_array, magma_int_t lddx,
    double **dinvA_array, magma_int_t dinvA_length,
    double **dA_displ, double **dB_displ,
    double **dX_displ, double **dinvA_displ,
    magma_int_t resetozero,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrsm_inv_outofplace_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t flag, magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double **dX_array, magma_int_t lddx,
    double **dinvA_array, magma_int_t dinvA_length,
    double **dA_displ, double **dB_displ,
    double **dX_displ, double **dinvA_displ,
    magma_int_t resetozero,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched TRSV declarations. The plain variant takes a right-hand-side
 * array with increment `incb`; the recursive variant adds (Ai, Aj) offsets
 * into A and an offset `xi` into the vector.
 */
void magmablas_dtrsv_batched(
    magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t n,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t incb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrsv_recursive_batched(
    magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t n,
    double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dx_array, magma_int_t xi, magma_int_t incx,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched TRMM: a core variant with (Ai, Aj) / (Bi, Bj) offsets and a
 * plain variant without them.
 */
void magmablas_dtrmm_batched_core(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dtrmm_batched(
    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched SYMM. The core variant carries six trailing offsets
 * (roffA/coffA, roffB/coffB, roffC/coffC) after the leading dimensions;
 * the plain variant omits them.
 * NOTE(review): roff/coff are presumably row and column offsets into each
 * operand -- confirm.
 */
void magmablas_dsymm_batched_core(
    magma_side_t side, magma_uplo_t uplo,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t roffA, magma_int_t coffA,
    magma_int_t roffB, magma_int_t coffB,
    magma_int_t roffC, magma_int_t coffC,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dsymm_batched(
    magma_side_t side, magma_uplo_t uplo,
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double beta,
    double **dC_array, magma_int_t lddc,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched SYMV. The core variant adds one offset per operand
 * (offA, offX, offY); the plain variant omits them.
 */
void magmablas_dsymv_batched_core(
    magma_uplo_t uplo, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dX_array, magma_int_t incx,
    double beta,
    double **dY_array, magma_int_t incy,
    magma_int_t offA, magma_int_t offX, magma_int_t offY,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dsymv_batched(
    magma_uplo_t uplo, magma_int_t n,
    double alpha,
    double **dA_array, magma_int_t ldda,
    double **dX_array, magma_int_t incx,
    double beta,
    double **dY_array, magma_int_t incy,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dpotrf family: the top-level driver, an expert driver exposing
 * nb / recnb, and the potf2 / panel / recpanel / rectile building blocks,
 * which take (ai, aj) offsets and a `gbstep` argument.
 * NOTE(review): "potrf" presumably denotes Cholesky factorization, with
 * info_array holding one status per problem and gbstep a global step
 * offset used when reporting info -- confirm.
 */
magma_int_t magma_dpotrf_batched(
    magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t lda,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dpotrf_expert_batched(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb, magma_int_t recnb,
    double **dA_array, magma_int_t ldda,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dpotf2_batched(
    magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t lda,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dpotrf_panel_batched(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dpotrf_recpanel_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t min_recpnb,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dpotrf_rectile_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t min_recpnb,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dpotrs, dposv and dgetrs declarations. dgetrs additionally
 * takes a transpose mode and a pivot array; only dposv takes an info
 * array.
 * NOTE(review): by their names these are presumably the Cholesky solve,
 * the Cholesky factor-and-solve and the LU solve, with dB_array
 * overwritten by the solutions -- confirm.
 */
magma_int_t magma_dpotrs_batched(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dposv_batched(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t *dinfo_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetrs_batched(
    magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dlaswp variants over the index range [k1, k2]. The rowparallel
 * form has separate input and output arrays with (i, j) offsets and
 * leading dimensions ldi / ldo; the two serial forms operate on a single
 * dA_array.
 * NOTE(review): "laswp" presumably denotes pivot-driven row/column
 * interchanges; whether k1/k2 are 0- or 1-based and whether k2 is
 * inclusive is not visible here -- confirm.
 */
void magma_dlaswp_rowparallel_batched(
    magma_int_t n,
    double **input_array, magma_int_t input_i, magma_int_t input_j, magma_int_t ldi,
    double **output_array, magma_int_t output_i, magma_int_t output_j, magma_int_t ldo,
    magma_int_t k1, magma_int_t k2,
    magma_int_t **pivinfo_array,
    magma_int_t batchCount, magma_queue_t queue);

void magma_dlaswp_rowserial_batched(
    magma_int_t n,
    double **dA_array, magma_int_t lda,
    magma_int_t k1, magma_int_t k2,
    magma_int_t **ipiv_array,
    magma_int_t batchCount, magma_queue_t queue);

void magma_dlaswp_columnserial_batched(
    magma_int_t n,
    double **dA_array, magma_int_t lda,
    magma_int_t k1, magma_int_t k2,
    magma_int_t **ipiv_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched transpose from dA_array (leading dimension ldda) into dAT_array
 * (leading dimension lddat).
 * This prototype was previously declared twice in a row with identical
 * signatures; a single declaration is kept so that builds using
 * -Wredundant-decls stay quiet.
 */
void
magmablas_dtranspose_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array,  magma_int_t ldda,
    double **dAT_array, magma_int_t lddat,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dlaset, taking separate `offdiag` and `diag` values. The
 * _internal variant additionally takes an (Ai, Aj) offset into each
 * matrix.
 * NOTE(review): presumably writes `offdiag` / `diag` into the selected
 * part of each matrix, as LAPACK xLASET does -- confirm.
 */
void magmablas_dlaset_internal_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    double offdiag, double diag,
    magmaDouble_ptr dAarray[], magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dlaset_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    double offdiag, double diag,
    magmaDouble_ptr dAarray[], magma_int_t ldda,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgbsv declarations, taking kl / ku and nrhs, in pointer-array
 * and strided form. The _work variants take a caller-supplied
 * `device_work` buffer and an `lwork` pointer; the fused_sm variant
 * exposes nthreads / ntcol.
 * NOTE(review): "gbsv" presumably denotes a banded (kl sub-, ku
 * super-diagonals) factor-and-solve, and `lwork` is presumably in/out
 * (workspace-size query) -- confirm against the implementation.
 */
magma_int_t magma_dgbsv_batched(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbsv_batched_fused_sm(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **ipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t nthreads, magma_int_t ntcol,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbsv_batched_strided_work(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double *dA, magma_int_t ldda, magma_int_t strideA,
    magma_int_t *dipiv, magma_int_t stride_piv,
    double *dB, magma_int_t lddb, magma_int_t strideB,
    magma_int_t *info_array,
    void *device_work, magma_int_t *lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbsv_batched_strided(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double *dA, magma_int_t ldda, magma_int_t strideA,
    magma_int_t *dipiv, magma_int_t stride_piv,
    double *dB, magma_int_t lddb, magma_int_t strideB,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbsv_batched_work(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    void *device_work, magma_int_t *lwork,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Building blocks operating on dAB_array (leading dimension lddab) that
 * share the `ju_array` and `gbstep` arguments.
 * NOTE(review): these presumably serve the batched band LU
 * factorization, with ju_array tracking the last column affected by
 * pivoting per problem (as `ju` does in LAPACK xGBTF2) -- confirm.
 */
void magma_dgbtrf_set_fillin(
    magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **dipiv_array, int *ju_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtf2_dswap_batched(
    magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t ai, magma_int_t aj, magma_int_t lddab,
    magma_int_t **dipiv_array, magma_int_t ipiv_offset,
    int *ju_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtf2_scal_ger_batched(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t ai, magma_int_t aj, magma_int_t lddab,
    int *ju_array, magma_int_t gbstep, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgbtrf kernel-level variants: a fused_sm variant that exposes
 * nthreads / ntcol, and two sliding-window variants. Only the "loopout"
 * form takes an external workspace (device_work / lwork).
 * NOTE(review): how "loopout" and "loopin" differ is not visible from the
 * header -- consult the implementation.
 */
magma_int_t magma_dgbtrf_batched_fused_sm(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **ipiv_array, magma_int_t *info_array,
    magma_int_t nthreads, magma_int_t ntcol,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrf_batched_sliding_window_loopout(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **ipiv_array, magma_int_t *info_array,
    void *device_work, magma_int_t *lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrf_batched_sliding_window_loopin(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **ipiv_array, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgbtrf drivers in four forms: strided or pointer-array, each
 * with or without a caller-supplied workspace (device_work / lwork).
 * NOTE(review): `lwork` is presumably in/out to support a workspace-size
 * query -- confirm.
 */
magma_int_t magma_dgbtrf_batched_strided(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double *dAB, magma_int_t lddab, magma_int_t strideAB,
    magma_int_t *dipiv, magma_int_t stride_piv,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrf_batched_strided_work(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double *dAB, magma_int_t lddab, magma_int_t strideAB,
    magma_int_t *dipiv, magma_int_t stride_piv,
    magma_int_t *info_array,
    void *device_work, magma_int_t *lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrf_batched_work(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **dipiv_array, magma_int_t *info_array,
    void *device_work, magma_int_t *lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrf_batched(
    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
    double **dAB_array, magma_int_t lddab,
    magma_int_t **dipiv_array, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgbtrs: the general driver, the lower-triangular stage, and the
 * blocked lower kernel. The blocked kernel takes neither a transpose mode
 * nor an info array.
 */
magma_int_t magma_dgbtrs_batched(
    magma_trans_t transA,
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrs_lower_batched(
    magma_trans_t transA,
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magmablas_dgbtrs_lower_blocked_batched(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Remaining batched dgbtrs pieces: a swap helper parameterised by index
 * `j`, the upper-triangular stage, its blocked kernel, and a column-wise
 * kernel that also takes `j`.
 * NOTE(review): `j` is presumably the current column/step index --
 * confirm.
 */
void magmablas_dgbtrs_swap_batched(
    magma_int_t n,
    double **dA_array, magma_int_t ldda,
    magma_int_t **dipiv_array, magma_int_t j,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgbtrs_upper_batched(
    magma_trans_t transA,
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda, magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magmablas_dgbtrs_upper_blocked_batched(
    magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

void magmablas_dgbtrs_upper_columnwise_batched(
    magma_int_t n, magma_int_t kl, magma_int_t ku,
    magma_int_t nrhs, magma_int_t j,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dger core. X and Y each carry an (i, j) offset, a leading
 * dimension and an increment; A carries an (i, j) offset and a leading
 * dimension.
 * NOTE(review): by its name this is presumably the rank-1 update
 * A += alpha * x * y^T, with X and Y addressed as vectors embedded in
 * matrices (hence lddx / lddy) -- confirm.
 */
void magmablas_dger_batched_core(
    magma_int_t m, magma_int_t n,
    double alpha,
    double **dX_array, magma_int_t xi, magma_int_t xj, magma_int_t lddx, magma_int_t incx,
    double **dY_array, magma_int_t yi, magma_int_t yj, magma_int_t lddy, magma_int_t incy,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgesv "small" variant, taking a pivot array and a per-problem
 * info array.
 * NOTE(review): "gesv" presumably denotes a general factor-and-solve; the
 * size limit implied by "small" is not visible from this header.
 */
magma_int_t magma_dgesv_batched_small(
    magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *dinfo_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgesvj drivers taking A, S, U and V operands. The expert
 * variants take a device workspace with a 64-bit length pointer
 * (`device_lwork`); the blocked expert variant additionally exposes nb,
 * the sweep limits and the heevj tolerance controls. One variant uses
 * strided storage instead of pointer arrays.
 * NOTE(review): "gesvj" presumably denotes a Jacobi-type SVD, and
 * `device_lwork` is presumably in/out to support a workspace-size query
 * -- confirm.
 */
magma_int_t magma_dgesvj_batched(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ldda, double **dS_array,
    double **dU_array, magma_int_t lddu,
    double **dV_array, magma_int_t lddv,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgesvj_blocked_expert_batched(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t morg, magma_int_t norg,
    double **dA_array, magma_int_t ldda, double **dS_array,
    double **dU_array, magma_int_t lddu,
    double **dV_array, magma_int_t lddv,
    magma_int_t *info_array,
    magma_int_t nb, magma_int_t max_sweeps,
    magma_int_t heevj_max_sweeps,
    double heevj_tol, double heevj_tol_min, double heevj_tol_scal,
    void *device_work, int64_t *device_lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgesvj_expert_batched(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t morg, magma_int_t norg,
    double **dA_array, magma_int_t ldda, double **dS_array,
    double **dU_array, magma_int_t lddu,
    double **dV_array, magma_int_t lddv,
    magma_int_t *info_array,
    void *device_work, int64_t *device_lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgesvj_expert_batched_strided(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t morg, magma_int_t norg,
    magmaDouble_ptr dA, magma_int_t ldda, magma_int_t strideA,
    magmaDouble_ptr dS, magma_int_t strideS,
    magmaDouble_ptr dU, magma_int_t lddu, magma_int_t strideU,
    magmaDouble_ptr dV, magma_int_t lddv, magma_int_t strideV,
    magmaInt_ptr dinfo_array,
    void *device_work, int64_t *device_lwork,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgesvj_qr_expert_batched(
    magma_vec_t jobu_org, magma_vec_t jobv_org,
    magma_int_t morg, magma_int_t norg,
    double **dA_array, magma_int_t ldda, double **dS_array,
    double **dU_array, magma_int_t lddu,
    double **dV_array, magma_int_t lddv,
    magma_int_t *info_array,
    void *device_work, int64_t *device_lwork,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Helper declarations named after the batched dgesvj drivers: finalize
 * values / vectors, pointer-array setup, convergence test, the small_sm
 * kernel driver, and the vector update step.
 * NOTE(review): the exact contract of each helper (what is read, what is
 * overwritten, how the masks and info arrays are interpreted) is not
 * visible from this header -- consult the implementation.
 */
void magma_dgesvj_batched_finalize_values(
    magma_int_t m, magma_int_t n,
    double const * const *dA_array, magma_int_t ldda,
    double **dSigma_array,
    magma_int_t batchCount, magma_queue_t queue);

void magma_dgesvj_batched_finalize_vectors(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t m, magma_int_t n,
    double const * const *dUi_array, magma_int_t lddui,
    double const * const *dVi_array, magma_int_t lddvi,
    double **dUo_array, magma_int_t ldduo,
    double **dVo_array, magma_int_t lddvo,
    double **dSigma_array, magma_int_t **index_array,
    magma_int_t batchCount, magma_queue_t queue);

void magma_dgesvj_batched_setup_ptr_arrays(
    magma_vec_t jobv, magma_int_t i_gesvj_sweep,
    magma_int_t nb, magma_int_t nblk_col2,
    double **dUi_array, double **dUo_array, magma_int_t lddu,
    double **dVi_array, double **dVo_array, magma_int_t lddv,
    double **dAgemm0_array, double **dAgemm1_array,
    double **dUjVj_input_array, double **dUkVk_input_array,
    double **dUjkVjk_output_array,
    magma_int_t flat_batchCount, magma_queue_t queue);

void magma_dgesvj_batched_test_convergence(
    magma_int_t gesvj_iters_per_sweep, magma_int_t sub_batch,
    magma_int_t batchCount,
    magma_int_t *dheevj_info, int *dheevj_nsweeps,
    int *dheevj_mask, int *all_converged,
    magma_queue_t queue);

magma_int_t magma_dgesvj_batched_small_sm(
    magma_vec_t jobu, magma_vec_t jobv,
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ldda, double **dS_array,
    double **dU_array, magma_int_t lddu,
    double **dV_array, magma_int_t lddv,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgesvj_batched_update_vectors(
    magma_int_t m, magma_int_t nb,
    double **dU0array, magma_int_t lddu0,
    double **dU1array, magma_int_t lddu1,
    double **dGarray, magma_int_t lddg,
    magma_int_t *heevj_info, int *heevj_nsweeps,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgetf2 / dgetrf / dgetri declarations: the panel kernel, the
 * recursive panel, the top-level driver, a fused panel kernel, a
 * small-square variant, and an out-of-place dgetri that writes
 * dinvA_array.
 * NOTE(review): by their names these are presumably LU factorization and
 * LU-based inversion; the pivot arrays presumably hold one pivot vector
 * per problem, and dpivinfo_array a derived permutation -- confirm.
 */
magma_int_t magma_dgetf2_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t lda,
    magma_int_t **ipiv_array, magma_int_t **dpivinfo_array,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetrf_recpanel_batched(
    magma_int_t m, magma_int_t n, magma_int_t min_recpnb,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t **dipiv_array, magma_int_t **dpivinfo_array,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetrf_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t lda,
    magma_int_t **ipiv_array, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetf2_fused_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t **dipiv_array, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetrf_batched_smallsq_noshfl(
    magma_int_t n,
    double **dA_array, magma_int_t ldda,
    magma_int_t **ipiv_array, magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

magma_int_t magma_dgetri_outofplace_batched(
    magma_int_t n,
    double **dA_array, magma_int_t ldda,
    magma_int_t **dipiv_array,
    double **dinvA_array, magma_int_t lddia,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Integer-pointer counterpart of the pointer-displacement helper: same
 * parameter list, but operating on arrays of magma_int_t pointers.
 * NOTE(review): presumably offsets each input pointer by (row, column)
 * using `lda` -- confirm.
 */
void magma_ddisplace_intpointers(
    magma_int_t **output_array, magma_int_t **input_array, magma_int_t lda,
    magma_int_t row, magma_int_t column,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched idamax (BLAS naming: index of the element of largest absolute
 * value); "atomic" presumably names the reduction strategy -- TODO confirm.
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magmablas_idamax_atomic_batched(
    magma_int_t n,
    double** x_array, magma_int_t incx,
    magma_int_t **max_id_array,
    magma_int_t batchCount);

/*
 * Batched idamax (BLAS naming: index of the element of largest absolute
 * value); "tree" presumably names the reduction strategy -- TODO confirm.
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magmablas_idamax_tree_batched(
    magma_int_t n,
    double** x_array, magma_int_t incx,
    magma_int_t **max_id_array,
    magma_int_t batchCount);

/*
 * Batched idamax (BLAS naming: index of the element of largest absolute
 * value) over x_array with stride incx; results go through max_id_array
 * (presumed; body not visible here).
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magmablas_idamax_batched(
    magma_int_t n,
    double** x_array, magma_int_t incx,
    magma_int_t **max_id_array,
    magma_int_t batchCount);

/*
 * Non-batched idamax on a single vector x (n elements, stride incx); the
 * result is presumably written to *max_id -- TODO confirm.
 * Takes neither batchCount nor a magma_queue_t.
 */
void
magmablas_idamax(
    magma_int_t n,
    double* x, magma_int_t incx,
    magma_int_t *max_id);

/*
 * Queue-taking batched idamax with explicit offsets, apparently the form used
 * inside the LU panel (it receives ipiv_array, step, gbstep and info_array).
 * (xi, xj) / ipiv_i look like offsets into x_array / ipiv_array entries --
 * TODO confirm against the implementation, which is not in this header.
 */
magma_int_t
magma_idamax_batched(
        magma_int_t length,
        double **x_array, magma_int_t xi, magma_int_t xj, magma_int_t lda, magma_int_t incx,
        magma_int_t** ipiv_array, magma_int_t ipiv_i,
        magma_int_t step, magma_int_t gbstep, magma_int_t *info_array,
        magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dswap (BLAS naming: exchange two vectors). Since it receives
 * ipiv_array and step, it presumably performs the row interchange selected by
 * the pivot at `step` -- TODO confirm; body not visible here.
 */
magma_int_t
magma_dswap_batched(
    magma_int_t n, double **x_array, magma_int_t xi, magma_int_t xj, magma_int_t incx,
    magma_int_t step, magma_int_t** ipiv_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched combined dscal + dger (BLAS naming: vector scaling followed by a
 * rank-1 update), presumably one elimination step of the LU panel -- TODO
 * confirm. (ai, aj) look like offsets into each dA_array entry.
 */
magma_int_t
magma_dscal_dger_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t lda,
    magma_int_t *info_array, magma_int_t step, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched "compute column" helper; the pivot/info/gbstep arguments suggest it
 * belongs to the LU panel factorization, but its exact role cannot be told
 * from this header -- see the implementation.
 * Note the argument order: lda comes before the (ai, aj) offsets here.
 */
magma_int_t
magma_dcomputecolumn_batched(
    magma_int_t m, magma_int_t paneloffset, magma_int_t step,
    double **dA_array,  magma_int_t lda,
    magma_int_t ai, magma_int_t aj,
    magma_int_t **ipiv_array,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched triangular-solve helper for the dgetf2 panel (presumed from the
 * name; body not visible here). ib is presumably the inner block size and j
 * an offset into each matrix -- TODO confirm. Returns nothing.
 */
void
magma_dgetf2trsm_batched(
    magma_int_t ib, magma_int_t n,
    double **dA_array,  magma_int_t j, magma_int_t lda,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Internal batched dgetf2 without pivoting (presumed from "nopiv"): there is
 * no ipiv argument. Same parameter list as magma_dgetf2_nopiv_batched.
 */
magma_int_t
magma_dgetf2_nopiv_internal_batched(
    magma_int_t m, magma_int_t n,
    double** dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t* info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dgetf2 without pivoting (presumed from "nopiv"): unblocked LU panel
 * factorization with no ipiv argument. (ai, aj) look like offsets into each
 * dA_array entry -- TODO confirm; body not visible here.
 */
magma_int_t
magma_dgetf2_nopiv_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched recursive-panel LU without pivoting (presumed from the name).
 * Besides the matrices it receives caller-provided buffers: dX_array and
 * dinvA_array with their lengths, and five pointer arrays dW1..dW5_displ,
 * presumably scratch space for displaced pointers -- TODO confirm how each
 * is used; the implementation is not in this header.
 */
magma_int_t
magma_dgetrf_recpanel_nopiv_batched(
    magma_int_t m, magma_int_t n, magma_int_t min_recpnb,
    double** dA_array,    magma_int_t ldda,
    double** dX_array,    magma_int_t dX_length,
    double** dinvA_array, magma_int_t dinvA_length,
    double** dW1_displ, double** dW2_displ,
    double** dW3_displ, double** dW4_displ,
    double** dW5_displ,
    magma_int_t *info_array, magma_int_t gbstep,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched LU factorization without pivoting (presumed from "getrf_nopiv"):
 * same arguments as magma_dgetrf_batched minus the pivot array.
 */
magma_int_t
magma_dgetrf_nopiv_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array,
    magma_int_t lda,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgetrs without pivoting: by LAPACK naming, solves op(A) X = B with
 * nrhs right-hand sides using a previously computed LU factorization
 * (presumed; body not visible here). No pivot array is passed.
 */
magma_int_t
magma_dgetrs_nopiv_batched(
    magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgesv without pivoting: by LAPACK naming, factor-and-solve of
 * A X = B for n-by-n systems with nrhs right-hand sides (presumed; body not
 * visible here). No pivot array is passed.
 */
magma_int_t
magma_dgesv_nopiv_batched(
    magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched linear solve using "rbt" -- presumably the random butterfly
 * transformation in place of pivoting (TODO confirm; body not visible here).
 * Same parameter list as magma_dgesv_nopiv_batched.
 */
magma_int_t
magma_dgesv_rbt_batched(
    magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgesv: by LAPACK naming, solves A X = B via LU with partial
 * pivoting for n-by-n systems with nrhs right-hand sides (presumed; body not
 * visible here). Pivots go through dipiv_array, statuses through dinfo_array;
 * the "d" prefixes suggest device memory -- TODO confirm.
 */
magma_int_t
magma_dgesv_batched(
    magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    magma_int_t **dipiv_array,
    double **dB_array, magma_int_t lddb,
    magma_int_t *dinfo_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgerbt -- presumably applies a random butterfly transformation to
 * the matrices in dA_array and right-hand sides in dB_array (TODO confirm).
 * U and V are plain `double *` (one buffer each, not per-matrix pointer
 * arrays); `gen` presumably selects whether they are generated by the call.
 */
magma_int_t
magma_dgerbt_batched(
    magma_bool_t gen, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double *U, double *V,
    magma_int_t *info,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dprbt -- presumably applies the butterfly vectors du and dv to each
 * n-by-n matrix in dA_array (TODO confirm; body not visible here).
 * du/dv are single buffers shared by the batch, not pointer arrays.
 */
void
magmablas_dprbt_batched(
    magma_int_t n,
    double **dA_array, magma_int_t ldda,
    double *du, double *dv,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dprbt "mv" step -- presumably applies the butterfly defined by dv to
 * the nrhs right-hand sides in db_array (TODO confirm; body not visible here).
 */
void
magmablas_dprbt_mv_batched(
    magma_int_t n, magma_int_t nrhs,
    double *dv, double **db_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dprbt "mtv" step -- presumably the transposed counterpart of
 * magmablas_dprbt_mv_batched, applying the butterfly defined by du to the
 * nrhs right-hand sides in db_array (TODO confirm; body not visible here).
 */
void
magmablas_dprbt_mtv_batched(
    magma_int_t n, magma_int_t nrhs,
    double *du, double **db_array, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Non-batched helper operating on a single pivinfo/ipiv pair (no batchCount).
 * Presumably derives pivinfo from ipiv for an m-row panel of width nb --
 * TODO confirm against the implementation.
 * NOTE(review): the name carries no magma_ prefix, unlike most symbols here.
 */
void
setup_pivinfo(
    magma_int_t *pivinfo, magma_int_t *ipiv,
    magma_int_t m, magma_int_t nb,
    magma_queue_t queue);

/*
 * Batched dgeadd -- presumably B = alpha*A + B for each m-by-n pair (TODO
 * confirm; body not visible here). dAarray is declared const (read-only
 * inputs); dBarray is the writable side.
 */
void
magmablas_dgeadd_batched(
    magma_int_t m, magma_int_t n,
    double alpha,
    magmaDouble_const_ptr  const dAarray[], magma_int_t ldda,
    magmaDouble_ptr              dBarray[], magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Offset-taking form of the batched dlacpy (LAPACK naming: copy all or a
 * triangle of A into B, selected by uplo). (Ai, Aj) and (Bi, Bj) look like
 * row/column offsets into each source/destination matrix -- TODO confirm.
 */
void
magmablas_dlacpy_internal_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    magmaDouble_const_ptr const dAarray[], magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    magmaDouble_ptr             dBarray[], magma_int_t Bi, magma_int_t Bj, magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dlacpy (LAPACK naming: copy all or a triangle of A into B, selected
 * by uplo) for m-by-n matrices (presumed; body not visible here).
 * dAarray is const (source); dBarray is the writable destination.
 */
void
magmablas_dlacpy_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    magmaDouble_const_ptr  const dAarray[], magma_int_t ldda,
    magmaDouble_ptr              dBarray[], magma_int_t lddb,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Pointer-array batched dgemv with offsets. By BLAS naming:
 * y = alpha*op(A)*x + beta*y (presumed; body not visible here).
 * (Ai, Aj), xi and yi look like offsets into each A, x and y -- TODO confirm.
 * A and x arrays are const-qualified; dy_array is the output.
 */
void
magmablas_dgemv_batched_core(
    magma_trans_t trans, magma_int_t m, magma_int_t n,
    const double alpha,
    double const * const * dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double const * const * dx_array, magma_int_t xi, magma_int_t incx,
    const double beta,
    double** dy_array, magma_int_t yi, magma_int_t incy,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Internal batched dgemv that accepts BOTH addressing forms for each operand:
 * a pointer array (dA_array/dx_array/dy_array) and a base pointer with a
 * stride (dA+strideA, dx+stridex, dy+stridey). Presumably only one form is
 * used per call -- TODO confirm which takes precedence in the implementation.
 */
void
magmablas_dgemv_batched_internal(
    magma_trans_t trans, magma_int_t m, magma_int_t n,
    const double alpha,
    double const * const * dA_array, const double* dA, magma_int_t ldda, magma_int_t strideA, magma_int_t Ai, magma_int_t Aj,
    double const * const * dx_array, const double* dx, magma_int_t incx, magma_int_t stridex, magma_int_t xi,
    const double beta,
    double** dy_array, double* dy, magma_int_t incy, magma_int_t stridey, magma_int_t yi,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Strided batched dgemv with offsets: operands are base pointers plus
 * per-problem strides (strideA, stridex, stridey) instead of pointer arrays.
 * By BLAS naming y = alpha*op(A)*x + beta*y (presumed; body not visible).
 * (Ai, Aj), xi and yi look like offsets within each problem -- TODO confirm.
 */
void
magmablas_dgemv_batched_strided_core(
    magma_trans_t trans, magma_int_t m, magma_int_t n,
    const double alpha,
    const double* dA, magma_int_t ldda, magma_int_t Ai, magma_int_t Aj, magma_int_t strideA,
    const double* dx, magma_int_t incx, magma_int_t xi, magma_int_t stridex,
    const double beta,
          double* dy, magma_int_t incy, magma_int_t yi, magma_int_t stridey,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Pointer-array batched dgemv without offsets. By BLAS naming:
 * y = alpha*op(A)*x + beta*y for each of batchCount problems (presumed; body
 * not visible here). A and x arrays are const-qualified; dy_array is output.
 */
void
magmablas_dgemv_batched(
    magma_trans_t trans, magma_int_t m, magma_int_t n,
    const double alpha,
    double const * const * dA_array, magma_int_t ldda,
    double const * const * dx_array, magma_int_t incx,
    const double beta,
    double** dy_array, magma_int_t incy,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Strided batched dgemv without offsets: each operand is a base pointer plus
 * a per-problem stride (strideA, stridex, stridey). By BLAS naming
 * y = alpha*op(A)*x + beta*y (presumed; body not visible here).
 */
void
magmablas_dgemv_batched_strided(
    magma_trans_t trans, magma_int_t m, magma_int_t n,
    const double alpha,
    const double* dA, magma_int_t ldda, magma_int_t strideA,
    const double* dx, magma_int_t incx, magma_int_t stridex,
    const double beta,
    double* dy, magma_int_t incy, magma_int_t stridey,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Pointer-array batched dgemv for small square matrices: only one dimension n
 * is passed. Unlike the general gemv declarations in this header it returns
 * magma_int_t -- presumably a status telling the caller whether this path
 * handled the size (TODO confirm; body not visible here).
 */
magma_int_t
magmablas_dgemv_batched_smallsq(
    magma_trans_t trans, magma_int_t n,
    const double alpha,
    double const * const * dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double const * const * dx_array, magma_int_t xi, magma_int_t incx,
    const double beta,
    double** dy_array, magma_int_t yi, magma_int_t incy,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Strided batched dgemv for small square matrices (single dimension n).
 * Returns magma_int_t, presumably a status (TODO confirm).
 * Note: the transpose argument is named transA here, and the offsets precede
 * ldda/incx, unlike magmablas_dgemv_batched_strided_core.
 */
magma_int_t
magmablas_dgemv_batched_strided_smallsq(
    magma_trans_t transA, magma_int_t n,
    const double alpha,
    const double* dA, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magma_int_t strideA,
    const double* dx, magma_int_t xi, magma_int_t incx, magma_int_t stridex,
    const double beta,
    double* dy, magma_int_t yi, magma_int_t incy, magma_int_t stridey,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgeqrf (LAPACK naming: QR factorization) for small square matrices:
 * only one dimension n is passed. (Ai, Aj) and taui look like offsets into
 * each dA_array / dtau_array entry -- TODO confirm; body not visible here.
 */
magma_int_t
magma_dgeqrf_batched_smallsq(
    magma_int_t n,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui, magma_int_t* info_array,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dgeqrf entry point: by LAPACK naming, QR factorization of
 * batchCount m-by-n matrices, with the scalar reflector factors returned via
 * dtau_array (presumed; body not visible here).
 */
magma_int_t
magma_dgeqrf_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array,
    magma_int_t lda,
    double **dtau_array,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgeqrf taking a caller-supplied workspace (device_work) whose size
 * is passed by pointer in device_lwork. Presumably supports a workspace-size
 * query through that pointer -- TODO confirm the protocol and units against
 * the implementation, which is not in this header.
 */
magma_int_t
magma_dgeqrf_batched_work(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t ldda,
    double **dtau_array,
    magma_int_t *info_array,
    void* device_work, magma_int_t* device_lwork,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Expert batched dgeqrf: the caller chooses the block size nb and supplies
 * the dR_array, dT_array and dW_array buffers. provide_RT presumably controls
 * whether the R and T factors are handed back in dR_array/dT_array -- TODO
 * confirm its accepted values against the implementation.
 */
magma_int_t
magma_dgeqrf_expert_batched(
    magma_int_t m, magma_int_t n, magma_int_t nb,
    double **dA_array, magma_int_t ldda,
    double **dR_array, magma_int_t lddr,
    double **dT_array, magma_int_t lddt,
    double **dtau_array, magma_int_t provide_RT,
    double **dW_array,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * "v4" variant of the batched dgeqrf (QR factorization by LAPACK naming).
 * Same arguments as magma_dgeqrf_batched except that it takes no
 * magma_queue_t parameter.
 */
magma_int_t
magma_dgeqrf_batched_v4(
    magma_int_t m, magma_int_t n,
    double **dA_array,
    magma_int_t lda,
    double **tau_array,
    magma_int_t *info_array,
    magma_int_t batchCount);

/*
 * Batched QR panel factorization with a fused update step (presumed from the
 * name; body not visible here). separate_R_V presumably selects whether R is
 * stored apart from the reflectors in dR_array -- TODO confirm.
 * (Ai, Aj), taui and (Ri, Rj) look like offsets into the respective arrays.
 */
magma_int_t
magma_dgeqrf_panel_fused_update_batched(
        magma_int_t m, magma_int_t n, magma_int_t nb,
        double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
        double** tau_array, magma_int_t taui,
        double** dR_array, magma_int_t Ri, magma_int_t Rj, magma_int_t lddr,
        magma_int_t *info_array, magma_int_t separate_R_V,
        magma_int_t batchCount, magma_queue_t queue);

/*
 * Offset-taking form of the batched QR panel factorization (presumed from the
 * name; body not visible here). Compared with magma_dgeqrf_panel_batched it
 * adds (Ai, Aj), taui, (Ti, Tj) and (Ri, Rj) arguments, which look like
 * offsets into the A, tau, T and R arrays -- TODO confirm.
 */
magma_int_t
magma_dgeqrf_panel_internal_batched(
        magma_int_t m, magma_int_t n, magma_int_t nb,
        double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
        double** tau_array, magma_int_t taui,
        double** dT_array, magma_int_t Ti, magma_int_t Tj, magma_int_t lddt,
        double** dR_array, magma_int_t Ri, magma_int_t Rj, magma_int_t lddr,
        double** dwork_array,
        magma_int_t *info_array,
        magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched QR panel factorization with block size nb (presumed from the name;
 * body not visible here). The caller supplies the dT_array, dR_array and
 * dwork_array buffers; their required sizes are not stated in this header.
 */
magma_int_t
magma_dgeqrf_panel_batched(
    magma_int_t m, magma_int_t n, magma_int_t nb,
    double** dA_array,    magma_int_t ldda,
    double** tau_array,
    double** dT_array, magma_int_t ldt,
    double** dR_array, magma_int_t ldr,
    double** dwork_array,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dgels: by LAPACK naming, least-squares solve of op(A) X = B using a
 * QR factorization (presumed; body not visible here).
 * hwork/lwork is a single workspace buffer; the "h" prefix suggests host
 * memory -- TODO confirm, along with the required lwork.
 */
magma_int_t
magma_dgels_batched(
    magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
    double **dA_array, magma_int_t ldda,
    double **dB_array, magma_int_t lddb,
    double *hwork, magma_int_t lwork,
    magma_int_t *info,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Fused batched dgeqr2 (LAPACK naming: unblocked QR); "reg_tall" presumably
 * denotes a register-based kernel for tall matrices -- TODO confirm.
 * check_launch_only presumably makes the call test whether the kernel can be
 * launched without running it; verify against the implementation.
 */
magma_int_t
magma_dgeqr2_fused_reg_tall_batched(
    magma_int_t m, magma_int_t n,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui,
    magma_int_t* info_array, magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Fused batched dgeqr2 (LAPACK naming: unblocked QR); "reg_medium" presumably
 * denotes a register-based kernel for medium-sized matrices -- TODO confirm.
 * Same parameter list as magma_dgeqr2_fused_reg_tall_batched.
 */
magma_int_t
magma_dgeqr2_fused_reg_medium_batched(
    magma_int_t m, magma_int_t n,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui,
    magma_int_t* info_array, magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Fused batched dgeqr2 (LAPACK naming: unblocked QR), "reg" variant --
 * presumably the register-based path that the tall/medium variants specialize
 * (TODO confirm; body not visible here). Same parameter list as those.
 */
magma_int_t
magma_dgeqr2_fused_reg_batched(
    magma_int_t m, magma_int_t n,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui,
    magma_int_t* info_array, magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Fused batched dgeqr2 (LAPACK naming: unblocked QR), "sm" variant --
 * presumably a shared-memory kernel (TODO confirm). Unlike the "reg" variants
 * it takes an explicit nthreads argument; this header also declares
 * magma_get_dgeqr2_fused_sm_batched_nthreads, which presumably supplies it.
 */
magma_int_t
magma_dgeqr2_fused_sm_batched(
    magma_int_t m, magma_int_t n,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui,
    magma_int_t* info_array, magma_int_t nthreads, magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dgeqr2: by LAPACK naming, unblocked QR factorization (presumed;
 * body not visible here). (Ai, Aj) and taui look like offsets into each
 * dA_array / dtau_array entry -- TODO confirm.
 */
magma_int_t
magma_dgeqr2_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double **dtau_array, magma_int_t taui,
    magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dorm2r (LAPACK naming: apply the orthogonal Q from a QR
 * factorization, unblocked), "reg_tall" variant -- presumably a register-based
 * kernel for tall matrices (TODO confirm). Reflectors come from dV_array with
 * scalars in dtau_array; dA_array is presumably the matrix being updated.
 * There is no info_array argument; check_launch_only presumably requests a
 * launch-feasibility check only.
 */
magma_int_t
magma_dorm2r_reg_tall_batched(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double** dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv,
    double **dtau_array, magma_int_t taui,
    magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dorm2r (LAPACK naming: apply the orthogonal Q from a QR
 * factorization, unblocked), "reg_medium" variant -- presumably a
 * register-based kernel for medium sizes (TODO confirm; body not visible).
 * Same parameter list as magma_dorm2r_reg_tall_batched.
 */
magma_int_t
magma_dorm2r_reg_medium_batched(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t ib,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double** dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv,
    double **dtau_array, magma_int_t taui,
    magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dorm2r (LAPACK naming: apply the orthogonal Q from a QR
 * factorization, unblocked), "reg" variant (presumed register-based; body not
 * visible here). Note: the sixth integer argument is named k here, whereas
 * the reg_tall/reg_medium declarations call it ib.
 */
magma_int_t
magma_dorm2r_reg_batched(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double** dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv,
    double **dtau_array, magma_int_t taui,
    magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Batched dorm2r (LAPACK naming: apply the orthogonal Q from a QR
 * factorization, unblocked), "sm" variant -- presumably a shared-memory
 * kernel (TODO confirm; body not visible here).
 * Same parameter list as magma_dorm2r_reg_batched.
 */
magma_int_t
magma_dorm2r_sm_batched(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t k,
    double** dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda,
    double** dV_array, magma_int_t Vi, magma_int_t Vj, magma_int_t lddv,
    double **dtau_array, magma_int_t taui,
    magma_int_t check_launch_only,
    magma_int_t batchCount, magma_queue_t queue );

/*
 * Offset-taking batched dlarfb (LAPACK naming: apply a block reflector to C),
 * presumably implemented with gemm calls as the name suggests -- TODO confirm.
 * dV_array and dT_array are const inputs; dC_array is updated; dwork_array and
 * dworkvt_array are caller-provided workspaces with leading dimensions
 * ldwork / ldworkvt. (vi, vj), (Ti, Tj), (Ci, Cj) look like offsets.
 */
magma_int_t
magma_dlarfb_gemm_internal_batched(
    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
    magma_int_t m, magma_int_t n, magma_int_t k,
    magmaDouble_const_ptr dV_array[],    magma_int_t vi, magma_int_t vj, magma_int_t lddv,
    magmaDouble_const_ptr dT_array[],    magma_int_t Ti, magma_int_t Tj, magma_int_t lddt,
    magmaDouble_ptr dC_array[],          magma_int_t Ci, magma_int_t Cj, magma_int_t lddc,
    magmaDouble_ptr dwork_array[],       magma_int_t ldwork,
    magmaDouble_ptr dworkvt_array[],     magma_int_t ldworkvt,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dlarfb (LAPACK naming: apply a block reflector to C), presumably
 * implemented with gemm calls as the name suggests -- TODO confirm.
 * Same arguments as magma_dlarfb_gemm_internal_batched without the offsets.
 */
magma_int_t
magma_dlarfb_gemm_batched(
    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
    magma_int_t m, magma_int_t n, magma_int_t k,
    magmaDouble_const_ptr dV_array[],    magma_int_t lddv,
    magmaDouble_const_ptr dT_array[],    magma_int_t lddt,
    magmaDouble_ptr dC_array[],          magma_int_t lddc,
    magmaDouble_ptr dwork_array[],       magma_int_t ldwork,
    magmaDouble_ptr dworkvt_array[],     magma_int_t ldworkvt,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Offset-taking batched dlarft (LAPACK naming: form the triangular factor T
 * of a block reflector from V and tau) -- presumed; body not visible here.
 * stair_T presumably selects a staircase layout/size for T -- TODO confirm.
 * (vi, vj), taui and (Ti, Tj) look like offsets into the respective arrays.
 */
magma_int_t
magma_dlarft_internal_batched(
        magma_int_t n, magma_int_t k, magma_int_t stair_T,
        double **v_array,   magma_int_t vi, magma_int_t vj, magma_int_t ldv,
        double **tau_array, magma_int_t taui,
        double **T_array,   magma_int_t Ti, magma_int_t Tj, magma_int_t ldt,
        double **work_array, magma_int_t lwork,
        magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dlarft (LAPACK naming: form the triangular factor T of a block
 * reflector from V and tau) -- presumed; body not visible here.
 * Same arguments as magma_dlarft_internal_batched without the offsets.
 */
magma_int_t
magma_dlarft_batched(
    magma_int_t n, magma_int_t k, magma_int_t stair_T,
    double **v_array, magma_int_t ldv,
    double **tau_array,
    double **T_array, magma_int_t ldt,
    double **work_array, magma_int_t lwork,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dlarft kernel wrapper, "sm32x32" variant -- presumably a
 * shared-memory path limited to T of at most 32x32 (TODO confirm the size
 * restriction against the implementation). Takes no workspace and returns
 * nothing, unlike magma_dlarft_batched.
 */
void
magma_dlarft_sm32x32_batched(
    magma_int_t n, magma_int_t k,
    double **v_array, magma_int_t vi, magma_int_t vj, magma_int_t ldv,
    double **tau_array, magma_int_t taui,
    double **T_array, magma_int_t Ti, magma_int_t Tj, magma_int_t ldt,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Non-batched helper of the dlarft "sm32x32" path; "recdtrmv" presumably
 * stands for a recursive triangular matrix-vector product combining Trec and
 * Ttri with tau -- TODO confirm; the exact operation is not visible here.
 */
void magmablas_dlarft_recdtrmv_sm32x32(
    magma_int_t m, magma_int_t n,
    double *tau,
    double *Trec, magma_int_t ldtrec,
    double *Ttri, magma_int_t ldttri,
    magma_queue_t queue);

/*
 * Batched counterpart of magmablas_dlarft_recdtrmv_sm32x32: operands become
 * pointer arrays and gain offsets (taui, Treci/Trecj, Ttrii/Ttrij). The exact
 * operation is not visible in this header -- see the implementation.
 */
void magmablas_dlarft_recdtrmv_sm32x32_batched(
    magma_int_t m, magma_int_t n,
    double **tau_array,  magma_int_t taui,
    double **Trec_array, magma_int_t Treci, magma_int_t Trecj, magma_int_t ldtrec,
    double **Ttri_array, magma_int_t Ttrii, magma_int_t Ttrij, magma_int_t ldttri,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Non-batched helper of the dlarft "sm32x32" path; presumably a triangular
 * matrix-vector product (dtrmv) step that reads Tin and writes Tout using
 * tau -- TODO confirm; the exact operation is not visible here.
 */
void magmablas_dlarft_dtrmv_sm32x32(
    magma_int_t m, magma_int_t n,
    double *tau,
    double *Tin, magma_int_t ldtin,
    double *Tout, magma_int_t ldtout,
    magma_queue_t queue);

/*
 * Batched counterpart of magmablas_dlarft_dtrmv_sm32x32: operands become
 * pointer arrays and gain offsets (taui, Tini/Tinj, Touti/Toutj). The exact
 * operation is not visible in this header -- see the implementation.
 */
void magmablas_dlarft_dtrmv_sm32x32_batched(
    magma_int_t m, magma_int_t n,
    double **tau_array, magma_int_t taui,
    double **Tin_array, magma_int_t Tini, magma_int_t Tinj, magma_int_t ldtin,
    double **Tout_array, magma_int_t Touti, magma_int_t Toutj, magma_int_t ldtout,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched column norms: presumably the 2-norm (BLAS dnrm2) of each of the n
 * columns of every m-by-n matrix, written to dxnorm_array -- TODO confirm.
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magmablas_dnrm2_cols_batched(
    magma_int_t m, magma_int_t n,
    double **dA_array, magma_int_t lda,
    double **dxnorm_array,
    magma_int_t batchCount);

/*
 * Batched dlarfgx -- presumably a variant of LAPACK dlarfg (generate an
 * elementary Householder reflector) that reuses precomputed norms from
 * dxnorm_array and stores into dR_array at iteration `it` (TODO confirm;
 * body not visible here).
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magma_dlarfgx_batched(
    magma_int_t n, double **dx0_array, double **dx_array,
    double **dtau_array, double **dxnorm_array,
    double **dR_array, magma_int_t it,
    magma_int_t batchCount);

/*
 * "v4" batched dlarfx -- presumably applies an elementary reflector
 * (v_array, tau_array) to C_array and updates xnorm_array at the given step
 * (TODO confirm; body not visible here).
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magma_dlarfx_batched_v4(
    magma_int_t m, magma_int_t n,
    double **v_array,
    double **tau_array,
    double **C_array, magma_int_t ldc, double **xnorm_array,
    magma_int_t step,
    magma_int_t batchCount);

/*
 * Batched dlarfg: by LAPACK naming, generates an elementary Householder
 * reflector from alpha and x, returning tau (presumed; body not visible
 * here). All operands are per-problem pointer arrays.
 * Note: this declaration has no magma_queue_t parameter.
 */
void
magmablas_dlarfg_batched(
    magma_int_t n,
    double** dalpha_array,
    double** dx_array, magma_int_t incx,
    double** dtau_array,
    magma_int_t batchCount );

/*
 * Batched dpotrf (LAPACK naming: Cholesky factorization), "lpout" variant.
 * What distinguishes lpout from lpin cannot be told from this header --
 * see the implementation. (ai, aj) look like offsets into each dA_array
 * entry and gbstep like a global step offset -- TODO confirm.
 */
magma_int_t
magma_dpotrf_lpout_batched(
    magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t lda, magma_int_t gbstep,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

/*
 * Batched dpotrf (LAPACK naming: Cholesky factorization), "lpin" variant.
 * Same parameter list as magma_dpotrf_lpout_batched; how the two differ
 * cannot be told from this header -- see the implementation.
 */
magma_int_t
magma_dpotrf_lpin_batched(
    magma_uplo_t uplo, magma_int_t n,
    double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t lda, magma_int_t gbstep,
    magma_int_t *info_array,
    magma_int_t batchCount, magma_queue_t queue);

// host interface
/*
 * Host-side batched dlacpy (LAPACK naming: copy all or a triangle of A into
 * B, selected by uplo). The hA_array/hB_array names suggest host pointers --
 * TODO confirm. Takes no magma_queue_t.
 */
void
blas_dlacpy_batched(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    double const * const * hA_array, magma_int_t lda,
    double               **hB_array, magma_int_t ldb,
    magma_int_t batchCount );

/*
 * Host-side batched dgemm. By BLAS naming:
 * C = alpha*op(A)*op(B) + beta*C for each problem (presumed; body not
 * visible here). A and B arrays are const-qualified; hC_array is the output.
 * Takes no magma_queue_t.
 */
void
blas_dgemm_batched(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha,
    double const * const * hA_array, magma_int_t lda,
    double const * const * hB_array, magma_int_t ldb,
    double beta,
    double **hC_array, magma_int_t ldc,
    magma_int_t batchCount );

/*
 * Host-side batched dtrsm (BLAS naming: triangular solve with multiple
 * right-hand sides, result overwriting B) -- presumed; body not visible here.
 * Note: hA_array is not const-qualified here. Takes no magma_queue_t.
 */
void
blas_dtrsm_batched(
        magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
        magma_int_t m, magma_int_t n,
        double alpha,
        double **hA_array, magma_int_t lda,
        double **hB_array, magma_int_t ldb,
        magma_int_t batchCount );

/*
 * Host-side batched dtrmm (BLAS naming: triangular matrix-matrix multiply,
 * result overwriting B) -- presumed; body not visible here.
 * Same parameter list as blas_dtrsm_batched. Takes no magma_queue_t.
 */
void
blas_dtrmm_batched(
        magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
        magma_int_t m, magma_int_t n,
        double alpha,
        double **hA_array, magma_int_t lda,
        double **hB_array, magma_int_t ldb,
        magma_int_t batchCount );

/*
 * Host-side batched dsymm (BLAS naming: symmetric matrix-matrix multiply,
 * C = alpha*A*B + beta*C or C = alpha*B*A + beta*C depending on side) --
 * presumed; body not visible here. Takes no magma_queue_t.
 */
void
blas_dsymm_batched(
        magma_side_t side, magma_uplo_t uplo,
        magma_int_t m, magma_int_t n,
        double alpha,
        double **hA_array, magma_int_t lda,
        double **hB_array, magma_int_t ldb,
        double beta,
        double **hC_array, magma_int_t ldc,
        magma_int_t batchCount );

/*
 * Host-side batched dsyrk (BLAS naming: symmetric rank-k update of C from A)
 * -- presumed; body not visible here. hA_array is const-qualified; hC_array
 * is the output. Takes no magma_queue_t.
 */
void
blas_dsyrk_batched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha, double const * const * hA_array, magma_int_t lda,
    double beta,  double               **hC_array, magma_int_t ldc,
    magma_int_t batchCount );

/*
 * Host-side batched dsyr2k (BLAS naming: symmetric rank-2k update of C from
 * A and B) -- presumed; body not visible here. hA_array and hB_array are
 * const-qualified; hC_array is the output. Takes no magma_queue_t.
 */
void
blas_dsyr2k_batched(
    magma_uplo_t uplo, magma_trans_t trans,
    magma_int_t n, magma_int_t k,
    double alpha, double const * const * hA_array, magma_int_t lda,
                              double const * const * hB_array, magma_int_t ldb,
    double beta,              double               **hC_array, magma_int_t ldc,
    magma_int_t batchCount );

/*
 * Batched dormqr: by LAPACK naming, multiplies C by the orthogonal Q (or its
 * transpose) defined by k reflectors stored in A with scalars tau (presumed;
 * body not visible here).
 * Workspace is passed as device_work with its size in *lwork_device;
 * presumably supports a size query through that pointer -- TODO confirm.
 * NOTE(review): lwork_device is int64_t*, whereas magma_dgeqrf_batched_work
 * in this header uses magma_int_t* for its workspace size.
 */
magma_int_t
magma_dormqr_batched(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double **dA_array, magma_int_t ldda,
    double **dtau_array,
    double **dC_array, magma_int_t lddc,
    void *device_work, int64_t *lwork_device,
    magma_int_t *dinfo_array, magma_int_t batchCount,
    magma_queue_t queue);
// for debugging purposes
/*
 * Debugging helper that presumably initializes each pivot vector in
 * ipiv_array (pm entries per problem) -- TODO confirm the values written;
 * the implementation is not in this header.
 * Takes no magma_queue_t; the name carries no magma_ prefix.
 */
void
dset_stepinit_ipiv(
    magma_int_t **ipiv_array,
    magma_int_t pm,
    magma_int_t batchCount);

#ifdef __cplusplus
}
#endif

#undef MAGMA_REAL

#endif  /* MAGMA_DBATCHED_H */
