Below is my minimal code to test magma_dgeqp3:
Code: Select all
# include <stdio.h>
# include <stdlib.h>
# include <cuda.h>
# include "magma.h"
# include "magma_lapack.h"
# define min(a, b ) ((( a ) <( b ))?( a ):( b ))
# define max(a, b ) ((( a ) <( b ))?( b ):( a ))
int main( int argc , char ** argv )
{
magma_init(); // initialize Magma
double gpu_time = 0.0 , cpu_time = 0.0;
magma_int_t m = 100 , n = m, n2=m*n;
double *a, *r; // a, r - mxn matrices on the host
double * h_work ; // workspace
double *tau ; // scalars defining the elementary reflectors
magma_int_t * jpvt ; // pivoting information
magma_int_t i, j, info, nb;
magma_int_t min_mn = min(m, n);
magma_int_t ione = 1 , lwork ; // lwork - workspace size
magma_int_t ISEED [4] = {0 ,0 ,0 ,1}; // seed
double c_neg_one = MAGMA_D_NEG_ONE ;
nb = magma_get_dgeqp3_nb( m, n ); // optimal blocksize
jpvt =( magma_int_t *) malloc(n* sizeof( magma_int_t )); // host mem .
// for jpvt
magma_dmalloc_cpu(& tau , min_mn ); // host memory for tau
magma_dmalloc_pinned(&a,n2 ); // host memory for a
magma_dmalloc_pinned(&r,n2 ); // host memory for r
lwork = 2*n + ( n+1 )* nb;
lwork = max(lwork , m * n + n);
magma_dmalloc_cpu(& h_work , lwork ); // host memory for h_work
// Random matrix a, copy a -> r
lapackf77_dlarnv(& ione ,ISEED ,&n2 ,a);
lapackf77_dlacpy( MagmaUpperLowerStr ,&m ,&n,a ,&m,r ,&m); // a- >r
// MAGMA
lapackf77_dlacpy( MagmaUpperLowerStr ,&m ,&n,a ,&m,r ,&m);
for (j = 0; j < n; j++)
jpvt[j] = 0 ;
// QR decomposition with column pivoting , Magma version
magma_dgeqp3(m,n,r,m,jpvt,tau,h_work,lwork,&info);
printf("info = %d \n", info);
printf(" MAGMA time : %7.3f sec .\n",gpu_time ); // Magma time
// Free memory
free( jpvt ); // free host memory
free( tau ); // free host memory
magma_free_pinned(a); // free host memory
magma_free_pinned(r); // free host memory
free( h_work ); // free host memory
magma_finalize( ); // finalize Magma
return EXIT_SUCCESS ;
}
And this is the Makefile I use to build it:
# Makefile for the magma_dgeqp3 test program (testing-dgeqp3.x).
# Recipe lines below must start with a TAB character.

# Definitions of variables
CC            = nvcc
# nvcc's -O option expects an explicit level, so -O2 is used instead of bare -O.
CCFLAGS       = -O2
LD            = nvcc
LDFLAGS       = -O2
GENCODE_FLAGS = -arch=sm_35 -gencode arch=compute_35,code=compute_35

# Definitions of rules

# Link step. -lgomp is listed once, after the static MKL archives that
# reference it, so the linker can resolve their OpenMP symbols.
testing-dgeqp3.x : testing-dgeqp3.o
	@$(LD) $(LDFLAGS) $(GENCODE_FLAGS) \
	-o testing-dgeqp3.x \
	testing-dgeqp3.o \
	-L/usr/local/cuda/lib64 \
	-L/usr/local/magma/lib \
	-lmagma -lcudart -lcusolver -lcublas \
	/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a \
	/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.a \
	/opt/intel/mkl/lib/intel64/libmkl_core.a \
	-ldl -lpthread -lgomp

# Compile step. -DADD_ is passed through to the MAGMA headers.
testing-dgeqp3.o : testing-dgeqp3.cpp
	@$(CC) $(CCFLAGS) $(GENCODE_FLAGS) -c testing-dgeqp3.cpp \
	-I/opt/intel/mkl/include \
	-I/usr/local/magma/include \
	-DADD_

# Remove build products, including the linked executable.
clean : FORCE
	rm -f a.out *.o *~ core testing-dgeqp3.x

# Empty target with no prerequisites: forces 'clean' to run every time.
FORCE :