It is a simple code that:
i. gets a matrix from matlab
ii. converts from the matlab ordering to magmaDoubleComplex
iii. transfers the data to the GPU
iv. calls Magma to find the inverse
v. transfers the data back to the Host
vi converts from magmaDoubleComplex to matlab ordering.
I have posted it below. The code runs fine if the calls to magma_zgetrf and magma_zgetri are commented out. This shows that the matrix is successfully being transfer to and from the GPU. But when I uncomment those lines the whole thing crashes.
To test the code I compiled with: mex CC=gcc CFLAGS="\$CFLAGS -DADD_" LDFLAGS="-lmagma -lcudart -lcublas" magmaZinv.c
Then from matlab, I ran:
a=rand(3)+rand(3)*1i;
magmaZinv(a)
Code: Select all
#include <mex.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stddef.h>
#include <magma_v2.h>
#include "magma_lapack.h"
#include <cuda_runtime.h>
void mat2magma(magmaDoubleComplex* p, double* pr, double* pi,int numElements)
{
int j=0;
for(j=0;j<numElements;j++){
p[j].x=pr[j];
p[j].y=pi[j];
}
}
void magma2mat(magmaDoubleComplex* p, double* pr, double* pi,int numElements)
{
int j=0;
for(j=0;j<numElements;j++){
pr[j]= p[j].x;
pi[j]= p[j].y;
}
}
/*gateway function*/
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[]) {
/*initialize magma*/
magma_init();
magma_queue_t queue = NULL;
magma_device_t dev;
magma_getdevice(&dev);
magma_queue_create(dev,&queue );
magma_int_t m,ldwork,info;
magma_int_t *piv;
magmaDoubleComplex *a,*da,*dwork;
/* Matlab -> Host */
m=mxGetM(prhs[0]);
piv=(magma_int_t*) malloc(m*sizeof(magma_int_t));
magma_zmalloc_cpu(&a,m*m);
mat2magma(a,mxGetPr(prhs[0]),mxGetPi(prhs[0]),m*m);
ldwork = m*magma_get_zgetri_nb(m);
/* Host -> GPU */
magma_zmalloc(&dwork,ldwork);
magma_zmalloc(&da,m*m);
magma_zsetmatrix(m,m,a,m,da,m,queue);
/*LU and Inverse */
magma_zgetrf_gpu(m,m,da,m,piv,&info);
magma_zgetri_gpu(m,da,m,piv,dwork,ldwork,&info);
/*GPU -> Host */
magma_zgetmatrix(m,m,da,m,a,m,queue);
/*Host -> Matlab*/
plhs[0] = mxCreateDoubleMatrix(m,m,mxCOMPLEX);
magma2mat(a,mxGetPr(plhs[0]),mxGetPi(plhs[0]),m*m);
free(a);
free(piv);
magma_free(dwork);
magma_free(da);
magma_queue_destroy(queue);
magma_finalize();
}