At the moment, in clMAGMA, a magma ptr type is a cl_mem object. (We may change this at some point, but if we do, we will provide a mechanism to convert back and forth.)
I think the problem is you need to use the same context as clMAGMA. At the moment, this means initializing clMAGMA, then getting the context from it. This can be done by creating a queue and querying it for its context. (A clMAGMA queue is currently the same as an OpenCL queue.) Below is a working example.
This is a bit more obscure than I would like, so we'll add some functions to make integration with outside OpenCL contexts easier.
Code: Select all
// swap.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "magma.h"
// ------------------------------------------------------------
// internal MAGMA routine, in interface_opencl/error.cpp
// This interface may change in the future!
// Declared by hand here so that exitOnFail_ below can print a text
// description next to the numeric error code.
extern "C"
const char* magma_clGetErrorString( cl_int error );
// ------------------------------------------------------------
// Terminates the program with a diagnostic when err is nonzero.
//   err   status code to test; 0 is treated as success
//   msg   short label identifying the call that produced err
//   func, file, line   call site; normally filled in by the exitOnFail macro
// On failure, writes one line to stderr (label, numeric code, the string
// from magma_clGetErrorString, and the call site) and exits with a failure
// status. Returns normally only when err == 0.
void exitOnFail_( int err, const char* msg,
                  const char* func, const char* file, int line )
{
    if ( err == 0 ) {
        return;
    }
    fprintf( stderr, "Error: %s; %d: %s in %s at %s:%d\n",
             msg, err, magma_clGetErrorString(err),
             func, file, line );
    exit( EXIT_FAILURE );
}

// Convenience wrapper: forwards to exitOnFail_ with the caller's function
// name, source file, and line number.
#define exitOnFail( err, msg ) \
    exitOnFail_( err, msg, __func__, __FILE__, __LINE__ )
// ------------------------------------------------------------
int main( int argc, char** argv )
{
// ----- added
magma_init();
int err = 0;
// I don't really recommend embedding OpenCL code this way, but for brevity...
const char* src =
"__kernel void dswap( int n, __global double* x, __global double *y )"
"{"
" int i = get_local_id(0);"
" double tmp = x[i];"
" x[i] = y[i];"
" y[i] = tmp;"
"}";
double alpha = 3.0;
double beta = 2.0;
int ndevices = 0;
magma_device_t device;
magma_getdevices( &device, 1, &ndevices );
magma_queue_t queue;
magma_queue_create( device, &queue );
// get OpenCL context from MAGMA queue
// (probably MAGMA will provide a function to do this nicely in the future.)
cl_context context;
err = clGetCommandQueueInfo( queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL );
exitOnFail( err, "clGetCommandQueueInfo" );
cl_program program = clCreateProgramWithSource( context, 1, &src, NULL, &err );
exitOnFail( err, "clCreateProgramWithSource" );
err = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
exitOnFail( err, "clBuildProgram" );
char text[ 8192 ];
err = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, sizeof(text), text, NULL );
exitOnFail( err, "clGetProgramBuildInfo" );
int len = strlen( text );
if ( len > 0 ) {
printf( "warning:\n%s\n\n", text );
}
// ----- done added
int row = 3, col = 3;
double *A_h, *B_h;
cl_mem A_k, B_k;
magma_malloc( &A_k, row*col*sizeof(double) );
magma_malloc( &B_k, row*col*sizeof(double) );
magma_malloc_cpu( (void**) &A_h, row*col*sizeof(double) );
magma_malloc_cpu( (void**) &B_h, row*col*sizeof(double) );
for(int z = 0; z < row*col; z++) {
A_h[z] = z+1;
}
printf( "A = [ " );
for(int z = 0; z < row*col; z++) {
printf( "%6.2f ", A_h[z] );
}
printf( "]\n" );
int offset = 0; // changed "size" to "offset"
magma_dsetmatrix( row, col, A_h, row, A_k, offset, row, queue );
magma_dsetmatrix( row, col, A_h, row, B_k, offset, row, queue ); // added (else B_k is unset)
magma_dgemm( MagmaNoTrans, MagmaTrans, row, row, col,
alpha, A_k, offset, row,
A_k, offset, row,
beta, B_k, offset, row, queue );
magma_dgetmatrix( row, col, B_k, offset, row, B_h, row, queue );
printf( "B = [ " );
for(int z = 0; z < row*col; z++) {
printf( "%6.2f ", B_h[z] );
}
printf( "]\n" );
cl_kernel k_prueba;
k_prueba = clCreateKernel( program, "dswap", &err );
exitOnFail( err, "clCreateKernel" );
// added couple arguments for dswap kernel
int size = row*col;
int arg = 0;
err = clSetKernelArg( k_prueba, arg++, sizeof(size), &size ); exitOnFail( err, "clSetKernelArg size" );
err = clSetKernelArg( k_prueba, arg++, sizeof(A_k), &A_k ); exitOnFail( err, "clSetKernelArg A_k" );
err = clSetKernelArg( k_prueba, arg++, sizeof(B_k), &B_k ); exitOnFail( err, "clSetKernelArg B_k" );
printf( "swap\n" );
size_t global_prueba = row*col;
err = clEnqueueNDRangeKernel( queue, k_prueba, 1, NULL, &global_prueba, NULL, 0, NULL, NULL );
exitOnFail( err, "clEnqueueNDRangeKernel" );
magma_dgetmatrix( row, col, A_k, offset, row, A_h, row, queue );
printf( "A = [ " );
for(int z = 0; z < row*col; z++) {
printf( "%6.2f ", A_h[z]);
}
printf( "]\n" );
magma_dgetmatrix( row, col, B_k, offset, row, B_h, row, queue );
printf( "B = [ " );
for(int z = 0; z < row*col; z++) {
printf( "%6.2f ", B_h[z]);
}
printf( "]\n" );
// ----- added
magma_finalize();
return 0;
}
Compile and link with clMAGMA. Here's an example on MacOS using the OpenCL framework. Adjust the libraries as needed to link with your OpenCL library.