PAPI 7.1.0.0
Loading...
Searching...
No Matches
simpleMultiGPU.h
Go to the documentation of this file.
1/*
2 * PAPI Multiple GPU example. This example is taken from the NVIDIA
3 * documentation (Copyright 1993-2013 NVIDIA Corporation) and has been
4 * adapted to show the use of CUPTI and PAPI in collecting event
5 * counters for multiple GPU contexts. PAPI Team (2015)
6 */
7
8/*
9 * This software contains source code provided by NVIDIA Corporation
10 *
11 * According to the Nvidia EULA (compute 5.5 version)
12 * http://developer.download.nvidia.com/compute/cuda/5_5/rel/docs/EULA.pdf
13 *
14 * Chapter 2. NVIDIA CORPORATION CUDA SAMPLES END USER LICENSE AGREEMENT
15 * 2.1.1. Source Code
16 * Developer shall have the right to modify and create derivative works with the Source
17 * Code. Developer shall own any derivative works ("Derivatives") it creates to the Source
18 * Code, provided that Developer uses the Materials in accordance with the terms and
19 * conditions of this Agreement. Developer may distribute the Derivatives, provided that
20 * all NVIDIA copyright notices and trademarks are propagated and used properly and
21 * the Derivatives include the following statement: “This software contains source code
22 * provided by NVIDIA Corporation.”
23 */
24
25/*
26 * This application demonstrates how to use the CUDA API to use multiple GPUs.
27 *
28 * Note that in order to detect multiple GPUs in your system you have to disable
29 * SLI in the nvidia control panel. Otherwise only one GPU is visible to the
30 * application. On the other side, you can still extend your desktop to screens
31 * attached to both GPUs.
32 */
33
34#ifndef SIMPLEMULTIGPU_H
35#define SIMPLEMULTIGPU_H
36
/*
 * Per-GPU plan: all host/device state needed to run one GPU's share of the
 * multi-GPU sum reduction. One instance is filled in per device; the stream
 * member lets each GPU's copies and kernel launch proceed asynchronously.
 */
typedef struct
{
    //Host-side input data
    int dataN;                  // number of input elements assigned to this GPU
    float *h_Data;              // host pointer to this GPU's slice of the input

    //Partial sum for this GPU
    float *h_Sum;               // host-side accumulation target for this GPU's result

    //Device buffers
    float *d_Data,*d_Sum;       // device copies: input slice and per-block partial sums

    //Reduction copied back from GPU
    // NOTE(review): this member was dropped in the garbled listing (its comment
    // survived but the declaration line was missing); restored per the file's
    // member index, which lists `float * h_Sum_from_device`.
    float *h_Sum_from_device;

    //Stream for asynchronous command execution
    cudaStream_t stream;        // per-GPU stream so copy/compute on different GPUs overlap

} TGPUplan;
56
/*
 * Host-side wrapper (defined in the companion .cu file) that launches the
 * sum-reduction kernel on stream s.
 *
 * d_Result : device buffer for the reduction output — presumably BLOCK_N
 *            partial sums, one per block; confirm against the .cu definition.
 * d_Input  : device buffer holding the N input floats.
 * N        : number of input elements.
 * BLOCK_N  : grid size (number of blocks) for the launch.
 * THREAD_N : block size (threads per block) for the launch.
 * s        : CUDA stream the launch is issued on (asynchronous w.r.t. host).
 *
 * NOTE(review): taking `cudaStream_t &` by C++ reference inside an
 * extern "C" declaration is legal only when compiled as C++ (nvcc does so),
 * but the symbol is not callable from plain C as the linkage suggests.
 * Interface left unchanged for caller compatibility.
 */
extern "C"
void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s);
59
60#endif
#define N
Definition: byte_profile.c:32
double s
Definition: byte_profile.c:36
void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s)
cudaStream_t stream
float * h_Data
float * d_Data
float * h_Sum
float * h_Sum_from_device