PAPI 7.1.0.0
Loading...
Searching...
No Matches
simpleMultiGPU.h
Go to the documentation of this file.
1/*
2 * PAPI Multiple GPU example. This example is taken from the NVIDIA
3 * documentation (Copyright 1993-2013 NVIDIA Corporation) and has been
4 * adapted to show the use of CUPTI and PAPI in collecting event
5 * counters for multiple GPU contexts. PAPI Team (2015)
6 */
7
8/*
9 * This software contains source code provided by NVIDIA Corporation
10 *
11 * According to the Nvidia EULA (compute 5.5 version)
12 * http://developer.download.nvidia.com/compute/cuda/5_5/rel/docs/EULA.pdf
13 *
14 * Chapter 2. NVIDIA CORPORATION CUDA SAMPLES END USER LICENSE AGREEMENT
15 * 2.1.1. Source Code
16 * Developer shall have the right to modify and create derivative works with the Source
17 * Code. Developer shall own any derivative works ("Derivatives") it creates to the Source
18 * Code, provided that Developer uses the Materials in accordance with the terms and
19 * conditions of this Agreement. Developer may distribute the Derivatives, provided that
20 * all NVIDIA copyright notices and trademarks are propagated and used properly and
21 * the Derivatives include the following statement: “This software contains source code
22 * provided by NVIDIA Corporation.”
23 */
24
25/*
26 * This application demonstrates how to use the CUDA API to use multiple GPUs.
27 *
28 * Note that in order to detect multiple GPUs in your system you have to disable
29 * SLI in the nvidia control panel. Otherwise only one GPU is visible to the
30 * application. On the other side, you can still extend your desktop to screens
31 * attached to both GPUs.
32 */
33
34#ifndef SIMPLEMULTIGPU_H
35#define SIMPLEMULTIGPU_H
36
/*
 * Per-GPU plan: all host/device state needed to run one GPU's share of the
 * multi-GPU sum reduction. One instance is filled in per device; the stream
 * member lets each GPU's copies and kernel launch proceed asynchronously.
 */
typedef struct
{
    //Host-side input data
    int dataN;                  // number of input elements assigned to this GPU
    float *h_Data;              // host pointer to this GPU's slice of the input

    //Partial sum for this GPU
    float *h_Sum;               // host-side accumulation target for this GPU's result

    //Device buffers
    float *d_Data,*d_Sum;       // device copies: input slice and per-block partial sums

    //Reduction copied back from GPU
    // NOTE(review): this member was dropped in the garbled listing (its comment
    // survived but the declaration line was missing); restored per the file's
    // member index, which lists `float * h_Sum_from_device`.
    float *h_Sum_from_device;

    //Stream for asynchronous command execution
    cudaStream_t stream;        // per-GPU stream so copy/compute on different GPUs overlap

} TGPUplan;
56
/*
 * Host-side wrapper (defined in the companion .cu file) that launches the
 * sum-reduction kernel on stream s.
 *
 * d_Result : device buffer for the reduction output — presumably BLOCK_N
 *            partial sums, one per block; confirm against the .cu definition.
 * d_Input  : device buffer holding the N input floats.
 * N        : number of input elements.
 * BLOCK_N  : grid size (number of blocks) for the launch.
 * THREAD_N : block size (threads per block) for the launch.
 * s        : CUDA stream the launch is issued on (asynchronous w.r.t. host).
 *
 * NOTE(review): taking `cudaStream_t &` by C++ reference inside an
 * extern "C" declaration is legal only when compiled as C++ (nvcc does so),
 * but the symbol is not callable from plain C as the linkage suggests.
 * Interface left unchanged for caller compatibility.
 */
extern "C"
void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s);
59
60#endif
#define N
Definition: byte_profile.c:32
double s
Definition: byte_profile.c:36
void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s)
cudaStream_t stream
float * h_Data
float * d_Data
float * h_Sum
float * h_Sum_from_device