PAPI
7.1.0.0
Loading...
Searching...
No Matches
simpleMultiGPU.h
Go to the documentation of this file.
1
/*
2
* PAPI Multiple GPU example. This example is taken from the NVIDIA
3
* documentation (Copyright 1993-2013 NVIDIA Corporation) and has been
4
* adapted to show the use of CUPTI and PAPI in collecting event
5
* counters for multiple GPU contexts. PAPI Team (2015)
6
*/
7
8
/*
9
* This software contains source code provided by NVIDIA Corporation
10
*
11
* According to the NVIDIA EULA (compute 5.5 version)
12
* http://developer.download.nvidia.com/compute/cuda/5_5/rel/docs/EULA.pdf
13
*
14
* Chapter 2. NVIDIA CORPORATION CUDA SAMPLES END USER LICENSE AGREEMENT
15
* 2.1.1. Source Code
16
* Developer shall have the right to modify and create derivative works with the Source
17
* Code. Developer shall own any derivative works ("Derivatives") it creates to the Source
18
* Code, provided that Developer uses the Materials in accordance with the terms and
19
* conditions of this Agreement. Developer may distribute the Derivatives, provided that
20
* all NVIDIA copyright notices and trademarks are propagated and used properly and
21
* the Derivatives include the following statement: “This software contains source code
22
* provided by NVIDIA Corporation.”
23
*/
24
25
/*
26
* This application demonstrates how to use the CUDA API to use multiple GPUs.
27
*
28
* Note that in order to detect multiple GPUs in your system you have to disable
29
* SLI in the nvidia control panel. Otherwise only one GPU is visible to the
30
* application. On the other hand, you can still extend your desktop to screens
31
* attached to both GPUs.
32
*/
33
34
#ifndef SIMPLEMULTIGPU_H
35
#define SIMPLEMULTIGPU_H
36
37
typedef
struct
38
{
39
//Host-side input data
40
int
dataN
;
41
float
*
h_Data
;
42
43
//Partial sum for this GPU
44
float
*
h_Sum
;
45
46
//Device buffers
47
float
*
d_Data
,*d_Sum;
48
49
//Reduction copied back from GPU
50
float
*
h_Sum_from_device
;
51
52
//Stream for asynchronous command execution
53
cudaStream_t
stream
;
54
55
}
TGPUplan
;
56
57
extern
"C"
58
void
launch_reduceKernel
(
float
*d_Result,
float
*d_Input,
int
N
,
int
BLOCK_N,
int
THREAD_N, cudaStream_t &
s
);
59
60
#endif
N
#define N
Definition:
byte_profile.c:32
s
double s
Definition:
byte_profile.c:36
launch_reduceKernel
void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s)
TGPUplan
Definition:
simpleMultiGPU.h:38
TGPUplan::stream
cudaStream_t stream
Definition:
simpleMultiGPU.h:53
TGPUplan::dataN
int dataN
Definition:
simpleMultiGPU.h:40
TGPUplan::h_Data
float * h_Data
Definition:
simpleMultiGPU.h:41
TGPUplan::d_Data
float * d_Data
Definition:
simpleMultiGPU.h:47
TGPUplan::h_Sum
float * h_Sum
Definition:
simpleMultiGPU.h:44
TGPUplan::h_Sum_from_device
float * h_Sum_from_device
Definition:
simpleMultiGPU.h:50
src
components
cuda
tests
simpleMultiGPU.h
Generated on Wed Dec 20 2023 18:12:51 for PAPI by
1.9.6