/* Unsigned integer type (at least 64 bits wide) used for iteration counts. */
typedef unsigned long long uint64;
6void test_hp_x86_128B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
7void test_sp_x86_128B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
8void test_dp_x86_128B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
10void test_hp_x86_256B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
11void test_sp_x86_256B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
12void test_dp_x86_256B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
14void test_hp_x86_512B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
15void test_sp_x86_512B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
16void test_dp_x86_512B_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
18void test_hp_x86_128B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
19void test_sp_x86_128B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
20void test_dp_x86_128B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
22void test_hp_x86_256B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
23void test_sp_x86_256B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
24void test_dp_x86_256B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
26void test_hp_x86_512B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
27void test_sp_x86_512B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
28void test_dp_x86_512B_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
32typedef __m128 SP_SCALAR_TYPE;
33typedef __m128d DP_SCALAR_TYPE;
/*
 * Single-precision scalar wrappers around the SSE/FMA _ss intrinsics.
 *
 * SET/ADD/MUL expand to an expression followed by ';' (kept as-is so that
 * existing call sites continue to compile); use them as the right-hand side
 * of an assignment.  FMA_VEC_SS is a braced statement that stores
 * _I_ * _J_ + _K_ (as computed by _mm_fmadd_ss) into _out_.
 */
#define SET_VEC_SS(_I_) _mm_set_ss( _I_ );
#define ADD_VEC_SS(_I_,_J_) _mm_add_ss( _I_ , _J_ );
#define MUL_VEC_SS(_I_,_J_) _mm_mul_ss( _I_ , _J_ );
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) { _out_ = _mm_fmadd_ss( _I_ , _J_ , _K_ ); }
/*
 * Double-precision scalar wrappers around the SSE2/FMA _sd intrinsics.
 *
 * SET/ADD/MUL expand to an expression followed by ';' (kept as-is so that
 * existing call sites continue to compile); use them as the right-hand side
 * of an assignment.  FMA_VEC_SD is a braced statement that stores
 * _I_ * _J_ + _K_ (as computed by _mm_fmadd_sd) into _out_.
 */
#define SET_VEC_SD(_I_) _mm_set_sd( _I_ );
#define ADD_VEC_SD(_I_,_J_) _mm_add_sd( _I_ , _J_ );
#define MUL_VEC_SD(_I_,_J_) _mm_mul_sd( _I_ , _J_ );
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) { _out_ = _mm_fmadd_sd( _I_ , _J_ , _K_ ); }
45#if defined(X86_VEC_WIDTH_128B)
46typedef __m128 SP_VEC_TYPE;
47typedef __m128d DP_VEC_TYPE;
49#define SET_VEC_PS(_I_) _mm_set1_ps( _I_ );
50#define ADD_VEC_PS(_I_,_J_) _mm_add_ps( _I_ , _J_ );
51#define MUL_VEC_PS(_I_,_J_) _mm_mul_ps( _I_ , _J_ );
52#define FMA_VEC_PS(_I_,_J_,_K_) _mm_fmadd_ps( _I_ , _J_ , _K_ );
54#define SET_VEC_PD(_I_) _mm_set1_pd( _I_ );
55#define ADD_VEC_PD(_I_,_J_) _mm_add_pd( _I_ , _J_ );
56#define MUL_VEC_PD(_I_,_J_) _mm_mul_pd( _I_ , _J_ );
57#define FMA_VEC_PD(_I_,_J_,_K_) _mm_fmadd_pd( _I_ , _J_ , _K_ );
59#elif defined(X86_VEC_WIDTH_512B)
60typedef __m512 SP_VEC_TYPE;
61typedef __m512d DP_VEC_TYPE;
63#define SET_VEC_PS(_I_) _mm512_set1_ps( _I_ );
64#define ADD_VEC_PS(_I_,_J_) _mm512_add_ps( _I_ , _J_ );
65#define MUL_VEC_PS(_I_,_J_) _mm512_mul_ps( _I_ , _J_ );
66#define FMA_VEC_PS(_I_,_J_,_K_) _mm512_fmadd_ps( _I_ , _J_ , _K_ );
68#define SET_VEC_PD(_I_) _mm512_set1_pd( _I_ );
69#define ADD_VEC_PD(_I_,_J_) _mm512_add_pd( _I_ , _J_ );
70#define MUL_VEC_PD(_I_,_J_) _mm512_mul_pd( _I_ , _J_ );
71#define FMA_VEC_PD(_I_,_J_,_K_) _mm512_fmadd_pd( _I_ , _J_ , _K_ );
74typedef __m256 SP_VEC_TYPE;
75typedef __m256d DP_VEC_TYPE;
77#define SET_VEC_PS(_I_) _mm256_set1_ps( _I_ );
78#define ADD_VEC_PS(_I_,_J_) _mm256_add_ps( _I_ , _J_ );
79#define MUL_VEC_PS(_I_,_J_) _mm256_mul_ps( _I_ , _J_ );
80#define FMA_VEC_PS(_I_,_J_,_K_) _mm256_fmadd_ps( _I_ , _J_ , _K_ );
82#define SET_VEC_PD(_I_) _mm256_set1_pd( _I_ );
83#define ADD_VEC_PD(_I_,_J_) _mm256_add_pd( _I_ , _J_ );
84#define MUL_VEC_PD(_I_,_J_) _mm256_mul_pd( _I_ , _J_ );
85#define FMA_VEC_PD(_I_,_J_,_K_) _mm256_fmadd_pd( _I_ , _J_ , _K_ );
89void test_hp_arm_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
90void test_sp_arm_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
91void test_dp_arm_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
92void test_hp_arm_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
93void test_sp_arm_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
94void test_dp_arm_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
99typedef float SP_SCALAR_TYPE;
100typedef double DP_SCALAR_TYPE;
101typedef float16x8_t HP_VEC_TYPE;
102typedef float32x4_t SP_VEC_TYPE;
103typedef float64x2_t DP_VEC_TYPE;
/*
 * Packed-vector wrappers around the NEON intrinsics, in half (PH),
 * single (PS) and double (PD) precision.
 *
 * SET broadcasts one value to every lane.  FMA(_I_,_J_,_K_) yields
 * _I_ * _J_ + _K_: vfmaq_* takes the accumulator first, so the operands are
 * passed in reverse order.  Every macro expands to an expression followed
 * by ';' (kept so existing call sites continue to compile).
 */
#define SET_VEC_PH(_I_) (HP_VEC_TYPE)vdupq_n_f16( _I_ );
#define SET_VEC_PS(_I_) (SP_VEC_TYPE)vdupq_n_f32( _I_ );
#define SET_VEC_PD(_I_) (DP_VEC_TYPE)vdupq_n_f64( _I_ );

#define ADD_VEC_PH(_I_,_J_) (HP_VEC_TYPE)vaddq_f16( _I_ , _J_ );
#define ADD_VEC_PS(_I_,_J_) (SP_VEC_TYPE)vaddq_f32( _I_ , _J_ );
#define ADD_VEC_PD(_I_,_J_) (DP_VEC_TYPE)vaddq_f64( _I_ , _J_ );

#define MUL_VEC_PH(_I_,_J_) (HP_VEC_TYPE)vmulq_f16( _I_ , _J_ );
#define MUL_VEC_PS(_I_,_J_) (SP_VEC_TYPE)vmulq_f32( _I_ , _J_ );
#define MUL_VEC_PD(_I_,_J_) (DP_VEC_TYPE)vmulq_f64( _I_ , _J_ );

#define FMA_VEC_PH(_I_,_J_,_K_) (HP_VEC_TYPE)vfmaq_f16( _K_ , _J_ , _I_ );
#define FMA_VEC_PS(_I_,_J_,_K_) (SP_VEC_TYPE)vfmaq_f32( _K_ , _J_ , _I_ );
#define FMA_VEC_PD(_I_,_J_,_K_) (DP_VEC_TYPE)vfmaq_f64( _K_ , _J_ , _I_ );
/*
 * Half-precision scalar wrappers for ARM.
 *
 * SET/ADD/MUL/SQRT expand to an expression followed by ';' (kept so existing
 * call sites continue to compile).
 *
 * FMA_VEC_SH is a braced statement: it broadcasts the three operands into
 * vectors, performs the packed FMA and stores lane 0 (_I_ * _J_ + _K_) into
 * _out_.  The locals arg1/arg2/arg3/argTmp are private to the block, so do
 * not pass expressions that use those names.
 *
 * NOTE(review): the 'argTmp' declaration and the closing brace were missing
 * from the text under review and have been restored; 'half' is expected to
 * be declared elsewhere -- confirm.
 */
#define SET_VEC_SH(_I_) (_I_) ;
#define ADD_VEC_SH(_I_,_J_) vaddh_f16( _I_ , _J_ );
#define MUL_VEC_SH(_I_,_J_) vmulh_f16( _I_ , _J_ );
#define SQRT_VEC_SH(_I_) vsqrth_f16( _I_ );
#define FMA_VEC_SH(_out_,_I_,_J_,_K_) {\
    HP_VEC_TYPE arg1 = SET_VEC_PH(_I_);\
    HP_VEC_TYPE arg2 = SET_VEC_PH(_J_);\
    HP_VEC_TYPE arg3 = SET_VEC_PH(_K_);\
    HP_VEC_TYPE argTmp;\
    argTmp = FMA_VEC_PH( arg1 , arg2 , arg3 );\
    _out_ = ((half*)&(argTmp))[0];\
}
/*
 * Single-precision scalar wrappers for ARM, written as plain C arithmetic.
 *
 * SET/ADD/MUL expand to a fully parenthesized expression followed by ';'
 * (the trailing ';' is kept so existing call sites continue to compile).
 * The parentheses make operands such as 'a + b' and surrounding operators
 * bind as expected.
 *
 * FMA_VEC_SS is a braced statement: it broadcasts the three operands into
 * vectors, performs the packed FMA and stores lane 0 (_I_ * _J_ + _K_) into
 * _out_.  The locals arg1/arg2/arg3/argTmp are private to the block, so do
 * not pass expressions that use those names.
 *
 * NOTE(review): the 'argTmp' declaration and the closing brace were missing
 * from the text under review and have been restored.
 */
#define SET_VEC_SS(_I_) (_I_) ;
#define ADD_VEC_SS(_I_,_J_) ((_I_) + (_J_)) ;
#define MUL_VEC_SS(_I_,_J_) ((_I_) * (_J_)) ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
    SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
    SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
    SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
    SP_VEC_TYPE argTmp;\
    argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
    _out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
/*
 * Double-precision scalar wrappers for ARM, written as plain C arithmetic.
 *
 * SET/ADD/MUL expand to a fully parenthesized expression followed by ';'
 * (the trailing ';' is kept so existing call sites continue to compile).
 * The parentheses make operands such as 'a + b' and surrounding operators
 * bind as expected.
 *
 * FMA_VEC_SD is a braced statement: it broadcasts the three operands into
 * vectors, performs the packed FMA and stores lane 0 (_I_ * _J_ + _K_) into
 * _out_.  The locals arg1/arg2/arg3/argTmp are private to the block, so do
 * not pass expressions that use those names.
 *
 * NOTE(review): the 'argTmp' declaration and the closing brace were missing
 * from the text under review and have been restored.
 */
#define SET_VEC_SD(_I_) (_I_) ;
#define ADD_VEC_SD(_I_,_J_) ((_I_) + (_J_)) ;
#define MUL_VEC_SD(_I_,_J_) ((_I_) * (_J_)) ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
    DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
    DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
    DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
    DP_VEC_TYPE argTmp;\
    argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
    _out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
160void test_hp_power_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
161void test_sp_power_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
162void test_dp_power_VEC(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
163void test_hp_power_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
164void test_sp_power_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
165void test_dp_power_VEC_FMA(
int instr_per_loop,
uint64 iterations,
int EventSet, FILE *
fp );
169typedef float SP_SCALAR_TYPE;
170typedef double DP_SCALAR_TYPE;
171typedef __vector
float SP_VEC_TYPE;
172typedef __vector
double DP_VEC_TYPE;
/*
 * Packed-vector wrappers for POWER, in single (PS) and double (PD)
 * precision.
 *
 * SET builds a vector with every lane equal to _I_; note that _I_ is
 * evaluated once per lane, so do not pass an expression with side effects.
 * FMA(_I_,_J_,_K_) passes its operands to vec_madd in the same order
 * (multiply _I_ by _J_, add _K_).  Every macro expands to an expression
 * followed by ';' (kept so existing call sites continue to compile).
 */
#define SET_VEC_PS(_I_) (SP_VEC_TYPE){ _I_ , _I_ , _I_ , _I_ };
#define SET_VEC_PD(_I_) (DP_VEC_TYPE){ _I_ , _I_ };

#define ADD_VEC_PS(_I_,_J_) (SP_VEC_TYPE)vec_add( _I_ , _J_ );
#define ADD_VEC_PD(_I_,_J_) (DP_VEC_TYPE)vec_add( _I_ , _J_ );

#define MUL_VEC_PS(_I_,_J_) (SP_VEC_TYPE)vec_mul( _I_ , _J_ );
#define MUL_VEC_PD(_I_,_J_) (DP_VEC_TYPE)vec_mul( _I_ , _J_ );

#define FMA_VEC_PS(_I_,_J_,_K_) (SP_VEC_TYPE)vec_madd( _I_ , _J_ , _K_ );
#define FMA_VEC_PD(_I_,_J_,_K_) (DP_VEC_TYPE)vec_madd( _I_ , _J_ , _K_ );
/*
 * Single-precision scalar wrappers for POWER, written as plain C arithmetic.
 *
 * SET/ADD/MUL expand to a fully parenthesized expression followed by ';'
 * (the trailing ';' is kept so existing call sites continue to compile).
 * The parentheses make operands such as 'a + b' and surrounding operators
 * bind as expected.
 *
 * FMA_VEC_SS is a braced statement: it broadcasts the three operands into
 * vectors, performs the packed FMA and stores lane 0 (_I_ * _J_ + _K_) into
 * _out_.  The locals arg1/arg2/arg3/argTmp are private to the block, so do
 * not pass expressions that use those names.
 *
 * NOTE(review): the 'argTmp' declaration and the closing brace were missing
 * from the text under review and have been restored.
 */
#define SET_VEC_SS(_I_) (_I_) ;
#define ADD_VEC_SS(_I_,_J_) ((_I_) + (_J_)) ;
#define MUL_VEC_SS(_I_,_J_) ((_I_) * (_J_)) ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
    SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
    SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
    SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
    SP_VEC_TYPE argTmp;\
    argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
    _out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
/*
 * Double-precision scalar wrappers for POWER, written as plain C arithmetic.
 *
 * SET/ADD/MUL expand to a fully parenthesized expression followed by ';'
 * (the trailing ';' is kept so existing call sites continue to compile).
 * The parentheses make operands such as 'a + b' and surrounding operators
 * bind as expected.
 *
 * FMA_VEC_SD is a braced statement: it broadcasts the three operands into
 * vectors, performs the packed FMA and stores lane 0 (_I_ * _J_ + _K_) into
 * _out_.  The locals arg1/arg2/arg3/argTmp are private to the block, so do
 * not pass expressions that use those names.
 *
 * NOTE(review): the 'argTmp' declaration and the closing brace were missing
 * from the text under review and have been restored.
 */
#define SET_VEC_SD(_I_) (_I_) ;
#define ADD_VEC_SD(_I_,_J_) ((_I_) + (_J_)) ;
#define MUL_VEC_SD(_I_,_J_) ((_I_) * (_J_)) ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
    DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
    DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
    DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
    DP_VEC_TYPE argTmp;\
    argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
    _out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
unsigned long long uint64