/* Write a result line to fp: the value of theory followed by a literal 0.
 * NOTE(review): "%lld" requires a long long argument -- confirm the type of theory. */
5 fprintf(
fp,
"%lld 0\n", theory);
/* Report a failure on stderr with a fixed message. */
14 fprintf(
stderr,
"Problem.\n");
/* Write a result line to fp holding theory and flpins separated by one space.
 * NOTE(review): both arguments must be long long to match "%lld" -- confirm. */
18 fprintf(
fp,
"%lld %lld\n", theory, flpins);
/* Half-precision ADD/MUL kernel fragment: 24 operations per loop pass
 * (two groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
23 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
26 r0 = SET_VEC_SH(0.01);
27 r1 = SET_VEC_SH(0.02);
28 r2 = SET_VEC_SH(0.03);
29 r3 = SET_VEC_SH(0.04);
30 r4 = SET_VEC_SH(0.05);
31 r5 = SET_VEC_SH(0.06);
32 r6 = SET_VEC_SH(0.07);
33 r7 = SET_VEC_SH(0.08);
34 r8 = SET_VEC_SH(0.09);
35 r9 = SET_VEC_SH(0.10);
36 rA = SET_VEC_SH(0.11);
37 rB = SET_VEC_SH(0.12);
38 rC = SET_VEC_SH(0.13);
39 rD = SET_VEC_SH(0.14);
40 rE = SET_VEC_SH(0.15);
41 rF = SET_VEC_SH(0.16);
/* Repeat the operation groups while c is below iterations. */
44 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * Every destination depends only on its own previous value and on an operand
 * that is never reassigned in the lines shown. */
49 r0 = MUL_VEC_SH(r0,rC);
50 r1 = ADD_VEC_SH(r1,rD);
51 r2 = MUL_VEC_SH(r2,rE);
52 r3 = ADD_VEC_SH(r3,rF);
53 r4 = MUL_VEC_SH(r4,rC);
54 r5 = ADD_VEC_SH(r5,rD);
55 r6 = MUL_VEC_SH(r6,rE);
56 r7 = ADD_VEC_SH(r7,rF);
57 r8 = MUL_VEC_SH(r8,rC);
58 r9 = ADD_VEC_SH(r9,rD);
59 rA = MUL_VEC_SH(rA,rE);
60 rB = ADD_VEC_SH(rB,rF);
/* Group 2: same destinations with ADD and MUL swapped and the rC..rF
 * operands taken in reverse order (rF, rE, rD, rC). */
62 r0 = ADD_VEC_SH(r0,rF);
63 r1 = MUL_VEC_SH(r1,rE);
64 r2 = ADD_VEC_SH(r2,rD);
65 r3 = MUL_VEC_SH(r3,rC);
66 r4 = ADD_VEC_SH(r4,rF);
67 r5 = MUL_VEC_SH(r5,rE);
68 r6 = ADD_VEC_SH(r6,rD);
69 r7 = MUL_VEC_SH(r7,rC);
70 r8 = ADD_VEC_SH(r8,rF);
71 r9 = MUL_VEC_SH(r9,rE);
72 rA = ADD_VEC_SH(rA,rD);
73 rB = MUL_VEC_SH(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
81 r0 = ADD_VEC_SH(r0,r1);
82 r2 = ADD_VEC_SH(r2,r3);
83 r4 = ADD_VEC_SH(r4,r5);
84 r6 = ADD_VEC_SH(r6,r7);
85 r8 = ADD_VEC_SH(r8,r9);
86 rA = ADD_VEC_SH(rA,rB);
88 r0 = ADD_VEC_SH(r0,r2);
89 r4 = ADD_VEC_SH(r4,r6);
90 r8 = ADD_VEC_SH(r8,rA);
92 r0 = ADD_VEC_SH(r0,r4);
93 r0 = ADD_VEC_SH(r0,r8);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
97 out = ADD_VEC_SH(out,temp);
/* Half-precision ADD/MUL kernel fragment: 48 operations per loop pass
 * (four groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
103 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
106 r0 = SET_VEC_SH(0.01);
107 r1 = SET_VEC_SH(0.02);
108 r2 = SET_VEC_SH(0.03);
109 r3 = SET_VEC_SH(0.04);
110 r4 = SET_VEC_SH(0.05);
111 r5 = SET_VEC_SH(0.06);
112 r6 = SET_VEC_SH(0.07);
113 r7 = SET_VEC_SH(0.08);
114 r8 = SET_VEC_SH(0.09);
115 r9 = SET_VEC_SH(0.10);
116 rA = SET_VEC_SH(0.11);
117 rB = SET_VEC_SH(0.12);
118 rC = SET_VEC_SH(0.13);
119 rD = SET_VEC_SH(0.14);
120 rE = SET_VEC_SH(0.15);
121 rF = SET_VEC_SH(0.16);
/* Repeat the operation groups while c is below iterations. */
124 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
129 r0 = MUL_VEC_SH(r0,rC);
130 r1 = ADD_VEC_SH(r1,rD);
131 r2 = MUL_VEC_SH(r2,rE);
132 r3 = ADD_VEC_SH(r3,rF);
133 r4 = MUL_VEC_SH(r4,rC);
134 r5 = ADD_VEC_SH(r5,rD);
135 r6 = MUL_VEC_SH(r6,rE);
136 r7 = ADD_VEC_SH(r7,rF);
137 r8 = MUL_VEC_SH(r8,rC);
138 r9 = ADD_VEC_SH(r9,rD);
139 rA = MUL_VEC_SH(rA,rE);
140 rB = ADD_VEC_SH(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
142 r0 = ADD_VEC_SH(r0,rF);
143 r1 = MUL_VEC_SH(r1,rE);
144 r2 = ADD_VEC_SH(r2,rD);
145 r3 = MUL_VEC_SH(r3,rC);
146 r4 = ADD_VEC_SH(r4,rF);
147 r5 = MUL_VEC_SH(r5,rE);
148 r6 = ADD_VEC_SH(r6,rD);
149 r7 = MUL_VEC_SH(r7,rC);
150 r8 = ADD_VEC_SH(r8,rF);
151 r9 = MUL_VEC_SH(r9,rE);
152 rA = ADD_VEC_SH(rA,rD);
153 rB = MUL_VEC_SH(rB,rC);
/* Group 3: textual repeat of group 1. */
155 r0 = MUL_VEC_SH(r0,rC);
156 r1 = ADD_VEC_SH(r1,rD);
157 r2 = MUL_VEC_SH(r2,rE);
158 r3 = ADD_VEC_SH(r3,rF);
159 r4 = MUL_VEC_SH(r4,rC);
160 r5 = ADD_VEC_SH(r5,rD);
161 r6 = MUL_VEC_SH(r6,rE);
162 r7 = ADD_VEC_SH(r7,rF);
163 r8 = MUL_VEC_SH(r8,rC);
164 r9 = ADD_VEC_SH(r9,rD);
165 rA = MUL_VEC_SH(rA,rE);
166 rB = ADD_VEC_SH(rB,rF);
/* Group 4: textual repeat of group 2. */
168 r0 = ADD_VEC_SH(r0,rF);
169 r1 = MUL_VEC_SH(r1,rE);
170 r2 = ADD_VEC_SH(r2,rD);
171 r3 = MUL_VEC_SH(r3,rC);
172 r4 = ADD_VEC_SH(r4,rF);
173 r5 = MUL_VEC_SH(r5,rE);
174 r6 = ADD_VEC_SH(r6,rD);
175 r7 = MUL_VEC_SH(r7,rC);
176 r8 = ADD_VEC_SH(r8,rF);
177 r9 = MUL_VEC_SH(r9,rE);
178 rA = ADD_VEC_SH(rA,rD);
179 rB = MUL_VEC_SH(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
187 r0 = ADD_VEC_SH(r0,r1);
188 r2 = ADD_VEC_SH(r2,r3);
189 r4 = ADD_VEC_SH(r4,r5);
190 r6 = ADD_VEC_SH(r6,r7);
191 r8 = ADD_VEC_SH(r8,r9);
192 rA = ADD_VEC_SH(rA,rB);
194 r0 = ADD_VEC_SH(r0,r2);
195 r4 = ADD_VEC_SH(r4,r6);
196 r8 = ADD_VEC_SH(r8,rA);
198 r0 = ADD_VEC_SH(r0,r4);
199 r0 = ADD_VEC_SH(r0,r8);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
203 out = ADD_VEC_SH(out,temp);
/* Half-precision ADD/MUL kernel fragment: 96 operations per loop pass
 * (eight groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
209 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
212 r0 = SET_VEC_SH(0.01);
213 r1 = SET_VEC_SH(0.02);
214 r2 = SET_VEC_SH(0.03);
215 r3 = SET_VEC_SH(0.04);
216 r4 = SET_VEC_SH(0.05);
217 r5 = SET_VEC_SH(0.06);
218 r6 = SET_VEC_SH(0.07);
219 r7 = SET_VEC_SH(0.08);
220 r8 = SET_VEC_SH(0.09);
221 r9 = SET_VEC_SH(0.10);
222 rA = SET_VEC_SH(0.11);
223 rB = SET_VEC_SH(0.12);
224 rC = SET_VEC_SH(0.13);
225 rD = SET_VEC_SH(0.14);
226 rE = SET_VEC_SH(0.15);
227 rF = SET_VEC_SH(0.16);
/* Repeat the operation groups while c is below iterations. */
230 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
235 r0 = MUL_VEC_SH(r0,rC);
236 r1 = ADD_VEC_SH(r1,rD);
237 r2 = MUL_VEC_SH(r2,rE);
238 r3 = ADD_VEC_SH(r3,rF);
239 r4 = MUL_VEC_SH(r4,rC);
240 r5 = ADD_VEC_SH(r5,rD);
241 r6 = MUL_VEC_SH(r6,rE);
242 r7 = ADD_VEC_SH(r7,rF);
243 r8 = MUL_VEC_SH(r8,rC);
244 r9 = ADD_VEC_SH(r9,rD);
245 rA = MUL_VEC_SH(rA,rE);
246 rB = ADD_VEC_SH(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
248 r0 = ADD_VEC_SH(r0,rF);
249 r1 = MUL_VEC_SH(r1,rE);
250 r2 = ADD_VEC_SH(r2,rD);
251 r3 = MUL_VEC_SH(r3,rC);
252 r4 = ADD_VEC_SH(r4,rF);
253 r5 = MUL_VEC_SH(r5,rE);
254 r6 = ADD_VEC_SH(r6,rD);
255 r7 = MUL_VEC_SH(r7,rC);
256 r8 = ADD_VEC_SH(r8,rF);
257 r9 = MUL_VEC_SH(r9,rE);
258 rA = ADD_VEC_SH(rA,rD);
259 rB = MUL_VEC_SH(rB,rC);
/* Group 3: textual repeat of group 1. */
261 r0 = MUL_VEC_SH(r0,rC);
262 r1 = ADD_VEC_SH(r1,rD);
263 r2 = MUL_VEC_SH(r2,rE);
264 r3 = ADD_VEC_SH(r3,rF);
265 r4 = MUL_VEC_SH(r4,rC);
266 r5 = ADD_VEC_SH(r5,rD);
267 r6 = MUL_VEC_SH(r6,rE);
268 r7 = ADD_VEC_SH(r7,rF);
269 r8 = MUL_VEC_SH(r8,rC);
270 r9 = ADD_VEC_SH(r9,rD);
271 rA = MUL_VEC_SH(rA,rE);
272 rB = ADD_VEC_SH(rB,rF);
/* Group 4: textual repeat of group 2. */
274 r0 = ADD_VEC_SH(r0,rF);
275 r1 = MUL_VEC_SH(r1,rE);
276 r2 = ADD_VEC_SH(r2,rD);
277 r3 = MUL_VEC_SH(r3,rC);
278 r4 = ADD_VEC_SH(r4,rF);
279 r5 = MUL_VEC_SH(r5,rE);
280 r6 = ADD_VEC_SH(r6,rD);
281 r7 = MUL_VEC_SH(r7,rC);
282 r8 = ADD_VEC_SH(r8,rF);
283 r9 = MUL_VEC_SH(r9,rE);
284 rA = ADD_VEC_SH(rA,rD);
285 rB = MUL_VEC_SH(rB,rC);
/* Group 5: textual repeat of group 1. */
287 r0 = MUL_VEC_SH(r0,rC);
288 r1 = ADD_VEC_SH(r1,rD);
289 r2 = MUL_VEC_SH(r2,rE);
290 r3 = ADD_VEC_SH(r3,rF);
291 r4 = MUL_VEC_SH(r4,rC);
292 r5 = ADD_VEC_SH(r5,rD);
293 r6 = MUL_VEC_SH(r6,rE);
294 r7 = ADD_VEC_SH(r7,rF);
295 r8 = MUL_VEC_SH(r8,rC);
296 r9 = ADD_VEC_SH(r9,rD);
297 rA = MUL_VEC_SH(rA,rE);
298 rB = ADD_VEC_SH(rB,rF);
/* Group 6: textual repeat of group 2. */
300 r0 = ADD_VEC_SH(r0,rF);
301 r1 = MUL_VEC_SH(r1,rE);
302 r2 = ADD_VEC_SH(r2,rD);
303 r3 = MUL_VEC_SH(r3,rC);
304 r4 = ADD_VEC_SH(r4,rF);
305 r5 = MUL_VEC_SH(r5,rE);
306 r6 = ADD_VEC_SH(r6,rD);
307 r7 = MUL_VEC_SH(r7,rC);
308 r8 = ADD_VEC_SH(r8,rF);
309 r9 = MUL_VEC_SH(r9,rE);
310 rA = ADD_VEC_SH(rA,rD);
311 rB = MUL_VEC_SH(rB,rC);
/* Group 7: textual repeat of group 1. */
313 r0 = MUL_VEC_SH(r0,rC);
314 r1 = ADD_VEC_SH(r1,rD);
315 r2 = MUL_VEC_SH(r2,rE);
316 r3 = ADD_VEC_SH(r3,rF);
317 r4 = MUL_VEC_SH(r4,rC);
318 r5 = ADD_VEC_SH(r5,rD);
319 r6 = MUL_VEC_SH(r6,rE);
320 r7 = ADD_VEC_SH(r7,rF);
321 r8 = MUL_VEC_SH(r8,rC);
322 r9 = ADD_VEC_SH(r9,rD);
323 rA = MUL_VEC_SH(rA,rE);
324 rB = ADD_VEC_SH(rB,rF);
/* Group 8: textual repeat of group 2. */
326 r0 = ADD_VEC_SH(r0,rF);
327 r1 = MUL_VEC_SH(r1,rE);
328 r2 = ADD_VEC_SH(r2,rD);
329 r3 = MUL_VEC_SH(r3,rC);
330 r4 = ADD_VEC_SH(r4,rF);
331 r5 = MUL_VEC_SH(r5,rE);
332 r6 = ADD_VEC_SH(r6,rD);
333 r7 = MUL_VEC_SH(r7,rC);
334 r8 = ADD_VEC_SH(r8,rF);
335 r9 = MUL_VEC_SH(r9,rE);
336 rA = ADD_VEC_SH(rA,rD);
337 rB = MUL_VEC_SH(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
345 r0 = ADD_VEC_SH(r0,r1);
346 r2 = ADD_VEC_SH(r2,r3);
347 r4 = ADD_VEC_SH(r4,r5);
348 r6 = ADD_VEC_SH(r6,r7);
349 r8 = ADD_VEC_SH(r8,r9);
350 rA = ADD_VEC_SH(rA,rB);
352 r0 = ADD_VEC_SH(r0,r2);
353 r4 = ADD_VEC_SH(r4,r6);
354 r8 = ADD_VEC_SH(r8,rA);
356 r0 = ADD_VEC_SH(r0,r4);
357 r0 = ADD_VEC_SH(r0,r8);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
361 out = ADD_VEC_SH(out,temp);
/* Single-precision ADD/MUL kernel fragment: 24 operations per loop pass
 * (two groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
390 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
393 r0 = SET_VEC_SS(0.01);
394 r1 = SET_VEC_SS(0.02);
395 r2 = SET_VEC_SS(0.03);
396 r3 = SET_VEC_SS(0.04);
397 r4 = SET_VEC_SS(0.05);
398 r5 = SET_VEC_SS(0.06);
399 r6 = SET_VEC_SS(0.07);
400 r7 = SET_VEC_SS(0.08);
401 r8 = SET_VEC_SS(0.09);
402 r9 = SET_VEC_SS(0.10);
403 rA = SET_VEC_SS(0.11);
404 rB = SET_VEC_SS(0.12);
405 rC = SET_VEC_SS(0.13);
406 rD = SET_VEC_SS(0.14);
407 rE = SET_VEC_SS(0.15);
408 rF = SET_VEC_SS(0.16);
/* Repeat the operation groups while c is below iterations. */
411 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * Every destination depends only on its own previous value and on an operand
 * that is never reassigned in the lines shown. */
416 r0 = MUL_VEC_SS(r0,rC);
417 r1 = ADD_VEC_SS(r1,rD);
418 r2 = MUL_VEC_SS(r2,rE);
419 r3 = ADD_VEC_SS(r3,rF);
420 r4 = MUL_VEC_SS(r4,rC);
421 r5 = ADD_VEC_SS(r5,rD);
422 r6 = MUL_VEC_SS(r6,rE);
423 r7 = ADD_VEC_SS(r7,rF);
424 r8 = MUL_VEC_SS(r8,rC);
425 r9 = ADD_VEC_SS(r9,rD);
426 rA = MUL_VEC_SS(rA,rE);
427 rB = ADD_VEC_SS(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
429 r0 = ADD_VEC_SS(r0,rF);
430 r1 = MUL_VEC_SS(r1,rE);
431 r2 = ADD_VEC_SS(r2,rD);
432 r3 = MUL_VEC_SS(r3,rC);
433 r4 = ADD_VEC_SS(r4,rF);
434 r5 = MUL_VEC_SS(r5,rE);
435 r6 = ADD_VEC_SS(r6,rD);
436 r7 = MUL_VEC_SS(r7,rC);
437 r8 = ADD_VEC_SS(r8,rF);
438 r9 = MUL_VEC_SS(r9,rE);
439 rA = ADD_VEC_SS(rA,rD);
440 rB = MUL_VEC_SS(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
448 r0 = ADD_VEC_SS(r0,r1);
449 r2 = ADD_VEC_SS(r2,r3);
450 r4 = ADD_VEC_SS(r4,r5);
451 r6 = ADD_VEC_SS(r6,r7);
452 r8 = ADD_VEC_SS(r8,r9);
453 rA = ADD_VEC_SS(rA,rB);
455 r0 = ADD_VEC_SS(r0,r2);
456 r4 = ADD_VEC_SS(r4,r6);
457 r8 = ADD_VEC_SS(r8,rA);
459 r0 = ADD_VEC_SS(r0,r4);
460 r0 = ADD_VEC_SS(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
463 SP_SCALAR_TYPE temp = r0;
464 out += ((
float*)&temp)[0];
/* Single-precision ADD/MUL kernel fragment: 48 operations per loop pass
 * (four groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
473 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
476 r0 = SET_VEC_SS(0.01);
477 r1 = SET_VEC_SS(0.02);
478 r2 = SET_VEC_SS(0.03);
479 r3 = SET_VEC_SS(0.04);
480 r4 = SET_VEC_SS(0.05);
481 r5 = SET_VEC_SS(0.06);
482 r6 = SET_VEC_SS(0.07);
483 r7 = SET_VEC_SS(0.08);
484 r8 = SET_VEC_SS(0.09);
485 r9 = SET_VEC_SS(0.10);
486 rA = SET_VEC_SS(0.11);
487 rB = SET_VEC_SS(0.12);
488 rC = SET_VEC_SS(0.13);
489 rD = SET_VEC_SS(0.14);
490 rE = SET_VEC_SS(0.15);
491 rF = SET_VEC_SS(0.16);
/* Repeat the operation groups while c is below iterations. */
494 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
499 r0 = MUL_VEC_SS(r0,rC);
500 r1 = ADD_VEC_SS(r1,rD);
501 r2 = MUL_VEC_SS(r2,rE);
502 r3 = ADD_VEC_SS(r3,rF);
503 r4 = MUL_VEC_SS(r4,rC);
504 r5 = ADD_VEC_SS(r5,rD);
505 r6 = MUL_VEC_SS(r6,rE);
506 r7 = ADD_VEC_SS(r7,rF);
507 r8 = MUL_VEC_SS(r8,rC);
508 r9 = ADD_VEC_SS(r9,rD);
509 rA = MUL_VEC_SS(rA,rE);
510 rB = ADD_VEC_SS(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
512 r0 = ADD_VEC_SS(r0,rF);
513 r1 = MUL_VEC_SS(r1,rE);
514 r2 = ADD_VEC_SS(r2,rD);
515 r3 = MUL_VEC_SS(r3,rC);
516 r4 = ADD_VEC_SS(r4,rF);
517 r5 = MUL_VEC_SS(r5,rE);
518 r6 = ADD_VEC_SS(r6,rD);
519 r7 = MUL_VEC_SS(r7,rC);
520 r8 = ADD_VEC_SS(r8,rF);
521 r9 = MUL_VEC_SS(r9,rE);
522 rA = ADD_VEC_SS(rA,rD);
523 rB = MUL_VEC_SS(rB,rC);
/* Group 3: textual repeat of group 1. */
525 r0 = MUL_VEC_SS(r0,rC);
526 r1 = ADD_VEC_SS(r1,rD);
527 r2 = MUL_VEC_SS(r2,rE);
528 r3 = ADD_VEC_SS(r3,rF);
529 r4 = MUL_VEC_SS(r4,rC);
530 r5 = ADD_VEC_SS(r5,rD);
531 r6 = MUL_VEC_SS(r6,rE);
532 r7 = ADD_VEC_SS(r7,rF);
533 r8 = MUL_VEC_SS(r8,rC);
534 r9 = ADD_VEC_SS(r9,rD);
535 rA = MUL_VEC_SS(rA,rE);
536 rB = ADD_VEC_SS(rB,rF);
/* Group 4: textual repeat of group 2. */
538 r0 = ADD_VEC_SS(r0,rF);
539 r1 = MUL_VEC_SS(r1,rE);
540 r2 = ADD_VEC_SS(r2,rD);
541 r3 = MUL_VEC_SS(r3,rC);
542 r4 = ADD_VEC_SS(r4,rF);
543 r5 = MUL_VEC_SS(r5,rE);
544 r6 = ADD_VEC_SS(r6,rD);
545 r7 = MUL_VEC_SS(r7,rC);
546 r8 = ADD_VEC_SS(r8,rF);
547 r9 = MUL_VEC_SS(r9,rE);
548 rA = ADD_VEC_SS(rA,rD);
549 rB = MUL_VEC_SS(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
557 r0 = ADD_VEC_SS(r0,r1);
558 r2 = ADD_VEC_SS(r2,r3);
559 r4 = ADD_VEC_SS(r4,r5);
560 r6 = ADD_VEC_SS(r6,r7);
561 r8 = ADD_VEC_SS(r8,r9);
562 rA = ADD_VEC_SS(rA,rB);
564 r0 = ADD_VEC_SS(r0,r2);
565 r4 = ADD_VEC_SS(r4,r6);
566 r8 = ADD_VEC_SS(r8,rA);
568 r0 = ADD_VEC_SS(r0,r4);
569 r0 = ADD_VEC_SS(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
572 SP_SCALAR_TYPE temp = r0;
573 out += ((
float*)&temp)[0];
/* Single-precision ADD/MUL kernel fragment: 96 operations per loop pass
 * (eight groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
582 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
585 r0 = SET_VEC_SS(0.01);
586 r1 = SET_VEC_SS(0.02);
587 r2 = SET_VEC_SS(0.03);
588 r3 = SET_VEC_SS(0.04);
589 r4 = SET_VEC_SS(0.05);
590 r5 = SET_VEC_SS(0.06);
591 r6 = SET_VEC_SS(0.07);
592 r7 = SET_VEC_SS(0.08);
593 r8 = SET_VEC_SS(0.09);
594 r9 = SET_VEC_SS(0.10);
595 rA = SET_VEC_SS(0.11);
596 rB = SET_VEC_SS(0.12);
597 rC = SET_VEC_SS(0.13);
598 rD = SET_VEC_SS(0.14);
599 rE = SET_VEC_SS(0.15);
600 rF = SET_VEC_SS(0.16);
/* Repeat the operation groups while c is below iterations. */
603 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
608 r0 = MUL_VEC_SS(r0,rC);
609 r1 = ADD_VEC_SS(r1,rD);
610 r2 = MUL_VEC_SS(r2,rE);
611 r3 = ADD_VEC_SS(r3,rF);
612 r4 = MUL_VEC_SS(r4,rC);
613 r5 = ADD_VEC_SS(r5,rD);
614 r6 = MUL_VEC_SS(r6,rE);
615 r7 = ADD_VEC_SS(r7,rF);
616 r8 = MUL_VEC_SS(r8,rC);
617 r9 = ADD_VEC_SS(r9,rD);
618 rA = MUL_VEC_SS(rA,rE);
619 rB = ADD_VEC_SS(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
621 r0 = ADD_VEC_SS(r0,rF);
622 r1 = MUL_VEC_SS(r1,rE);
623 r2 = ADD_VEC_SS(r2,rD);
624 r3 = MUL_VEC_SS(r3,rC);
625 r4 = ADD_VEC_SS(r4,rF);
626 r5 = MUL_VEC_SS(r5,rE);
627 r6 = ADD_VEC_SS(r6,rD);
628 r7 = MUL_VEC_SS(r7,rC);
629 r8 = ADD_VEC_SS(r8,rF);
630 r9 = MUL_VEC_SS(r9,rE);
631 rA = ADD_VEC_SS(rA,rD);
632 rB = MUL_VEC_SS(rB,rC);
/* Group 3: textual repeat of group 1. */
634 r0 = MUL_VEC_SS(r0,rC);
635 r1 = ADD_VEC_SS(r1,rD);
636 r2 = MUL_VEC_SS(r2,rE);
637 r3 = ADD_VEC_SS(r3,rF);
638 r4 = MUL_VEC_SS(r4,rC);
639 r5 = ADD_VEC_SS(r5,rD);
640 r6 = MUL_VEC_SS(r6,rE);
641 r7 = ADD_VEC_SS(r7,rF);
642 r8 = MUL_VEC_SS(r8,rC);
643 r9 = ADD_VEC_SS(r9,rD);
644 rA = MUL_VEC_SS(rA,rE);
645 rB = ADD_VEC_SS(rB,rF);
/* Group 4: textual repeat of group 2. */
647 r0 = ADD_VEC_SS(r0,rF);
648 r1 = MUL_VEC_SS(r1,rE);
649 r2 = ADD_VEC_SS(r2,rD);
650 r3 = MUL_VEC_SS(r3,rC);
651 r4 = ADD_VEC_SS(r4,rF);
652 r5 = MUL_VEC_SS(r5,rE);
653 r6 = ADD_VEC_SS(r6,rD);
654 r7 = MUL_VEC_SS(r7,rC);
655 r8 = ADD_VEC_SS(r8,rF);
656 r9 = MUL_VEC_SS(r9,rE);
657 rA = ADD_VEC_SS(rA,rD);
658 rB = MUL_VEC_SS(rB,rC);
/* Group 5: textual repeat of group 1. */
660 r0 = MUL_VEC_SS(r0,rC);
661 r1 = ADD_VEC_SS(r1,rD);
662 r2 = MUL_VEC_SS(r2,rE);
663 r3 = ADD_VEC_SS(r3,rF);
664 r4 = MUL_VEC_SS(r4,rC);
665 r5 = ADD_VEC_SS(r5,rD);
666 r6 = MUL_VEC_SS(r6,rE);
667 r7 = ADD_VEC_SS(r7,rF);
668 r8 = MUL_VEC_SS(r8,rC);
669 r9 = ADD_VEC_SS(r9,rD);
670 rA = MUL_VEC_SS(rA,rE);
671 rB = ADD_VEC_SS(rB,rF);
/* Group 6: textual repeat of group 2. */
673 r0 = ADD_VEC_SS(r0,rF);
674 r1 = MUL_VEC_SS(r1,rE);
675 r2 = ADD_VEC_SS(r2,rD);
676 r3 = MUL_VEC_SS(r3,rC);
677 r4 = ADD_VEC_SS(r4,rF);
678 r5 = MUL_VEC_SS(r5,rE);
679 r6 = ADD_VEC_SS(r6,rD);
680 r7 = MUL_VEC_SS(r7,rC);
681 r8 = ADD_VEC_SS(r8,rF);
682 r9 = MUL_VEC_SS(r9,rE);
683 rA = ADD_VEC_SS(rA,rD);
684 rB = MUL_VEC_SS(rB,rC);
/* Group 7: textual repeat of group 1. */
686 r0 = MUL_VEC_SS(r0,rC);
687 r1 = ADD_VEC_SS(r1,rD);
688 r2 = MUL_VEC_SS(r2,rE);
689 r3 = ADD_VEC_SS(r3,rF);
690 r4 = MUL_VEC_SS(r4,rC);
691 r5 = ADD_VEC_SS(r5,rD);
692 r6 = MUL_VEC_SS(r6,rE);
693 r7 = ADD_VEC_SS(r7,rF);
694 r8 = MUL_VEC_SS(r8,rC);
695 r9 = ADD_VEC_SS(r9,rD);
696 rA = MUL_VEC_SS(rA,rE);
697 rB = ADD_VEC_SS(rB,rF);
/* Group 8: textual repeat of group 2. */
699 r0 = ADD_VEC_SS(r0,rF);
700 r1 = MUL_VEC_SS(r1,rE);
701 r2 = ADD_VEC_SS(r2,rD);
702 r3 = MUL_VEC_SS(r3,rC);
703 r4 = ADD_VEC_SS(r4,rF);
704 r5 = MUL_VEC_SS(r5,rE);
705 r6 = ADD_VEC_SS(r6,rD);
706 r7 = MUL_VEC_SS(r7,rC);
707 r8 = ADD_VEC_SS(r8,rF);
708 r9 = MUL_VEC_SS(r9,rE);
709 rA = ADD_VEC_SS(rA,rD);
710 rB = MUL_VEC_SS(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
718 r0 = ADD_VEC_SS(r0,r1);
719 r2 = ADD_VEC_SS(r2,r3);
720 r4 = ADD_VEC_SS(r4,r5);
721 r6 = ADD_VEC_SS(r6,r7);
722 r8 = ADD_VEC_SS(r8,r9);
723 rA = ADD_VEC_SS(rA,rB);
725 r0 = ADD_VEC_SS(r0,r2);
726 r4 = ADD_VEC_SS(r4,r6);
727 r8 = ADD_VEC_SS(r8,rA);
729 r0 = ADD_VEC_SS(r0,r4);
730 r0 = ADD_VEC_SS(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
733 SP_SCALAR_TYPE temp = r0;
734 out += ((
float*)&temp)[0];
/* Double-precision ADD/MUL kernel fragment: 24 operations per loop pass
 * (two groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
743 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
746 r0 = SET_VEC_SD(0.01);
747 r1 = SET_VEC_SD(0.02);
748 r2 = SET_VEC_SD(0.03);
749 r3 = SET_VEC_SD(0.04);
750 r4 = SET_VEC_SD(0.05);
751 r5 = SET_VEC_SD(0.06);
752 r6 = SET_VEC_SD(0.07);
753 r7 = SET_VEC_SD(0.08);
754 r8 = SET_VEC_SD(0.09);
755 r9 = SET_VEC_SD(0.10);
756 rA = SET_VEC_SD(0.11);
757 rB = SET_VEC_SD(0.12);
758 rC = SET_VEC_SD(0.13);
759 rD = SET_VEC_SD(0.14);
760 rE = SET_VEC_SD(0.15);
761 rF = SET_VEC_SD(0.16);
/* Repeat the operation groups while c is below iterations. */
764 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * Every destination depends only on its own previous value and on an operand
 * that is never reassigned in the lines shown. */
769 r0 = MUL_VEC_SD(r0,rC);
770 r1 = ADD_VEC_SD(r1,rD);
771 r2 = MUL_VEC_SD(r2,rE);
772 r3 = ADD_VEC_SD(r3,rF);
773 r4 = MUL_VEC_SD(r4,rC);
774 r5 = ADD_VEC_SD(r5,rD);
775 r6 = MUL_VEC_SD(r6,rE);
776 r7 = ADD_VEC_SD(r7,rF);
777 r8 = MUL_VEC_SD(r8,rC);
778 r9 = ADD_VEC_SD(r9,rD);
779 rA = MUL_VEC_SD(rA,rE);
780 rB = ADD_VEC_SD(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
782 r0 = ADD_VEC_SD(r0,rF);
783 r1 = MUL_VEC_SD(r1,rE);
784 r2 = ADD_VEC_SD(r2,rD);
785 r3 = MUL_VEC_SD(r3,rC);
786 r4 = ADD_VEC_SD(r4,rF);
787 r5 = MUL_VEC_SD(r5,rE);
788 r6 = ADD_VEC_SD(r6,rD);
789 r7 = MUL_VEC_SD(r7,rC);
790 r8 = ADD_VEC_SD(r8,rF);
791 r9 = MUL_VEC_SD(r9,rE);
792 rA = ADD_VEC_SD(rA,rD);
793 rB = MUL_VEC_SD(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
801 r0 = ADD_VEC_SD(r0,r1);
802 r2 = ADD_VEC_SD(r2,r3);
803 r4 = ADD_VEC_SD(r4,r5);
804 r6 = ADD_VEC_SD(r6,r7);
805 r8 = ADD_VEC_SD(r8,r9);
806 rA = ADD_VEC_SD(rA,rB);
808 r0 = ADD_VEC_SD(r0,r2);
809 r4 = ADD_VEC_SD(r4,r6);
810 r8 = ADD_VEC_SD(r8,rA);
812 r0 = ADD_VEC_SD(r0,r4);
813 r0 = ADD_VEC_SD(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first double-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (double*) cast reinterprets temp's storage; if
 * DP_SCALAR_TYPE is not double-based this is type punning -- confirm. */
816 DP_SCALAR_TYPE temp = r0;
817 out += ((
double*)&temp)[0];
/* Double-precision ADD/MUL kernel fragment: 48 operations per loop pass
 * (four groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
826 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
829 r0 = SET_VEC_SD(0.01);
830 r1 = SET_VEC_SD(0.02);
831 r2 = SET_VEC_SD(0.03);
832 r3 = SET_VEC_SD(0.04);
833 r4 = SET_VEC_SD(0.05);
834 r5 = SET_VEC_SD(0.06);
835 r6 = SET_VEC_SD(0.07);
836 r7 = SET_VEC_SD(0.08);
837 r8 = SET_VEC_SD(0.09);
838 r9 = SET_VEC_SD(0.10);
839 rA = SET_VEC_SD(0.11);
840 rB = SET_VEC_SD(0.12);
841 rC = SET_VEC_SD(0.13);
842 rD = SET_VEC_SD(0.14);
843 rE = SET_VEC_SD(0.15);
844 rF = SET_VEC_SD(0.16);
/* Repeat the operation groups while c is below iterations. */
847 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
852 r0 = MUL_VEC_SD(r0,rC);
853 r1 = ADD_VEC_SD(r1,rD);
854 r2 = MUL_VEC_SD(r2,rE);
855 r3 = ADD_VEC_SD(r3,rF);
856 r4 = MUL_VEC_SD(r4,rC);
857 r5 = ADD_VEC_SD(r5,rD);
858 r6 = MUL_VEC_SD(r6,rE);
859 r7 = ADD_VEC_SD(r7,rF);
860 r8 = MUL_VEC_SD(r8,rC);
861 r9 = ADD_VEC_SD(r9,rD);
862 rA = MUL_VEC_SD(rA,rE);
863 rB = ADD_VEC_SD(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
865 r0 = ADD_VEC_SD(r0,rF);
866 r1 = MUL_VEC_SD(r1,rE);
867 r2 = ADD_VEC_SD(r2,rD);
868 r3 = MUL_VEC_SD(r3,rC);
869 r4 = ADD_VEC_SD(r4,rF);
870 r5 = MUL_VEC_SD(r5,rE);
871 r6 = ADD_VEC_SD(r6,rD);
872 r7 = MUL_VEC_SD(r7,rC);
873 r8 = ADD_VEC_SD(r8,rF);
874 r9 = MUL_VEC_SD(r9,rE);
875 rA = ADD_VEC_SD(rA,rD);
876 rB = MUL_VEC_SD(rB,rC);
/* Group 3: textual repeat of group 1. */
878 r0 = MUL_VEC_SD(r0,rC);
879 r1 = ADD_VEC_SD(r1,rD);
880 r2 = MUL_VEC_SD(r2,rE);
881 r3 = ADD_VEC_SD(r3,rF);
882 r4 = MUL_VEC_SD(r4,rC);
883 r5 = ADD_VEC_SD(r5,rD);
884 r6 = MUL_VEC_SD(r6,rE);
885 r7 = ADD_VEC_SD(r7,rF);
886 r8 = MUL_VEC_SD(r8,rC);
887 r9 = ADD_VEC_SD(r9,rD);
888 rA = MUL_VEC_SD(rA,rE);
889 rB = ADD_VEC_SD(rB,rF);
/* Group 4: textual repeat of group 2. */
891 r0 = ADD_VEC_SD(r0,rF);
892 r1 = MUL_VEC_SD(r1,rE);
893 r2 = ADD_VEC_SD(r2,rD);
894 r3 = MUL_VEC_SD(r3,rC);
895 r4 = ADD_VEC_SD(r4,rF);
896 r5 = MUL_VEC_SD(r5,rE);
897 r6 = ADD_VEC_SD(r6,rD);
898 r7 = MUL_VEC_SD(r7,rC);
899 r8 = ADD_VEC_SD(r8,rF);
900 r9 = MUL_VEC_SD(r9,rE);
901 rA = ADD_VEC_SD(rA,rD);
902 rB = MUL_VEC_SD(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
910 r0 = ADD_VEC_SD(r0,r1);
911 r2 = ADD_VEC_SD(r2,r3);
912 r4 = ADD_VEC_SD(r4,r5);
913 r6 = ADD_VEC_SD(r6,r7);
914 r8 = ADD_VEC_SD(r8,r9);
915 rA = ADD_VEC_SD(rA,rB);
917 r0 = ADD_VEC_SD(r0,r2);
918 r4 = ADD_VEC_SD(r4,r6);
919 r8 = ADD_VEC_SD(r8,rA);
921 r0 = ADD_VEC_SD(r0,r4);
922 r0 = ADD_VEC_SD(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first double-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (double*) cast reinterprets temp's storage; if
 * DP_SCALAR_TYPE is not double-based this is type punning -- confirm. */
925 DP_SCALAR_TYPE temp = r0;
926 out += ((
double*)&temp)[0];
/* Double-precision ADD/MUL kernel fragment: 96 operations per loop pass
 * (eight groups of 12).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
935 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
938 r0 = SET_VEC_SD(0.01);
939 r1 = SET_VEC_SD(0.02);
940 r2 = SET_VEC_SD(0.03);
941 r3 = SET_VEC_SD(0.04);
942 r4 = SET_VEC_SD(0.05);
943 r5 = SET_VEC_SD(0.06);
944 r6 = SET_VEC_SD(0.07);
945 r7 = SET_VEC_SD(0.08);
946 r8 = SET_VEC_SD(0.09);
947 r9 = SET_VEC_SD(0.10);
948 rA = SET_VEC_SD(0.11);
949 rB = SET_VEC_SD(0.12);
950 rC = SET_VEC_SD(0.13);
951 rD = SET_VEC_SD(0.14);
952 rE = SET_VEC_SD(0.15);
953 rF = SET_VEC_SD(0.16);
/* Repeat the operation groups while c is below iterations. */
956 while (
c < iterations){
/* Group 1: r0..rB each combined with one of rC..rF, alternating MUL and ADD.
 * rC..rF are never reassigned in the lines shown. */
961 r0 = MUL_VEC_SD(r0,rC);
962 r1 = ADD_VEC_SD(r1,rD);
963 r2 = MUL_VEC_SD(r2,rE);
964 r3 = ADD_VEC_SD(r3,rF);
965 r4 = MUL_VEC_SD(r4,rC);
966 r5 = ADD_VEC_SD(r5,rD);
967 r6 = MUL_VEC_SD(r6,rE);
968 r7 = ADD_VEC_SD(r7,rF);
969 r8 = MUL_VEC_SD(r8,rC);
970 r9 = ADD_VEC_SD(r9,rD);
971 rA = MUL_VEC_SD(rA,rE);
972 rB = ADD_VEC_SD(rB,rF);
/* Group 2: ADD and MUL swapped, operands in reverse order (rF, rE, rD, rC). */
974 r0 = ADD_VEC_SD(r0,rF);
975 r1 = MUL_VEC_SD(r1,rE);
976 r2 = ADD_VEC_SD(r2,rD);
977 r3 = MUL_VEC_SD(r3,rC);
978 r4 = ADD_VEC_SD(r4,rF);
979 r5 = MUL_VEC_SD(r5,rE);
980 r6 = ADD_VEC_SD(r6,rD);
981 r7 = MUL_VEC_SD(r7,rC);
982 r8 = ADD_VEC_SD(r8,rF);
983 r9 = MUL_VEC_SD(r9,rE);
984 rA = ADD_VEC_SD(rA,rD);
985 rB = MUL_VEC_SD(rB,rC);
/* Group 3: textual repeat of group 1. */
987 r0 = MUL_VEC_SD(r0,rC);
988 r1 = ADD_VEC_SD(r1,rD);
989 r2 = MUL_VEC_SD(r2,rE);
990 r3 = ADD_VEC_SD(r3,rF);
991 r4 = MUL_VEC_SD(r4,rC);
992 r5 = ADD_VEC_SD(r5,rD);
993 r6 = MUL_VEC_SD(r6,rE);
994 r7 = ADD_VEC_SD(r7,rF);
995 r8 = MUL_VEC_SD(r8,rC);
996 r9 = ADD_VEC_SD(r9,rD);
997 rA = MUL_VEC_SD(rA,rE);
998 rB = ADD_VEC_SD(rB,rF);
/* Group 4: textual repeat of group 2. */
1000 r0 = ADD_VEC_SD(r0,rF);
1001 r1 = MUL_VEC_SD(r1,rE);
1002 r2 = ADD_VEC_SD(r2,rD);
1003 r3 = MUL_VEC_SD(r3,rC);
1004 r4 = ADD_VEC_SD(r4,rF);
1005 r5 = MUL_VEC_SD(r5,rE);
1006 r6 = ADD_VEC_SD(r6,rD);
1007 r7 = MUL_VEC_SD(r7,rC);
1008 r8 = ADD_VEC_SD(r8,rF);
1009 r9 = MUL_VEC_SD(r9,rE);
1010 rA = ADD_VEC_SD(rA,rD);
1011 rB = MUL_VEC_SD(rB,rC);
/* Group 5: textual repeat of group 1. */
1013 r0 = MUL_VEC_SD(r0,rC);
1014 r1 = ADD_VEC_SD(r1,rD);
1015 r2 = MUL_VEC_SD(r2,rE);
1016 r3 = ADD_VEC_SD(r3,rF);
1017 r4 = MUL_VEC_SD(r4,rC);
1018 r5 = ADD_VEC_SD(r5,rD);
1019 r6 = MUL_VEC_SD(r6,rE);
1020 r7 = ADD_VEC_SD(r7,rF);
1021 r8 = MUL_VEC_SD(r8,rC);
1022 r9 = ADD_VEC_SD(r9,rD);
1023 rA = MUL_VEC_SD(rA,rE);
1024 rB = ADD_VEC_SD(rB,rF);
/* Group 6: textual repeat of group 2. */
1026 r0 = ADD_VEC_SD(r0,rF);
1027 r1 = MUL_VEC_SD(r1,rE);
1028 r2 = ADD_VEC_SD(r2,rD);
1029 r3 = MUL_VEC_SD(r3,rC);
1030 r4 = ADD_VEC_SD(r4,rF);
1031 r5 = MUL_VEC_SD(r5,rE);
1032 r6 = ADD_VEC_SD(r6,rD);
1033 r7 = MUL_VEC_SD(r7,rC);
1034 r8 = ADD_VEC_SD(r8,rF);
1035 r9 = MUL_VEC_SD(r9,rE);
1036 rA = ADD_VEC_SD(rA,rD);
1037 rB = MUL_VEC_SD(rB,rC);
/* Group 7: textual repeat of group 1. */
1039 r0 = MUL_VEC_SD(r0,rC);
1040 r1 = ADD_VEC_SD(r1,rD);
1041 r2 = MUL_VEC_SD(r2,rE);
1042 r3 = ADD_VEC_SD(r3,rF);
1043 r4 = MUL_VEC_SD(r4,rC);
1044 r5 = ADD_VEC_SD(r5,rD);
1045 r6 = MUL_VEC_SD(r6,rE);
1046 r7 = ADD_VEC_SD(r7,rF);
1047 r8 = MUL_VEC_SD(r8,rC);
1048 r9 = ADD_VEC_SD(r9,rD);
1049 rA = MUL_VEC_SD(rA,rE);
1050 rB = ADD_VEC_SD(rB,rF);
/* Group 8: textual repeat of group 2. */
1052 r0 = ADD_VEC_SD(r0,rF);
1053 r1 = MUL_VEC_SD(r1,rE);
1054 r2 = ADD_VEC_SD(r2,rD);
1055 r3 = MUL_VEC_SD(r3,rC);
1056 r4 = ADD_VEC_SD(r4,rF);
1057 r5 = MUL_VEC_SD(r5,rE);
1058 r6 = ADD_VEC_SD(r6,rD);
1059 r7 = MUL_VEC_SD(r7,rC);
1060 r8 = ADD_VEC_SD(r8,rF);
1061 r9 = MUL_VEC_SD(r9,rE);
1062 rA = ADD_VEC_SD(rA,rD);
1063 rB = MUL_VEC_SD(rB,rC);
/* Fold r0..rB into r0 with pairwise additions (12 -> 6 -> 3 -> 1).
 * Presumably this keeps every computed value live so the work is not
 * optimized away -- confirm against the original comments. */
1071 r0 = ADD_VEC_SD(r0,r1);
1072 r2 = ADD_VEC_SD(r2,r3);
1073 r4 = ADD_VEC_SD(r4,r5);
1074 r6 = ADD_VEC_SD(r6,r7);
1075 r8 = ADD_VEC_SD(r8,r9);
1076 rA = ADD_VEC_SD(rA,rB);
1078 r0 = ADD_VEC_SD(r0,r2);
1079 r4 = ADD_VEC_SD(r4,r6);
1080 r8 = ADD_VEC_SD(r8,rA);
1082 r0 = ADD_VEC_SD(r0,r4);
1083 r0 = ADD_VEC_SD(r0,r8);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first double-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (double*) cast reinterprets temp's storage; if
 * DP_SCALAR_TYPE is not double-based this is type punning -- confirm. */
1086 DP_SCALAR_TYPE temp = r0;
1087 out += ((
double*)&temp)[0];
/* Half-precision FMA kernel fragment: 12 FMA_VEC_SH invocations per loop pass
 * (two groups of 6).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1094 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1097 r0 = SET_VEC_SH(0.01);
1098 r1 = SET_VEC_SH(0.02);
1099 r2 = SET_VEC_SH(0.03);
1100 r3 = SET_VEC_SH(0.04);
1101 r4 = SET_VEC_SH(0.05);
1102 r5 = SET_VEC_SH(0.06);
1103 r6 = SET_VEC_SH(0.07);
1104 r7 = SET_VEC_SH(0.08);
1105 r8 = SET_VEC_SH(0.09);
1106 r9 = SET_VEC_SH(0.10);
1107 rA = SET_VEC_SH(0.11);
1108 rB = SET_VEC_SH(0.12);
1109 rC = SET_VEC_SH(0.13);
1110 rD = SET_VEC_SH(0.14);
1111 rE = SET_VEC_SH(0.15);
1112 rF = SET_VEC_SH(0.16);
/* Repeat the FMA groups while c is below iterations. */
1115 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SH(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1120 FMA_VEC_SH(r0,r0,r7,r9);
1121 FMA_VEC_SH(r1,r1,r8,rA);
1122 FMA_VEC_SH(r2,r2,r9,rB);
1123 FMA_VEC_SH(r3,r3,rA,rC);
1124 FMA_VEC_SH(r4,r4,rB,rD);
1125 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1127 FMA_VEC_SH(r0,r0,rD,rF);
1128 FMA_VEC_SH(r1,r1,rC,rE);
1129 FMA_VEC_SH(r2,r2,rB,rD);
1130 FMA_VEC_SH(r3,r3,rA,rC);
1131 FMA_VEC_SH(r4,r4,r9,rB);
1132 FMA_VEC_SH(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1140 r0 = ADD_VEC_SH(r0,r1);
1141 r2 = ADD_VEC_SH(r2,r3);
1142 r4 = ADD_VEC_SH(r4,r5);
1144 r0 = ADD_VEC_SH(r0,r6);
1145 r2 = ADD_VEC_SH(r2,r4);
1147 r0 = ADD_VEC_SH(r0,r2);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
1151 out = ADD_VEC_SH(out,temp);
/* Half-precision FMA kernel fragment: 24 FMA_VEC_SH invocations per loop pass
 * (four groups of 6).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1157 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1160 r0 = SET_VEC_SH(0.01);
1161 r1 = SET_VEC_SH(0.02);
1162 r2 = SET_VEC_SH(0.03);
1163 r3 = SET_VEC_SH(0.04);
1164 r4 = SET_VEC_SH(0.05);
1165 r5 = SET_VEC_SH(0.06);
1166 r6 = SET_VEC_SH(0.07);
1167 r7 = SET_VEC_SH(0.08);
1168 r8 = SET_VEC_SH(0.09);
1169 r9 = SET_VEC_SH(0.10);
1170 rA = SET_VEC_SH(0.11);
1171 rB = SET_VEC_SH(0.12);
1172 rC = SET_VEC_SH(0.13);
1173 rD = SET_VEC_SH(0.14);
1174 rE = SET_VEC_SH(0.15);
1175 rF = SET_VEC_SH(0.16);
/* Repeat the FMA groups while c is below iterations. */
1178 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SH(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1183 FMA_VEC_SH(r0,r0,r7,r9);
1184 FMA_VEC_SH(r1,r1,r8,rA);
1185 FMA_VEC_SH(r2,r2,r9,rB);
1186 FMA_VEC_SH(r3,r3,rA,rC);
1187 FMA_VEC_SH(r4,r4,rB,rD);
1188 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1190 FMA_VEC_SH(r0,r0,rD,rF);
1191 FMA_VEC_SH(r1,r1,rC,rE);
1192 FMA_VEC_SH(r2,r2,rB,rD);
1193 FMA_VEC_SH(r3,r3,rA,rC);
1194 FMA_VEC_SH(r4,r4,r9,rB);
1195 FMA_VEC_SH(r5,r5,r8,rA);
/* Group 3: textual repeat of group 1. */
1197 FMA_VEC_SH(r0,r0,r7,r9);
1198 FMA_VEC_SH(r1,r1,r8,rA);
1199 FMA_VEC_SH(r2,r2,r9,rB);
1200 FMA_VEC_SH(r3,r3,rA,rC);
1201 FMA_VEC_SH(r4,r4,rB,rD);
1202 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 4: textual repeat of group 2. */
1204 FMA_VEC_SH(r0,r0,rD,rF);
1205 FMA_VEC_SH(r1,r1,rC,rE);
1206 FMA_VEC_SH(r2,r2,rB,rD);
1207 FMA_VEC_SH(r3,r3,rA,rC);
1208 FMA_VEC_SH(r4,r4,r9,rB);
1209 FMA_VEC_SH(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1217 r0 = ADD_VEC_SH(r0,r1);
1218 r2 = ADD_VEC_SH(r2,r3);
1219 r4 = ADD_VEC_SH(r4,r5);
1221 r0 = ADD_VEC_SH(r0,r6);
1222 r2 = ADD_VEC_SH(r2,r4);
1224 r0 = ADD_VEC_SH(r0,r2);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
1228 out = ADD_VEC_SH(out,temp);
/* Half-precision FMA kernel fragment: 48 FMA_VEC_SH invocations per loop pass
 * (eight groups of 6).  The enclosing function header, the update of c and
 * the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1234 register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1237 r0 = SET_VEC_SH(0.01);
1238 r1 = SET_VEC_SH(0.02);
1239 r2 = SET_VEC_SH(0.03);
1240 r3 = SET_VEC_SH(0.04);
1241 r4 = SET_VEC_SH(0.05);
1242 r5 = SET_VEC_SH(0.06);
1243 r6 = SET_VEC_SH(0.07);
1244 r7 = SET_VEC_SH(0.08);
1245 r8 = SET_VEC_SH(0.09);
1246 r9 = SET_VEC_SH(0.10);
1247 rA = SET_VEC_SH(0.11);
1248 rB = SET_VEC_SH(0.12);
1249 rC = SET_VEC_SH(0.13);
1250 rD = SET_VEC_SH(0.14);
1251 rE = SET_VEC_SH(0.15);
1252 rF = SET_VEC_SH(0.16);
/* Repeat the FMA groups while c is below iterations. */
1255 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SH(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1260 FMA_VEC_SH(r0,r0,r7,r9);
1261 FMA_VEC_SH(r1,r1,r8,rA);
1262 FMA_VEC_SH(r2,r2,r9,rB);
1263 FMA_VEC_SH(r3,r3,rA,rC);
1264 FMA_VEC_SH(r4,r4,rB,rD);
1265 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1267 FMA_VEC_SH(r0,r0,rD,rF);
1268 FMA_VEC_SH(r1,r1,rC,rE);
1269 FMA_VEC_SH(r2,r2,rB,rD);
1270 FMA_VEC_SH(r3,r3,rA,rC);
1271 FMA_VEC_SH(r4,r4,r9,rB);
1272 FMA_VEC_SH(r5,r5,r8,rA);
/* Group 3: textual repeat of group 1. */
1274 FMA_VEC_SH(r0,r0,r7,r9);
1275 FMA_VEC_SH(r1,r1,r8,rA);
1276 FMA_VEC_SH(r2,r2,r9,rB);
1277 FMA_VEC_SH(r3,r3,rA,rC);
1278 FMA_VEC_SH(r4,r4,rB,rD);
1279 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 4: textual repeat of group 2. */
1281 FMA_VEC_SH(r0,r0,rD,rF);
1282 FMA_VEC_SH(r1,r1,rC,rE);
1283 FMA_VEC_SH(r2,r2,rB,rD);
1284 FMA_VEC_SH(r3,r3,rA,rC);
1285 FMA_VEC_SH(r4,r4,r9,rB);
1286 FMA_VEC_SH(r5,r5,r8,rA);
/* Group 5: textual repeat of group 1. */
1288 FMA_VEC_SH(r0,r0,r7,r9);
1289 FMA_VEC_SH(r1,r1,r8,rA);
1290 FMA_VEC_SH(r2,r2,r9,rB);
1291 FMA_VEC_SH(r3,r3,rA,rC);
1292 FMA_VEC_SH(r4,r4,rB,rD);
1293 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 6: textual repeat of group 2. */
1295 FMA_VEC_SH(r0,r0,rD,rF);
1296 FMA_VEC_SH(r1,r1,rC,rE);
1297 FMA_VEC_SH(r2,r2,rB,rD);
1298 FMA_VEC_SH(r3,r3,rA,rC);
1299 FMA_VEC_SH(r4,r4,r9,rB);
1300 FMA_VEC_SH(r5,r5,r8,rA);
/* Group 7: textual repeat of group 1. */
1302 FMA_VEC_SH(r0,r0,r7,r9);
1303 FMA_VEC_SH(r1,r1,r8,rA);
1304 FMA_VEC_SH(r2,r2,r9,rB);
1305 FMA_VEC_SH(r3,r3,rA,rC);
1306 FMA_VEC_SH(r4,r4,rB,rD);
1307 FMA_VEC_SH(r5,r5,rC,rE);
/* Group 8: textual repeat of group 2. */
1309 FMA_VEC_SH(r0,r0,rD,rF);
1310 FMA_VEC_SH(r1,r1,rC,rE);
1311 FMA_VEC_SH(r2,r2,rB,rD);
1312 FMA_VEC_SH(r3,r3,rA,rC);
1313 FMA_VEC_SH(r4,r4,r9,rB);
1314 FMA_VEC_SH(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1322 r0 = ADD_VEC_SH(r0,r1);
1323 r2 = ADD_VEC_SH(r2,r3);
1324 r4 = ADD_VEC_SH(r4,r5);
1326 r0 = ADD_VEC_SH(r0,r6);
1327 r2 = ADD_VEC_SH(r2,r4);
1329 r0 = ADD_VEC_SH(r0,r2);
/* Accumulate temp into out.  Both are defined on lines not shown here
 * (temp presumably holds r0 -- TODO confirm). */
1333 out = ADD_VEC_SH(out,temp);
/* Single-precision FMA kernel fragment: 12 FMA_VEC_SS invocations per loop
 * pass (two groups of 6).  The enclosing function header, the update of c
 * and the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1362 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1365 r0 = SET_VEC_SS(0.01);
1366 r1 = SET_VEC_SS(0.02);
1367 r2 = SET_VEC_SS(0.03);
1368 r3 = SET_VEC_SS(0.04);
1369 r4 = SET_VEC_SS(0.05);
1370 r5 = SET_VEC_SS(0.06);
1371 r6 = SET_VEC_SS(0.07);
1372 r7 = SET_VEC_SS(0.08);
1373 r8 = SET_VEC_SS(0.09);
1374 r9 = SET_VEC_SS(0.10);
1375 rA = SET_VEC_SS(0.11);
1376 rB = SET_VEC_SS(0.12);
1377 rC = SET_VEC_SS(0.13);
1378 rD = SET_VEC_SS(0.14);
1379 rE = SET_VEC_SS(0.15);
1380 rF = SET_VEC_SS(0.16);
/* Repeat the FMA groups while c is below iterations. */
1383 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SS(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1388 FMA_VEC_SS(r0,r0,r7,r9);
1389 FMA_VEC_SS(r1,r1,r8,rA);
1390 FMA_VEC_SS(r2,r2,r9,rB);
1391 FMA_VEC_SS(r3,r3,rA,rC);
1392 FMA_VEC_SS(r4,r4,rB,rD);
1393 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1395 FMA_VEC_SS(r0,r0,rD,rF);
1396 FMA_VEC_SS(r1,r1,rC,rE);
1397 FMA_VEC_SS(r2,r2,rB,rD);
1398 FMA_VEC_SS(r3,r3,rA,rC);
1399 FMA_VEC_SS(r4,r4,r9,rB);
1400 FMA_VEC_SS(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1408 r0 = ADD_VEC_SS(r0,r1);
1409 r2 = ADD_VEC_SS(r2,r3);
1410 r4 = ADD_VEC_SS(r4,r5);
1412 r0 = ADD_VEC_SS(r0,r6);
1413 r2 = ADD_VEC_SS(r2,r4);
1415 r0 = ADD_VEC_SS(r0,r2);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
1418 SP_SCALAR_TYPE temp = r0;
1419 out += ((
float*)&temp)[0];
/* Single-precision FMA kernel fragment: 24 FMA_VEC_SS invocations per loop
 * pass (four groups of 6).  The enclosing function header, the update of c
 * and the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1428 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1431 r0 = SET_VEC_SS(0.01);
1432 r1 = SET_VEC_SS(0.02);
1433 r2 = SET_VEC_SS(0.03);
1434 r3 = SET_VEC_SS(0.04);
1435 r4 = SET_VEC_SS(0.05);
1436 r5 = SET_VEC_SS(0.06);
1437 r6 = SET_VEC_SS(0.07);
1438 r7 = SET_VEC_SS(0.08);
1439 r8 = SET_VEC_SS(0.09);
1440 r9 = SET_VEC_SS(0.10);
1441 rA = SET_VEC_SS(0.11);
1442 rB = SET_VEC_SS(0.12);
1443 rC = SET_VEC_SS(0.13);
1444 rD = SET_VEC_SS(0.14);
1445 rE = SET_VEC_SS(0.15);
1446 rF = SET_VEC_SS(0.16);
/* Repeat the FMA groups while c is below iterations. */
1449 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SS(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1454 FMA_VEC_SS(r0,r0,r7,r9);
1455 FMA_VEC_SS(r1,r1,r8,rA);
1456 FMA_VEC_SS(r2,r2,r9,rB);
1457 FMA_VEC_SS(r3,r3,rA,rC);
1458 FMA_VEC_SS(r4,r4,rB,rD);
1459 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1461 FMA_VEC_SS(r0,r0,rD,rF);
1462 FMA_VEC_SS(r1,r1,rC,rE);
1463 FMA_VEC_SS(r2,r2,rB,rD);
1464 FMA_VEC_SS(r3,r3,rA,rC);
1465 FMA_VEC_SS(r4,r4,r9,rB);
1466 FMA_VEC_SS(r5,r5,r8,rA);
/* Group 3: textual repeat of group 1. */
1468 FMA_VEC_SS(r0,r0,r7,r9);
1469 FMA_VEC_SS(r1,r1,r8,rA);
1470 FMA_VEC_SS(r2,r2,r9,rB);
1471 FMA_VEC_SS(r3,r3,rA,rC);
1472 FMA_VEC_SS(r4,r4,rB,rD);
1473 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 4: textual repeat of group 2. */
1475 FMA_VEC_SS(r0,r0,rD,rF);
1476 FMA_VEC_SS(r1,r1,rC,rE);
1477 FMA_VEC_SS(r2,r2,rB,rD);
1478 FMA_VEC_SS(r3,r3,rA,rC);
1479 FMA_VEC_SS(r4,r4,r9,rB);
1480 FMA_VEC_SS(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1488 r0 = ADD_VEC_SS(r0,r1);
1489 r2 = ADD_VEC_SS(r2,r3);
1490 r4 = ADD_VEC_SS(r4,r5);
1492 r0 = ADD_VEC_SS(r0,r6);
1493 r2 = ADD_VEC_SS(r2,r4);
1495 r0 = ADD_VEC_SS(r0,r2);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
1498 SP_SCALAR_TYPE temp = r0;
1499 out += ((
float*)&temp)[0];
/* Single-precision FMA kernel fragment: 48 FMA_VEC_SS invocations per loop
 * pass (eight groups of 6).  The enclosing function header, the update of c
 * and the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1508 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1511 r0 = SET_VEC_SS(0.01);
1512 r1 = SET_VEC_SS(0.02);
1513 r2 = SET_VEC_SS(0.03);
1514 r3 = SET_VEC_SS(0.04);
1515 r4 = SET_VEC_SS(0.05);
1516 r5 = SET_VEC_SS(0.06);
1517 r6 = SET_VEC_SS(0.07);
1518 r7 = SET_VEC_SS(0.08);
1519 r8 = SET_VEC_SS(0.09);
1520 r9 = SET_VEC_SS(0.10);
1521 rA = SET_VEC_SS(0.11);
1522 rB = SET_VEC_SS(0.12);
1523 rC = SET_VEC_SS(0.13);
1524 rD = SET_VEC_SS(0.14);
1525 rE = SET_VEC_SS(0.15);
1526 rF = SET_VEC_SS(0.16);
/* Repeat the FMA groups while c is below iterations. */
1529 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SS(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1534 FMA_VEC_SS(r0,r0,r7,r9);
1535 FMA_VEC_SS(r1,r1,r8,rA);
1536 FMA_VEC_SS(r2,r2,r9,rB);
1537 FMA_VEC_SS(r3,r3,rA,rC);
1538 FMA_VEC_SS(r4,r4,rB,rD);
1539 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1541 FMA_VEC_SS(r0,r0,rD,rF);
1542 FMA_VEC_SS(r1,r1,rC,rE);
1543 FMA_VEC_SS(r2,r2,rB,rD);
1544 FMA_VEC_SS(r3,r3,rA,rC);
1545 FMA_VEC_SS(r4,r4,r9,rB);
1546 FMA_VEC_SS(r5,r5,r8,rA);
/* Group 3: textual repeat of group 1. */
1548 FMA_VEC_SS(r0,r0,r7,r9);
1549 FMA_VEC_SS(r1,r1,r8,rA);
1550 FMA_VEC_SS(r2,r2,r9,rB);
1551 FMA_VEC_SS(r3,r3,rA,rC);
1552 FMA_VEC_SS(r4,r4,rB,rD);
1553 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 4: textual repeat of group 2. */
1555 FMA_VEC_SS(r0,r0,rD,rF);
1556 FMA_VEC_SS(r1,r1,rC,rE);
1557 FMA_VEC_SS(r2,r2,rB,rD);
1558 FMA_VEC_SS(r3,r3,rA,rC);
1559 FMA_VEC_SS(r4,r4,r9,rB);
1560 FMA_VEC_SS(r5,r5,r8,rA);
/* Group 5: textual repeat of group 1. */
1562 FMA_VEC_SS(r0,r0,r7,r9);
1563 FMA_VEC_SS(r1,r1,r8,rA);
1564 FMA_VEC_SS(r2,r2,r9,rB);
1565 FMA_VEC_SS(r3,r3,rA,rC);
1566 FMA_VEC_SS(r4,r4,rB,rD);
1567 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 6: textual repeat of group 2. */
1569 FMA_VEC_SS(r0,r0,rD,rF);
1570 FMA_VEC_SS(r1,r1,rC,rE);
1571 FMA_VEC_SS(r2,r2,rB,rD);
1572 FMA_VEC_SS(r3,r3,rA,rC);
1573 FMA_VEC_SS(r4,r4,r9,rB);
1574 FMA_VEC_SS(r5,r5,r8,rA);
/* Group 7: textual repeat of group 1. */
1576 FMA_VEC_SS(r0,r0,r7,r9);
1577 FMA_VEC_SS(r1,r1,r8,rA);
1578 FMA_VEC_SS(r2,r2,r9,rB);
1579 FMA_VEC_SS(r3,r3,rA,rC);
1580 FMA_VEC_SS(r4,r4,rB,rD);
1581 FMA_VEC_SS(r5,r5,rC,rE);
/* Group 8: textual repeat of group 2. */
1583 FMA_VEC_SS(r0,r0,rD,rF);
1584 FMA_VEC_SS(r1,r1,rC,rE);
1585 FMA_VEC_SS(r2,r2,rB,rD);
1586 FMA_VEC_SS(r3,r3,rA,rC);
1587 FMA_VEC_SS(r4,r4,r9,rB);
1588 FMA_VEC_SS(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1596 r0 = ADD_VEC_SS(r0,r1);
1597 r2 = ADD_VEC_SS(r2,r3);
1598 r4 = ADD_VEC_SS(r4,r5);
1600 r0 = ADD_VEC_SS(r0,r6);
1601 r2 = ADD_VEC_SS(r2,r4);
1603 r0 = ADD_VEC_SS(r0,r2);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first float-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (float*) cast reinterprets temp's storage; if
 * SP_SCALAR_TYPE is not float-based this is type punning -- confirm. */
1606 SP_SCALAR_TYPE temp = r0;
1607 out += ((
float*)&temp)[0];
/* Double-precision FMA kernel fragment: 12 FMA_VEC_SD invocations per loop
 * pass (two groups of 6).  The enclosing function header, the update of c
 * and the closing braces are not part of this listing. */
/* Sixteen working values declared with the register storage class. */
1616 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed r0..rF with the distinct constants 0.01 .. 0.16. */
1619 r0 = SET_VEC_SD(0.01);
1620 r1 = SET_VEC_SD(0.02);
1621 r2 = SET_VEC_SD(0.03);
1622 r3 = SET_VEC_SD(0.04);
1623 r4 = SET_VEC_SD(0.05);
1624 r5 = SET_VEC_SD(0.06);
1625 r6 = SET_VEC_SD(0.07);
1626 r7 = SET_VEC_SD(0.08);
1627 r8 = SET_VEC_SD(0.09);
1628 r9 = SET_VEC_SD(0.10);
1629 rA = SET_VEC_SD(0.11);
1630 rB = SET_VEC_SD(0.12);
1631 rC = SET_VEC_SD(0.13);
1632 rD = SET_VEC_SD(0.14);
1633 rE = SET_VEC_SD(0.15);
1634 rF = SET_VEC_SD(0.16);
/* Repeat the FMA groups while c is below iterations. */
1637 while (
c < iterations){
/* Group 1: one FMA per accumulator r0..r5; r7..rF appear only as the third
 * and fourth arguments.  Presumably FMA_VEC_SD(d,a,b,c) stores a*b+c into d
 * -- confirm against the macro definition. */
1642 FMA_VEC_SD(r0,r0,r7,r9);
1643 FMA_VEC_SD(r1,r1,r8,rA);
1644 FMA_VEC_SD(r2,r2,r9,rB);
1645 FMA_VEC_SD(r3,r3,rA,rC);
1646 FMA_VEC_SD(r4,r4,rB,rD);
1647 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 2: same accumulators with a different pairing of the r8..rF operands. */
1649 FMA_VEC_SD(r0,r0,rD,rF);
1650 FMA_VEC_SD(r1,r1,rC,rE);
1651 FMA_VEC_SD(r2,r2,rB,rD);
1652 FMA_VEC_SD(r3,r3,rA,rC);
1653 FMA_VEC_SD(r4,r4,r9,rB);
1654 FMA_VEC_SD(r5,r5,r8,rA);
/* Fold r0..r5 into r0 with pairwise additions.  r6 is also added in even
 * though no loop statement shown here references it. */
1662 r0 = ADD_VEC_SD(r0,r1);
1663 r2 = ADD_VEC_SD(r2,r3);
1664 r4 = ADD_VEC_SD(r4,r5);
1666 r0 = ADD_VEC_SD(r0,r6);
1667 r2 = ADD_VEC_SD(r2,r4);
1669 r0 = ADD_VEC_SD(r0,r2);
/* r0 is declared register, so its address cannot be taken; copy it into the
 * addressable temp and add temp's first double-sized element to out.
 * out is declared on a line not shown here.
 * NOTE(review): the (double*) cast reinterprets temp's storage; if
 * DP_SCALAR_TYPE is not double-based this is type punning -- confirm. */
1672 DP_SCALAR_TYPE temp = r0;
1673 out += ((
double*)&temp)[0];
/*
 * Fragment of a double-precision FMA kernel: 24 FMA_VEC_SD invocations are
 * visible inside the `while (c < iterations)` loop, as four groups of six.
 *
 * The leading numbers are listing line numbers carried in the text. They skip
 * (e.g. 1703 -> 1708 and 1734 -> 1742), so the function header, the
 * declarations of `c` and `out`, any loop-counter update, the closing braces
 * and the return statement are not shown here.
 * NOTE(review): presumably this is test_dp_scalar_VEC_FMA_24 -- confirm
 * against the complete file.
 */
1682 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed the sixteen working values r0..rF with 0.01 through 0.16. */
1685 r0 = SET_VEC_SD(0.01);
1686 r1 = SET_VEC_SD(0.02);
1687 r2 = SET_VEC_SD(0.03);
1688 r3 = SET_VEC_SD(0.04);
1689 r4 = SET_VEC_SD(0.05);
1690 r5 = SET_VEC_SD(0.06);
1691 r6 = SET_VEC_SD(0.07);
1692 r7 = SET_VEC_SD(0.08);
1693 r8 = SET_VEC_SD(0.09);
1694 r9 = SET_VEC_SD(0.10);
1695 rA = SET_VEC_SD(0.11);
1696 rB = SET_VEC_SD(0.12);
1697 rC = SET_VEC_SD(0.13);
1698 rD = SET_VEC_SD(0.14);
1699 rE = SET_VEC_SD(0.15);
1700 rF = SET_VEC_SD(0.16);
/* Keep going while c is below the caller-supplied iteration count. */
1703 while (
c < iterations){
/*
 * Group 1 of 4: r0..r5 each appear as the first two arguments.
 * FMA_VEC_SD is defined outside this view; presumably argument 1 is the
 * destination and arguments 2-4 are the multiply-add inputs -- TODO confirm.
 */
1708 FMA_VEC_SD(r0,r0,r7,r9);
1709 FMA_VEC_SD(r1,r1,r8,rA);
1710 FMA_VEC_SD(r2,r2,r9,rB);
1711 FMA_VEC_SD(r3,r3,rA,rC);
1712 FMA_VEC_SD(r4,r4,rB,rD);
1713 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 2 of 4: same r0..r5, with the trailing operand pairs reversed. */
1715 FMA_VEC_SD(r0,r0,rD,rF);
1716 FMA_VEC_SD(r1,r1,rC,rE);
1717 FMA_VEC_SD(r2,r2,rB,rD);
1718 FMA_VEC_SD(r3,r3,rA,rC);
1719 FMA_VEC_SD(r4,r4,r9,rB);
1720 FMA_VEC_SD(r5,r5,r8,rA);
/* Group 3 of 4: textually identical to group 1. */
1722 FMA_VEC_SD(r0,r0,r7,r9);
1723 FMA_VEC_SD(r1,r1,r8,rA);
1724 FMA_VEC_SD(r2,r2,r9,rB);
1725 FMA_VEC_SD(r3,r3,rA,rC);
1726 FMA_VEC_SD(r4,r4,rB,rD);
1727 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 4 of 4: textually identical to group 2. */
1729 FMA_VEC_SD(r0,r0,rD,rF);
1730 FMA_VEC_SD(r1,r1,rC,rE);
1731 FMA_VEC_SD(r2,r2,rB,rD);
1732 FMA_VEC_SD(r3,r3,rA,rC);
1733 FMA_VEC_SD(r4,r4,r9,rB);
1734 FMA_VEC_SD(r5,r5,r8,rA);
/*
 * Combine r0..r6 into r0 with ADD_VEC_SD: first the pairs (r0,r1), (r2,r3),
 * (r4,r5); then r0 with r6 and r2 with r4; finally r0 with r2.
 * r6 is only seeded above; no FMA_VEC_SD line shown here names it.
 */
1742 r0 = ADD_VEC_SD(r0,r1);
1743 r2 = ADD_VEC_SD(r2,r3);
1744 r4 = ADD_VEC_SD(r4,r5);
1746 r0 = ADD_VEC_SD(r0,r6);
1747 r2 = ADD_VEC_SD(r2,r4);
1749 r0 = ADD_VEC_SD(r0,r2);
/*
 * Copy r0 into an addressable temporary, then add its first double-sized
 * element (read through a double* cast) to `out`.
 */
1752 DP_SCALAR_TYPE temp = r0;
1753 out += ((
double*)&temp)[0];
/*
 * Fragment of a double-precision FMA kernel: 48 FMA_VEC_SD invocations are
 * visible inside the `while (c < iterations)` loop, as eight groups of six.
 *
 * The leading numbers are listing line numbers carried in the text. They skip
 * (e.g. 1783 -> 1788 and 1842 -> 1850), so the function header, the
 * declarations of `c` and `out`, any loop-counter update, the closing braces
 * and the return statement are not shown here.
 * NOTE(review): presumably this is test_dp_scalar_VEC_FMA_48 -- confirm
 * against the complete file.
 */
1762 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
/* Seed the sixteen working values r0..rF with 0.01 through 0.16. */
1765 r0 = SET_VEC_SD(0.01);
1766 r1 = SET_VEC_SD(0.02);
1767 r2 = SET_VEC_SD(0.03);
1768 r3 = SET_VEC_SD(0.04);
1769 r4 = SET_VEC_SD(0.05);
1770 r5 = SET_VEC_SD(0.06);
1771 r6 = SET_VEC_SD(0.07);
1772 r7 = SET_VEC_SD(0.08);
1773 r8 = SET_VEC_SD(0.09);
1774 r9 = SET_VEC_SD(0.10);
1775 rA = SET_VEC_SD(0.11);
1776 rB = SET_VEC_SD(0.12);
1777 rC = SET_VEC_SD(0.13);
1778 rD = SET_VEC_SD(0.14);
1779 rE = SET_VEC_SD(0.15);
1780 rF = SET_VEC_SD(0.16);
/* Keep going while c is below the caller-supplied iteration count. */
1783 while (
c < iterations){
/*
 * Group 1 of 8: r0..r5 each appear as the first two arguments.
 * FMA_VEC_SD is defined outside this view; presumably argument 1 is the
 * destination and arguments 2-4 are the multiply-add inputs -- TODO confirm.
 */
1788 FMA_VEC_SD(r0,r0,r7,r9);
1789 FMA_VEC_SD(r1,r1,r8,rA);
1790 FMA_VEC_SD(r2,r2,r9,rB);
1791 FMA_VEC_SD(r3,r3,rA,rC);
1792 FMA_VEC_SD(r4,r4,rB,rD);
1793 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 2 of 8: same r0..r5, with the trailing operand pairs reversed. */
1795 FMA_VEC_SD(r0,r0,rD,rF);
1796 FMA_VEC_SD(r1,r1,rC,rE);
1797 FMA_VEC_SD(r2,r2,rB,rD);
1798 FMA_VEC_SD(r3,r3,rA,rC);
1799 FMA_VEC_SD(r4,r4,r9,rB);
1800 FMA_VEC_SD(r5,r5,r8,rA);
/* Group 3 of 8: textually identical to group 1. */
1802 FMA_VEC_SD(r0,r0,r7,r9);
1803 FMA_VEC_SD(r1,r1,r8,rA);
1804 FMA_VEC_SD(r2,r2,r9,rB);
1805 FMA_VEC_SD(r3,r3,rA,rC);
1806 FMA_VEC_SD(r4,r4,rB,rD);
1807 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 4 of 8: textually identical to group 2. */
1809 FMA_VEC_SD(r0,r0,rD,rF);
1810 FMA_VEC_SD(r1,r1,rC,rE);
1811 FMA_VEC_SD(r2,r2,rB,rD);
1812 FMA_VEC_SD(r3,r3,rA,rC);
1813 FMA_VEC_SD(r4,r4,r9,rB);
1814 FMA_VEC_SD(r5,r5,r8,rA);
/* Group 5 of 8: textually identical to group 1. */
1816 FMA_VEC_SD(r0,r0,r7,r9);
1817 FMA_VEC_SD(r1,r1,r8,rA);
1818 FMA_VEC_SD(r2,r2,r9,rB);
1819 FMA_VEC_SD(r3,r3,rA,rC);
1820 FMA_VEC_SD(r4,r4,rB,rD);
1821 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 6 of 8: textually identical to group 2. */
1823 FMA_VEC_SD(r0,r0,rD,rF);
1824 FMA_VEC_SD(r1,r1,rC,rE);
1825 FMA_VEC_SD(r2,r2,rB,rD);
1826 FMA_VEC_SD(r3,r3,rA,rC);
1827 FMA_VEC_SD(r4,r4,r9,rB);
1828 FMA_VEC_SD(r5,r5,r8,rA);
/* Group 7 of 8: textually identical to group 1. */
1830 FMA_VEC_SD(r0,r0,r7,r9);
1831 FMA_VEC_SD(r1,r1,r8,rA);
1832 FMA_VEC_SD(r2,r2,r9,rB);
1833 FMA_VEC_SD(r3,r3,rA,rC);
1834 FMA_VEC_SD(r4,r4,rB,rD);
1835 FMA_VEC_SD(r5,r5,rC,rE);
/* Group 8 of 8: textually identical to group 2. */
1837 FMA_VEC_SD(r0,r0,rD,rF);
1838 FMA_VEC_SD(r1,r1,rC,rE);
1839 FMA_VEC_SD(r2,r2,rB,rD);
1840 FMA_VEC_SD(r3,r3,rA,rC);
1841 FMA_VEC_SD(r4,r4,r9,rB);
1842 FMA_VEC_SD(r5,r5,r8,rA);
/*
 * Combine r0..r6 into r0 with ADD_VEC_SD: first the pairs (r0,r1), (r2,r3),
 * (r4,r5); then r0 with r6 and r2 with r4; finally r0 with r2.
 * r6 is only seeded above; no FMA_VEC_SD line shown here names it.
 */
1850 r0 = ADD_VEC_SD(r0,r1);
1851 r2 = ADD_VEC_SD(r2,r3);
1852 r4 = ADD_VEC_SD(r4,r5);
1854 r0 = ADD_VEC_SD(r0,r6);
1855 r2 = ADD_VEC_SD(r2,r4);
1857 r0 = ADD_VEC_SD(r0,r2);
/*
 * Copy r0 into an addressable temporary, then add its first double-sized
 * element (read through a double* cast) to `out`.
 */
1860 DP_SCALAR_TYPE temp = r0;
1861 out += ((
double*)&temp)[0];
unsigned long long uint64
Stop counting hardware events in an event set.
static double c[MATRIX_SIZE][MATRIX_SIZE]
float test_hp_scalar_VEC_FMA_48(uint64 iterations)
float test_hp_scalar_VEC_24(uint64 iterations)
double test_dp_scalar_VEC_FMA_24(uint64 iterations)
double test_dp_scalar_VEC_FMA_12(uint64 iterations)
float test_sp_scalar_VEC_96(uint64 iterations)
float test_hp_scalar_VEC_FMA_24(uint64 iterations)
double test_dp_scalar_VEC_FMA_48(uint64 iterations)
void papi_stop_and_print_placeholder(long long theory, FILE *fp)
float test_sp_scalar_VEC_24(uint64 iterations)
double test_dp_scalar_VEC_48(uint64 iterations)
float test_hp_scalar_VEC_FMA_12(uint64 iterations)
float test_sp_scalar_VEC_FMA_12(uint64 iterations)
void papi_stop_and_print(long long theory, int EventSet, FILE *fp)
float test_sp_scalar_VEC_FMA_48(uint64 iterations)
double test_dp_scalar_VEC_96(uint64 iterations)
float test_hp_scalar_VEC_48(uint64 iterations)
double test_dp_scalar_VEC_24(uint64 iterations)
float test_sp_scalar_VEC_48(uint64 iterations)
float test_hp_scalar_VEC_96(uint64 iterations)
float test_sp_scalar_VEC_FMA_24(uint64 iterations)