PAPI 7.1.0.0
Loading...
Searching...
No Matches
vector_testcode.c File Reference
Include dependency graph for vector_testcode.c:

Go to the source code of this file.

Macros

#define NUMBER   100
 

Functions

void inline_packed_sse_add (float *aa, float *bb, float *cc)
 
void inline_packed_sse_mul (float *aa, float *bb, float *cc)
 
void inline_packed_sse2_add (double *aa, double *bb, double *cc)
 
void inline_packed_sse2_mul (double *aa, double *bb, double *cc)
 
void inline_unpacked_sse_add (float *aa, float *bb, float *cc)
 
void inline_unpacked_sse_mul (float *aa, float *bb, float *cc)
 
void inline_unpacked_sse2_add (double *aa, double *bb, double *cc)
 
void inline_unpacked_sse2_mul (double *aa, double *bb, double *cc)
 
int main (int argc, char **argv)
 

Macro Definition Documentation

◆ NUMBER

#define NUMBER   100

Definition at line 4 of file vector_testcode.c.

Function Documentation

◆ inline_packed_sse2_add()

void inline_packed_sse2_add ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 27 of file vector_testcode.c.

28{
29 __asm__ __volatile__( "movapd (%0), %%xmm0;"
30 "movapd (%1), %%xmm1;"
31 "addpd %%xmm0, %%xmm1;"
32 "movapd %%xmm1, (%2);"::"r"( aa ),
33 "r"( bb ), "r"( cc )
34 :"%xmm0", "%xmm1" );
35}
double bb[N]
Definition: byte_profile.c:35
double aa[N]
Definition: byte_profile.c:35
Here is the caller graph for this function:

◆ inline_packed_sse2_mul()

void inline_packed_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 37 of file vector_testcode.c.

38{
39 __asm__ __volatile__( "movapd (%0), %%xmm0;"
40 "movapd (%1), %%xmm1;"
41 "mulpd %%xmm0, %%xmm1;"
42 "movapd %%xmm1, (%2);"::"r"( aa ),
43 "r"( bb ), "r"( cc )
44 :"%xmm0", "%xmm1" );
45}
Here is the caller graph for this function:

◆ inline_packed_sse_add()

void inline_packed_sse_add ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 7 of file vector_testcode.c.

8{
9 __asm__ __volatile__( "movaps (%0), %%xmm0;"
10 "movaps (%1), %%xmm1;"
11 "addps %%xmm0, %%xmm1;"
12 "movaps %%xmm1, (%2);"::"r"( aa ),
13 "r"( bb ), "r"( cc )
14 :"%xmm0", "%xmm1" );
15}
Here is the caller graph for this function:

◆ inline_packed_sse_mul()

void inline_packed_sse_mul ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 17 of file vector_testcode.c.

18{
19 __asm__ __volatile__( "movaps (%0), %%xmm0;"
20 "movaps (%1), %%xmm1;"
21 "mulps %%xmm0, %%xmm1;"
22 "movaps %%xmm1, (%2);"::"r"( aa ),
23 "r"( bb ), "r"( cc )
24 :"%xmm0", "%xmm1" );
25}
Here is the caller graph for this function:

◆ inline_unpacked_sse2_add()

void inline_unpacked_sse2_add ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 65 of file vector_testcode.c.

66{
67 __asm__ __volatile__( "movsd (%0), %%xmm0;"
68 "movsd (%1), %%xmm1;"
69 "addsd %%xmm0, %%xmm1;"
70 "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
71 :"%xmm0", "%xmm1" );
72}
Here is the caller graph for this function:

◆ inline_unpacked_sse2_mul()

void inline_unpacked_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 74 of file vector_testcode.c.

75{
76 __asm__ __volatile__( "movsd (%0), %%xmm0;"
77 "movsd (%1), %%xmm1;"
78 "mulsd %%xmm0, %%xmm1;"
79 "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
80 :"%xmm0", "%xmm1" );
81}
Here is the caller graph for this function:

◆ inline_unpacked_sse_add()

void inline_unpacked_sse_add ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 47 of file vector_testcode.c.

48{
49 __asm__ __volatile__( "movss (%0), %%xmm0;"
50 "movss (%1), %%xmm1;"
51 "addss %%xmm0, %%xmm1;"
52 "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
53 :"%xmm0", "%xmm1" );
54}
Here is the caller graph for this function:

◆ inline_unpacked_sse_mul()

void inline_unpacked_sse_mul ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 56 of file vector_testcode.c.

57{
58 __asm__ __volatile__( "movss (%0), %%xmm0;"
59 "movss (%1), %%xmm1;"
60 "mulss %%xmm0, %%xmm1;"
61 "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
62 :"%xmm0", "%xmm1" );
63}
Here is the caller graph for this function:

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 84 of file vector_testcode.c.

85{
86 int i, packed = 0, sse = 0;
87 float a[4] = { 1.0, 2.0, 3.0, 4.0 };
88 float b[4] = { 2.0, 3.0, 4.0, 5.0 };
89 float c[4] = { 0.0, 0.0, 0.0, 0.0 };
90 double d[4] = { 1.0, 2.0, 3.0, 4.0 };
91 double e[4] = { 2.0, 3.0, 4.0, 5.0 };
92 double f[4] = { 0.0, 0.0, 0.0, 0.0 };
93
94 if ( argc != 3 ) {
95 bail:
96 printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
97 exit( 1 );
98 }
99 if ( strcasecmp( argv[1], "packed" ) == 0 )
100 packed = 1;
101 else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
102 packed = 0;
103 else
104 goto bail;
105 if ( strcasecmp( argv[2], "sse" ) == 0 )
106 sse = 1;
107 else if ( strcasecmp( argv[2], "sse2" ) == 0 )
108 sse = 0;
109 else
110 goto bail;
111
112#if 0
113 if ( ( sse ) &&
114 ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
115 printf( "This processor does not have SSE.\n" );
116 exit( 1 );
117 }
118 if ( ( sse == 0 ) &&
119 ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
120 printf( "This processor does not have SSE2.\n" );
121 exit( 1 );
122 }
123#endif
124
125 printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );
126 printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );
127
128 if ( ( packed == 0 ) && ( sse == 1 ) ) {
129 for ( i = 0; i < NUMBER; i++ ) {
130 inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
131 }
132 printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );
133
134 for ( i = 0; i < NUMBER; i++ ) {
135 inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
136 }
137 printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
138 }
139 if ( ( packed == 1 ) && ( sse == 1 ) ) {
140 for ( i = 0; i < NUMBER; i++ ) {
141 inline_packed_sse_add( &a[0], &b[0], &c[0] );
142 }
143 printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
144 c[2], c[3] );
145 for ( i = 0; i < NUMBER; i++ ) {
146 inline_packed_sse_mul( &a[0], &b[0], &c[0] );
147 }
148 printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
149 c[2], c[3] );
150 }
151
152 if ( ( packed == 0 ) && ( sse == 0 ) ) {
153 for ( i = 0; i < NUMBER; i++ ) {
154 inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
155 }
156 printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] );
157
158 for ( i = 0; i < NUMBER; i++ ) {
159 inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
160 }
161 printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] );
162 }
163 if ( ( packed == 1 ) && ( sse == 0 ) ) {
164 for ( i = 0; i < NUMBER; i++ ) {
165 inline_packed_sse2_add( &d[0], &e[0], &f[0] );
166 }
167 printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] );
168
169 for ( i = 0; i < NUMBER; i++ ) {
170 inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
171 }
172 printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] );
173 }
174
175
176 exit( 0 );
177}
int i
double f(double a)
Definition: cpi.c:23
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:38
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:39
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
void inline_packed_sse_add(float *aa, float *bb, float *cc)
void inline_unpacked_sse2_mul(double *aa, double *bb, double *cc)
void inline_packed_sse2_add(double *aa, double *bb, double *cc)
void inline_unpacked_sse2_add(double *aa, double *bb, double *cc)
void inline_unpacked_sse_mul(float *aa, float *bb, float *cc)
#define NUMBER
void inline_packed_sse_mul(float *aa, float *bb, float *cc)
void inline_packed_sse2_mul(double *aa, double *bb, double *cc)
void inline_unpacked_sse_add(float *aa, float *bb, float *cc)
Here is the call graph for this function: