PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_splgsy.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
19 #define REAL
20 #undef COMPLEX
21 
22 /*
23  Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix
24  generating threads only read Rnd64seed. It is safe to set Rnd64seed before
25  and after any calls to create_tile(). The only problem can be caused if
26  Rnd64seed is changed during the matrix generation time.
27  */
28 
29 //static unsigned long long int Rnd64seed = 100;
30 #define Rnd64_A 6364136223846793005ULL
31 #define Rnd64_C 1ULL
32 #define RndF_Mul 5.4210108624275222e-20f
33 #define RndD_Mul 5.4210108624275222e-20
34 
35 #ifdef COMPLEX
36 #define NBELEM 2
37 #else
38 #define NBELEM 1
39 #endif
40 
41 static unsigned long long int
42 Rnd64_jump(unsigned long long int n, unsigned long long int seed ) {
43  unsigned long long int a_k, c_k, ran;
44  int i;
45 
46  a_k = Rnd64_A;
47  c_k = Rnd64_C;
48 
49  ran = seed;
50  for (i = 0; n; n >>= 1, i++) {
51  if (n & 1)
52  ran = a_k * ran + c_k;
53  c_k *= (a_k + 1);
54  a_k *= a_k;
55  }
56 
57  return ran;
58 }
59 
60 #if defined(PLASMA_HAVE_WEAK)
61 #pragma weak CORE_splgsy = PCORE_splgsy
62 #define CORE_splgsy PCORE_splgsy
63 #endif
64 void CORE_splgsy( float bump, int m, int n, float *A, int lda,
65  int bigM, int m0, int n0, unsigned long long int seed )
66 {
67  float *tmp = A;
68  int64_t i, j;
69  unsigned long long int ran, jump;
70 
71  jump = m0 + n0 * bigM;
72 
73  /*
74  * Tile diagonal
75  */
76  if ( m0 == n0 ) {
77  for (j = 0; j < n; j++) {
78  ran = Rnd64_jump( NBELEM * jump, seed );
79 
80  for (i = j; i < m; i++) {
81  *tmp = 0.5f - ran * RndF_Mul;
82  ran = Rnd64_A * ran + Rnd64_C;
83 #ifdef COMPLEX
84  *tmp += I*(0.5f - ran * RndF_Mul);
85  ran = Rnd64_A * ran + Rnd64_C;
86 #endif
87  tmp++;
88  }
89  tmp += (lda - i + j + 1);
90  jump += bigM + j;
91  }
92 
93  for (j = 0; j < n; j++) {
94  A[j+j*lda] += bump;
95 
96  for (i=0; i<j; i++) {
97  A[lda*j+i] = A[lda*i+j];
98  }
99  }
100  }
101  /*
102  * Lower part
103  */
104  else if ( m0 > n0 ) {
105  for (j = 0; j < n; j++) {
106  ran = Rnd64_jump( NBELEM * jump, seed );
107 
108  for (i = 0; i < m; i++) {
109  *tmp = 0.5f - ran * RndF_Mul;
110  ran = Rnd64_A * ran + Rnd64_C;
111 #ifdef COMPLEX
112  *tmp += I*(0.5f - ran * RndF_Mul);
113  ran = Rnd64_A * ran + Rnd64_C;
114 #endif
115  tmp++;
116  }
117  tmp += (lda - i);
118  jump += bigM;
119  }
120  }
121  /*
122  * Upper part
123  */
124  else if ( m0 < n0 ) {
125  /* Overwrite jump */
126  jump = n0 + m0 * bigM;
127 
128  for (i = 0; i < m; i++) {
129  ran = Rnd64_jump( NBELEM * jump, seed );
130 
131  for (j = 0; j < n; j++) {
132  A[j*lda+i] = 0.5f - ran * RndF_Mul;
133  ran = Rnd64_A * ran + Rnd64_C;
134 #ifdef COMPLEX
135  A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
136  ran = Rnd64_A * ran + Rnd64_C;
137 #endif
138  }
139  jump += bigM;
140  }
141  }
142 }
143 
144 /***************************************************************************/
147 void QUARK_CORE_splgsy( Quark *quark, Quark_Task_Flags *task_flags,
148  float bump, int m, int n, float *A, int lda,
149  int bigM, int m0, int n0, unsigned long long int seed )
150 {
152  QUARK_Insert_Task(quark, CORE_splgsy_quark, task_flags,
153  sizeof(float), &bump, VALUE,
154  sizeof(int), &m, VALUE,
155  sizeof(int), &n, VALUE,
156  sizeof(float)*lda*n, A, OUTPUT,
157  sizeof(int), &lda, VALUE,
158  sizeof(int), &bigM, VALUE,
159  sizeof(int), &m0, VALUE,
160  sizeof(int), &n0, VALUE,
161  sizeof(unsigned long long int), &seed, VALUE,
162  0);
163 }
164 
165 /***************************************************************************/
168 #if defined(PLASMA_HAVE_WEAK)
169 #pragma weak CORE_splgsy_quark = PCORE_splgsy_quark
170 #define CORE_splgsy_quark PCORE_splgsy_quark
171 #endif
173 {
174  float bump;
175  int m;
176  int n;
177  float *A;
178  int lda;
179  int bigM;
180  int m0;
181  int n0;
182  unsigned long long int seed;
183 
184  quark_unpack_args_9( quark, bump, m, n, A, lda, bigM, m0, n0, seed );
185  CORE_splgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
186 }
187