PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztrdalg_v2.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
/*
 * CORE_ztrdalg_v2 - runs the band-reduction kernels on a group of `grsiz`
 * consecutive blocks, starting at block `id`, for sweep `lcsweep`.
 *
 * NOTE: this listing is missing the first line of the signature and the
 * declarations of V and TAU; `uplo`, `V` and `TAU` are used below and are
 * forwarded unchanged to the CORE_zhb* kernels.
 *
 * Parameters visible here:
 *   pA        descriptor; its fields m, mb, nt and dtyp are read.
 *   grsiz     number of consecutive blocks processed by this call.
 *   lcsweep   offset added to the block start when computing `st`.
 *   id        index of the first block of the group.
 *   blksweep  block index that gets CORE_zhbelr instead of CORE_zhblrx.
 *
 * When PLASMA_HAVE_WEAK is defined, the function is compiled under the name
 * PCORE_ztrdalg_v2 and CORE_ztrdalg_v2 is declared as a weak alias for it.
 */
78 #if defined(PLASMA_HAVE_WEAK)
79 #pragma weak CORE_ztrdalg_v2 = PCORE_ztrdalg_v2
80 #define CORE_ztrdalg_v2 PCORE_ztrdalg_v2
81 #endif
83  PLASMA_desc *pA,
86  int grsiz, int lcsweep, int id, int blksweep)
87 {
88  PLASMA_desc A = *pA;
89  size_t eltsize = plasma_element_size(A.dtyp);
90  int N, NB;
91  int i, blkid, st, ed, KDM1;
92  int NT=pA->nt;
93 
94 
95  N = A.m;
96  NB = A.mb;
97  KDM1 = NB-1;
98 
99 
100  /* process each block of the group in turn */
101  for (i = 0; i < grsiz ; i++) {
102  blkid = id+i;
 /* 0-based [st, ed] range for this block, both ends clamped to N-1 */
103  st = min(blkid*NB+lcsweep+1, N-1);
104  ed = min(st+KDM1, N-1);
105  /*printf(" COUCOU voici N %5d NB %5d st %5d ed %5d lcsweep %5d id %5d blkid %5d\n",N, NB, st, ed, lcsweep, id, blkid);*/
106 
 /* empty range: this returns from the whole function, so any remaining
  * blocks of the group are skipped as well */
107  if(st==ed) /* quick return in case of last tile */
108  return;
109 
110  st =st +1; /* the kernels still take Fortran-style (1-based) indices, hence the +1 */
111  ed =ed +1;
 /* the block equal to blksweep uses CORE_zhbelr, every other block uses
  * CORE_zhblrx; both are followed by CORE_zhbrce under the same guard.
  * NOTE(review): the guard tests `id` (first block of the group), not
  * `blkid` - confirm this is intended. */
112  if(blkid==blksweep){
113  CORE_zhbelr(uplo, N, &A, V, TAU, st, ed, eltsize);
114  if(id!=(NT-1))CORE_zhbrce(uplo, N, &A, V, TAU, st, ed, eltsize);
115  }else{
116  CORE_zhblrx(uplo, N, &A, V, TAU, st, ed, eltsize);
117  if(id!=(NT-1))CORE_zhbrce(uplo, N, &A, V, TAU, st, ed, eltsize);
118  }
119  }
120 
121 }
122 
123 /***************************************************************************/
/*
 * QUARK wrapper: builds a packed task that runs CORE_ztrdalg_v2_quark and
 * inserts it into the scheduler.
 *
 * NOTE: this listing is missing the line carrying the function name and the
 * declarations of `quark`, `task_flags`, `V` and `TAU`; all four are used
 * below.
 *
 * The first eight packed arguments (uplo, pA, V, TAU, grsiz, lcsweep, id,
 * blksweep) are the ones the task body unpacks. The tile addresses packed
 * after them are flagged INOUT and are not unpacked by the task body;
 * presumably they only declare data dependencies to the scheduler - confirm
 * against the QUARK documentation.
 *
 * A(m,n) is a local shorthand for the address of tile (m,n) of the local
 * descriptor copy `A`; it is undefined again right after the function.
 */
126 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
128  int uplo,
129  PLASMA_desc *pA,
132  int grsiz, int lcsweep, int id, int blksweep)
133 {
134  Quark_Task *MYTASK;
135  PLASMA_desc A=*pA;
136  int ii, cur_id, NT=pA->nt;
137 
138  //printf("coucou from quark function id %d lcsweep %d blksweep %d grsiz %d NT %d\n", id, lcsweep, blksweep, grsiz, NT);
139  MYTASK = QUARK_Task_Init( quark, CORE_ztrdalg_v2_quark, task_flags);
 /* arguments consumed by the task body, in unpack order */
140  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(int), &uplo, VALUE );
141  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_desc), pA, NODEP );
142  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), V, NODEP );
143  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), TAU, NODEP );
144  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(int), &grsiz, VALUE );
145  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(int), &lcsweep, VALUE );
146  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(int), &id, VALUE );
147  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(int), &blksweep, VALUE );
148 
 /* tiles for the first block of the group: (id,id) and (id+1,id) are
  * always packed; (id+1,id+1) and (id+2,id+1) only when they lie within
  * the NT tiles.
  * NOTE(review): A(id+1, id) is packed with no bound check on id -
  * confirm callers never reach this with id == NT-1. */
149  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(id, id ), INOUT );
150  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(id+1, id ), INOUT );
151  if( id<(NT-1) )
152  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(id+1, id+1), INOUT );
153  if( id<(NT-2) )
154  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(id+2, id+1), INOUT );
155 
 /* remaining grsiz-1 blocks of the group: pack the next diagonal tile and
  * the tile below it for each.
  * NOTE(review): both guards test `id`, not `cur_id`, so their outcome is
  * the same on every iteration - confirm this is intended. */
156  cur_id = id;
157  for (ii = 1; ii < grsiz ; ii++) {
158  cur_id = cur_id+1;
159  if( id<(NT-1) )
160  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(cur_id+1, cur_id+1), INOUT );
161  if( id<(NT-2) )
162  QUARK_Task_Pack_Arg(quark, MYTASK, sizeof(PLASMA_Complex64_t), A(cur_id+2, cur_id+1), INOUT );
163  }
164 
165  QUARK_Insert_Task_Packed(quark, MYTASK);
166 }
167 #undef A
168 /***************************************************************************/
/*
 * CORE_ztrdalg_v2_quark - task body passed to QUARK_Task_Init in this file.
 * It unpacks the eight task arguments and forwards them unchanged to
 * CORE_ztrdalg_v2.
 *
 * NOTE: this listing is missing the signature line and the declarations of
 * V and TAU; `quark`, `V` and `TAU` are used below.
 *
 * When PLASMA_HAVE_WEAK is defined, the function is compiled under the name
 * PCORE_ztrdalg_v2_quark and CORE_ztrdalg_v2_quark is declared as a weak
 * alias for it.
 */
171 #if defined(PLASMA_HAVE_WEAK)
172 #pragma weak CORE_ztrdalg_v2_quark = PCORE_ztrdalg_v2_quark
173 #define CORE_ztrdalg_v2_quark PCORE_ztrdalg_v2_quark
174 #endif
176 {
177  PLASMA_desc *pA;
180  int uplo;
181  int grsiz, lcsweep, id, blksweep;
182 
 /* unpack order matches the order of the first eight QUARK_Task_Pack_Arg
  * calls that build this task */
183  quark_unpack_args_8(quark, uplo, pA, V, TAU, grsiz, lcsweep, id, blksweep);
184  CORE_ztrdalg_v2(uplo, pA, V, TAU, grsiz, lcsweep, id, blksweep);
185 }