22 #include <sys/types.h>
61 int nprob,
int me,
int ne,
int L,
65 int ngrp, thrdbypb, thrdtot, nleaders;
70 ngrp = thrdtot/thrdbypb;
73 if( (nprob * me * ne * L) != (m * n) ) {
74 plasma_error(__func__,
"problem size does not match matrix size");
78 if( thrdbypb > thrdtot ) {
79 plasma_error(__func__,
"number of thread per problem must be less or equal to total number of threads");
82 if( (thrdtot % thrdbypb) != 0 ) {
83 plasma_error(__func__,
"number of thread per problem must divide the total number of thread");
88 if( (me < 2) || (ne < 2) || (nprob < 1) ) {
101 for (i=0; i<thrdtot; i++)
108 ipb = (nprob / ngrp)*ngrp;
112 for (i=0; i<nleaders; i+=3) {
114 owner =
minloc(thrdbypb, Tp);
117 Tp[owner] = Tp[owner] + leaders[i+1] *
L;
118 leaders[i+2] = owner;
124 for (i=0; i<nleaders; i+=3) {
125 Tp[0] = Tp[0] + leaders[i+1] *
L;
131 for (i=0; i< (nprob/ngrp); i++) {
147 for (i=0; i<thrdtot; i++)
152 for (i=0; i<nleaders; i+=3) {
154 owner =
minloc(thrdtot, Tp);
157 Tp[owner] = Tp[owner] + leaders[i+1] *
L;
158 leaders[i+2] = owner;
163 for (i=0; i<nleaders; i+=3) {
164 Tp[0] = Tp[0] + leaders[i+1] *
L;
170 for (i=ipb; i<nprob; i++) {
236 int i, x, snix, cl, iprob;
237 int n, m,
L, nleaders, thrdbypb;
246 locrnk = myrank % thrdbypb;
247 iprob = myrank / thrdbypb;
250 Al = &(A[iprob*m*n*
L]);
256 for(i=0; i<nleaders; i+=3) {
257 if( leaders[i+2] == locrnk ) {
262 else if( leaders[i+2] == -2 ) {
264 x = leaders[i+1] / thrdbypb;
267 cl = leaders[i+1] - x * (thrdbypb - 1);
270 snix = (s *
modpow(n, locrnk*x, m * n - 1)) % q;
290 int *leaders,
int nleaders,
int nprob,
305 for(iprob=0; iprob<nprob; iprob++) {
306 Al = &(A[iprob*size]);
317 for(i=0; i<nleaders; i+=3) {
320 leaders[i], m, n, L, Al);