001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routine (version 2.0.0)                       ///
003:  * ///                    Release Date: July, 4th 2009                                       ///
004:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
005:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
006: 
007: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
008: // PLASMA_zgels_Tile - solves overdetermined or underdetermined linear systems involving an M-by-N
009: // matrix A using the QR or the LQ factorization of A.  It is assumed that A has full rank.
010: // The following options are provided:
011: //
012: // 1. trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined
013: //    system, i.e., solve the least squares problem: minimize || B - A*X ||.
014: //
015: // 2. trans = PlasmaNoTrans and M < N:  find the minimum norm solution of an underdetermined
016: //    system A * X = B.
017: //
018: // Several right hand side vectors B and solution vectors X can be handled in a single call;
019: // they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS
020: // solution
021: // matrix X.
022: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
023: 
024: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
025: // trans    PLASMA_enum (IN)
026: //          Intended usage:
027: //          = PlasmaNoTrans:   the linear system involves A;
028: //          = PlasmaConjTrans: the linear system involves A**H.
029: //          Currently only PlasmaNoTrans is supported.
030: //
031: // A        PLASMA_Complex64_t* (INOUT)
032: //          On entry, the M-by-N matrix A.
033: //          On exit,
034: //          if M >= N, A is overwritten by details of its QR factorization as returned by
035: //                     PLASMA_zgeqrf;
036: //          if M < N, A is overwritten by details of its LQ factorization as returned by
037: //                      PLASMA_zgelqf.
038: //
039: // T        PLASMA_Complex64_t* (OUT)
040: //          On exit, auxiliary factorization data.
041: //
042: // B        PLASMA_Complex64_t* (INOUT)
043: //          On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise;
044: //          On exit, if return value = 0, B is overwritten by the solution vectors, stored
045: //          columnwise:
046: //          if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual
047: //          sum of squares for the solution in each column is given by the sum of squares of the
048: //          modulus of elements N+1 to M in that column;
049: //          if M < N, rows 1 to N of B contain the minimum norm solution vectors;
050: 
051: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
052: //          = 0: successful exit
053: 
054: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
055: #include "common.h"
056: 
057: int PLASMA_zgels_Tile(PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
058: {
059:     PLASMA_desc descA = *A;
060:     PLASMA_desc descT = *T;
061:     PLASMA_desc descB = *B;
062:     plasma_context_t *plasma;
063: 
064:     plasma = plasma_context_self();
065:     if (plasma == NULL) {
066:         plasma_fatal_error("PLASMA_zgels_Tile", "PLASMA not initialized");
067:         return PLASMA_ERR_NOT_INITIALIZED;
068:     }
069:     /* Check descriptors for correctness */
070:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
071:         plasma_error("PLASMA_zgels_Tile", "invalid first descriptor");
072:         return PLASMA_ERR_ILLEGAL_VALUE;
073:     }
074:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
075:         plasma_error("PLASMA_zgels_Tile", "invalid second descriptor");
076:         return PLASMA_ERR_ILLEGAL_VALUE;
077:     }
078:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
079:         plasma_error("PLASMA_zgels_Tile", "invalid third descriptor");
080:         return PLASMA_ERR_ILLEGAL_VALUE;
081:     }
082:     /* Check input arguments */
083:     if (trans != PlasmaNoTrans) {
084:         plasma_error("PLASMA_zgels_Tile", "only PlasmaNoTrans supported");
085:         return PLASMA_ERR_NOT_SUPPORTED;
086:     }
087:     /* Quick return */
088: /*
089:     if (min(M, min(N, NRHS)) == 0) {
090:         for (i = 0; i < max(M, N); i++)
091:             for (j = 0; j < NRHS; j++)
092:                 B[j*LDB+i] = 0.0;
093:         return PLASMA_SUCCESS;
094:     }
095: */
096:     if (descA.m >= descA.n)
097:     {
098:         plasma_parallel_call_2(plasma_pzgeqrf,
099:             PLASMA_desc, descA,
100:             PLASMA_desc, descT);
101: 
102:         plasma_parallel_call_3(plasma_pzunmqr,
103:             PLASMA_desc, descA,
104:             PLASMA_desc, descB,
105:             PLASMA_desc, descT);
106: 
107:         plasma_parallel_call_7(plasma_pztrsm,
108:             PLASMA_enum, PlasmaLeft,
109:             PLASMA_enum, PlasmaUpper,
110:             PLASMA_enum, PlasmaNoTrans,
111:             PLASMA_enum, PlasmaNonUnit,
112:             PLASMA_Complex64_t, 1.0,
113:             PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
114:             PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.n, descB.n));
115:     }
116:     else
117:     {
118: /*
119:         for (i = M; i < N; i++)
120:             for (j = 0; j < NRHS; j++)
121:                 B[j*LDB+i] = 0.0;
122: */
123:         plasma_parallel_call_1(plasma_tile_zero,
124:             PLASMA_desc, plasma_desc_submatrix(descB, descA.lm, 0, descA.ln-descA.lm, descB.ln));
125: 
126:         plasma_parallel_call_2(plasma_pzgelqf,
127:             PLASMA_desc, descA,
128:             PLASMA_desc, descT);
129: 
130:         plasma_parallel_call_7(plasma_pztrsm,
131:             PLASMA_enum, PlasmaLeft,
132:             PLASMA_enum, PlasmaLower,
133:             PLASMA_enum, PlasmaNoTrans,
134:             PLASMA_enum, PlasmaNonUnit,
135:             PLASMA_Complex64_t, 1.0,
136:             PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
137:             PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n));
138: 
139:         plasma_parallel_call_3(plasma_pzunmlq,
140:             PLASMA_desc, descA,
141:             PLASMA_desc, descB,
142:             PLASMA_desc, descT);
143:     }
144:     return PLASMA_SUCCESS;
145: }