I have compiled ATLAS using the nof77 option to create cblas. I also compiled CLAPACK. I enclose the following files: test1.c, svd.c, svd.h and a Makefile. If it matters, this is all being done on Ubuntu 10.04 x86-64, 8GB RAM.
Here is test1.c
- Code: Select all
/* test1.c, a simple shell program to call svd.c */
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <ctype.h>
#include <stdarg.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>
/* CBLAS was created by ATLAS using nof77 */
#include "/usr/local/atlas/include/cblas.h"
#include "/usr/local/atlas/include/clapack.h"
#include "svd.h"
//#define littletry
/* ifdef littletry, use a 2 x 2 Complex Matrix */
void MAIN_(){}
void MAIN__(){}
void _MAIN_(){}
int main(void)
{
long m,n,ii,jj;
double diff, NN;
double realsig, imagsig;
#ifdef littletry
m = 2; n = 2;
#else
m = 9; n = 9;
#endif
int ldA, ldB, ldC;
ldA = m; ldB = m; ldC = m;
doublecomplex A[m][m];
doublecomplex C[m][m];
doublecomplex I[m][m];
/* make identity matrix */
for ( ii=0; ii<m; ii++){
for (jj=0; jj<m; jj++){
if (ii == jj){
I[ii][jj].r = 1.0; I[ii][jj].i = 0.0;}
else{
I[ii][jj].r = 0.0; I[ii][jj].i = 0.0;}
}
}
/* A is a matrix which is the result of a bunch of CBLAS routines. To
simplify, let us start with a matrix initialize to the following
*/
/* below is Fortran Order. Previous routines used C order, so we will need
to convert for use in svd
A[0].r = 0.69096145; A[0].i = 0.54390208; // A[0][0]
A[1].r = 0.83805603; A[1].i = 0.27186403; // A[1][0]
A[2].r = 0.35559057; A[2].i = 0.12026953; // A[0][1]
A[3].r = 0.89833702; A[3].i = 0.06791132; // A[1][1]
*/
/* Assume routine starts with CBLASRowMajor */
/* original data, which works */
#ifdef littletry
/* a 2 x 2 */
A[0][0].r = 0.69096145; A[0][0].i = 0.54390208; // A[0][0] A[0]
A[0][1].r = 0.35559057; A[0][1].i = 0.12026953; // A[0][1] A[1]
A[1][0].r = 0.83805603; A[1][0].i = 0.27186403; // A[1][0] A[2]
A[1][1].r = 0.89833702; A[1][1].i = 0.06791132; // A[1][1] A[3]
#else
/* a 9 x 9 */
A[0][0].r = 0.1056245; A[0][0].i = 3.008661e-18;
A[0][1].r = 0.12341259; A[0][1].i = -0.048803565;
A[0][2].r = 0.12811331; A[0][2].i = -0.10658785;
A[0][3].r = 0.10972881; A[0][3].i = -0.16601482;
A[0][4].r = 0.066265673; A[0][4].i = -0.21196376;
A[0][5].r = 0.0050456614; A[0][5].i = -0.23662813;
A[0][6].r = -0.060823528; A[0][6].i = -0.23785819;
A[0][7].r = -0.12032375; A[0][7].i = -0.22090523;
A[0][8].r = -0.17269674; A[0][8].i = -0.18375118;
A[1][0].r = 0.12341259; A[1][0].i = 0.048803565;
A[1][1].r = 0.23163068; A[1][1].i = 1.6263033e-17;
A[1][2].r = 0.33062898; A[1][2].i = -0.091557204;
A[1][3].r = 0.39583681; A[1][3].i = -0.22168363;
A[1][4].r = 0.40637821; A[1][4].i = -0.36824733;
A[1][5].r = 0.36179959; A[1][5].i = -0.51181571;
A[1][6].r = 0.26657764; A[1][6].i = -0.63062745;
A[1][7].r = 0.13972331; A[1][7].i = -0.71466428;
A[1][8].r = -0.011894371; A[1][8].i = -0.75281686;
A[2][0].r = 0.12811331; A[2][0].i = 0.10658785;
A[2][1].r = 0.33062898; A[2][1].i = 0.091557204;
A[2][2].r = 0.54440918; A[2][2].i = 6.6542908e-18;
A[2][3].r = 0.7242013; A[2][3].i = -0.17340506;
A[2][4].r = 0.8296598; A[2][4].i = -0.40593058;
A[2][5].r = 0.84813909; A[2][5].i = -0.66878153;
A[2][6].r = 0.76642337; A[2][6].i = -0.92313234;
A[2][7].r = 0.60716441; A[2][7].i = -1.1398751;
A[2][8].r = 0.37907731; A[2][8].i = -1.2956598;
A[3][0].r = 0.10972881; A[3][0].i = 0.16601482;
A[3][1].r = 0.39583681; A[3][1].i = 0.22168363;
A[3][2].r = 0.7242013; A[3][2].i = 0.17340506;
A[3][3].r = 1.0389688; A[3][3].i = 1.8458542e-17;
A[3][4].r = 1.2733233; A[3][4].i = -0.28371476;
A[3][5].r = 1.4011977; A[3][5].i = -0.64373949;
A[3][6].r = 1.3852911; A[3][6].i = -1.0307846;
A[3][7].r = 1.243208; A[3][7].i = -1.3947563;
A[3][8].r = 0.9826123; A[3][8].i = -1.6993985;
A[4][0].r = 0.066265673; A[4][0].i = 0.21196376;
A[4][1].r = 0.40637821; A[4][1].i = 0.36824733;
A[4][2].r = 0.8296598; A[4][2].i = 0.40593058;
A[4][3].r = 1.2733233; A[4][3].i = 0.28371476;
A[4][4].r = 1.6548532; A[4][4].i = 1.9515639e-18;
A[4][5].r = 1.925963; A[4][5].i = -0.41275362;
A[4][6].r = 2.0247087; A[4][6].i = -0.90354436;
A[4][7].r = 1.9556722; A[4][7].i = -1.4043293;
A[4][8].r = 1.7198514; A[4][8].i = -1.8659826;
A[5][0].r = 0.0050456614; A[5][0].i = 0.23662813;
A[5][1].r = 0.36179959; A[5][1].i = 0.51181571;
A[5][2].r = 0.84813909; A[5][2].i = 0.66878153;
A[5][3].r = 1.4011977; A[5][3].i = 0.64373949;
A[5][4].r = 1.925963; A[5][4].i = 0.41275362;
A[5][5].r = 2.3602752; A[5][5].i = 1.5612511e-17;
A[5][6].r = 2.6099806; A[5][6].i = -0.55240669;
A[5][7].r = 2.6629588; A[5][7].i = -1.1625421;
A[5][8].r = 2.5083167; A[5][8].i = -1.7713606;
A[6][0].r = -0.060823528; A[6][0].i = 0.23785819;
A[6][1].r = 0.26657764; A[6][1].i = 0.63062745;
A[6][2].r = 0.76642337; A[6][2].i = 0.92313234;
A[6][3].r = 1.3852911; A[6][3].i = 1.0307846;
A[6][4].r = 2.0247087; A[6][4].i = 0.90354436;
A[6][5].r = 2.6099806; A[6][5].i = 0.55240669;
A[6][6].r = 3.0257882; A[6][6].i = -1.9125326e-16;
A[6][7].r = 3.2336681; A[6][7].i = -0.66610971;
A[6][8].r = 3.2113302; A[6][8].i = -1.3815207;
A[7][0].r = -0.12032375; A[7][0].i = 0.22090523;
A[7][1].r = 0.13972331; A[7][1].i = 0.71466428;
A[7][2].r = 0.60716441; A[7][2].i = 1.1398751;
A[7][3].r = 1.243208; A[7][3].i = 1.3947563;
A[7][4].r = 1.9556722; A[7][4].i = 1.4043293;
A[7][5].r = 2.6629588; A[7][5].i = 1.1625421;
A[7][6].r = 3.2336681; A[7][6].i = 0.66610971;
A[7][7].r = 3.6083259; A[7][7].i = 1.8648277e-16;
A[7][8].r = 3.7462007; A[7][8].i = -0.77126832;
A[8][0].r = -0.17269674; A[8][0].i = 0.18375118;
A[8][1].r = -0.011894371; A[8][1].i = 0.75281686;
A[8][2].r = 0.37907731; A[8][2].i = 1.2956598;
A[8][3].r = 0.9826123; A[8][3].i = 1.6993985;
A[8][4].r = 1.7198514; A[8][4].i = 1.8659826;
A[8][5].r = 2.5083167; A[8][5].i = 1.7713606;
A[8][6].r = 3.2113302; A[8][6].i = 1.3815207;
A[8][7].r = 3.7462007; A[8][7].i = 0.77126832;
A[8][8].r = 4.0597545; A[8][8].i = -1.1709383e-17;
#endif
printf("Original matrix in CblasRowMajor form\n\n");
for (ii=0; ii<m; ii++){
for (jj=0; jj<n; jj++){
printf("A[%li][%li]= %12.8lg, %12.8lgj\n", ii, jj,
A[ii][jj].r, A[ii][jj].i);
}
}
/* Now form the transpose using zgemm */
doublecomplex alpha; alpha.r = 1.000000; alpha.i = 0.000000;
doublecomplex beta; beta.r = 0.000000; beta.i = 0.000000;
cblas_zgemm( CblasRowMajor, CblasTrans, CblasNoTrans, m, m, m,
&alpha, A, ldA, I, ldB, &beta, C, ldC);
/* If all goes well, C will now contain A transpose */
/* print it out to check... */
printf("Now matrix is transposed\n\n");
for (ii=0; ii<m; ii++){
for (jj=0; jj<n; jj++){
printf("C[%li][%li]= %12.8lg, %12.8lgj\n", ii, jj,
C[ii][jj].r, C[ii][jj].i);
}
}
/* define variables for mysvd */
char JOBU, JOBVT;
JOBU = 'A'; JOBVT = 'A';
int lda = m; int ldu = m;
int ldvt = n;
double S[m];
doublecomplex UU[m][m];
doublecomplex VV[m][m];
int mn = min(m,n);
int MN = max(m,n);
int lwork = 2*max(3*mn+MN,5*mn);
doublecomplex wk[lwork];
if ( mysvd( JOBU, JOBVT, m, n, &C[0][0], lda, &S[0], &UU[0][0], ldu, &VV[0][0], ldvt, &wk[0], lwork) == 0)
{
printf("Hey that seemed to work! Let's continue development...\n");
printf("\nThe matrix U is\n");
for (jj=0;jj<n; jj++){
for (ii=0; ii<m; ii++){
printf("U[%li][%li]= %12.8lg, %12.8lgj\n", ii, jj,
UU[ii][jj].r, UU[ii][jj].i);
}
}
printf("\nThe matrix V is\n");
for (jj=0;jj<n; jj++){
for (ii=0; ii<m; ii++){
printf("V[%li][%li]= %12.8lg, %12.8lgj\n", ii, jj,
VV[ii][jj].r, VV[ii][jj].i);
}
}
printf("\nThe array S is\n");
for (ii=0; ii<m; ii++){
printf("S[%li]= %12.8lg\n", ii, S[ii]);
}
}
else{
printf("bummer, this attempt did not work\n");
}
return 0;
}
Here is svd.c
- Code: Select all
/* svd.c */
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include "/home/bruce/Apps/CLAPACK-3.2.1/INCLUDE/f2c.h"
#include "/usr/local/atlas/include/cblas.h"
#include "/usr/local/atlas/include/clapack.h"
#include "/home/bruce/Apps/CLAPACK-3.2.1/INCLUDE/clapack.h"
/*
zgesvd_( &JOBU, &JOBVT, &M, &N, A, &LDA, s, uu, &LDU,
vt, &LDVT, wk, &LWORK, &RWORK, &INFO);
*/
int mysvd( char jobu, char jobv, int m, int n, doublecomplex *A, int lda, double *S,
doublecomplex *U, int ldu, doublecomplex *VT, int ldvt,
doublecomplex *wk, int lwork)
{
doublereal RWORK;
integer M = (integer)m;
integer N = (integer)n;
integer INFO;
integer LDA = (integer)lda;
integer LDU = (integer)ldu;
integer LDVT = (integer)ldvt;
integer LWORK = (integer)lwork;
printf("Got inside svd.c, just before zgesvd_ call.\n");
printf("jobu = %c, jobv = %c, m = %li, n = %li, LDA = %li, LDU = %li, LDVT = %li, LWORK = %li\n", jobu, jobv, (long)m, (long)n, (long)LDA, (long)LDU, (long)LDVT, (long)LWORK);
/*
zgesvd_( &jobu, &jobv, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, wk,
&LWORK, &RWORK, &INFO);
*/
zgesvd_( &jobu, &jobv, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, wk,
&LWORK, &RWORK, &INFO);
if ((long) INFO == 0 )
return 0;
else
return -1;
}
Here is the header file svd.h
- Code: Select all
/* Function Prototype for mysvd, svd.h */
//#ifdef __cplusplus
//extern "C"{
//#endif
#include "/home/bruce/Apps/CLAPACK-3.2.1/INCLUDE/f2c.h"
//#ifndef doublecomplex
//typedef struct{ double r; double i;} doublecomplex;
//#endif
int mysvd( char jobu, char jobv, int m, int n, doublecomplex *A, int lda, double *S,
doublecomplex *U, int ldu, doublecomplex *VT, int ldvt,
doublecomplex *wk, int lwork);
//#ifdef __cplusplus
//}
//#endif
Finally the make file
- Code: Select all
# Makefile for test
CC = gcc
CFLAGS = -O3 -m64 -Wall -Wcast-qual
ROOTPATH = /home/bruce/Apps/CLAPACK-3.2.1
BLASPATH = /usr/local/atlas
INCDIRS = -I$(BLASPATH)/include/ -I$(ROOTPATH)/INCLUDE/ -I$(ROOTPATH)
INCSVDDIRS = $(INCDIRS)
F2CDIR = $(ROOTPATH)/F2CLIBS
LDLIBS = $(ROOTPATH)/lapack_LINUX.a $(ROOTPATH)/libcblaswr.a $(BLASPATH)/lib/libcblas.a $(BLASPATH)/lib/libatlas.a $(F2CDIR)/libf2c.a -lm
test1: test1.o svd.o
$(CC) test1.o svd.o $(LDLIBS) -o test1
svd.o: svd.c
$(CC) svd.c $(INCSVDDIRS) -c
test1.o: test1.c
$(CC) test1.c $(INCDIRS) -c
clean:
rm -f test1 test1.o svd.o
Here is a copy of running test1 when littletry is defined:
$ ./test1
Original matrix in CblasRowMajor form
A[0][0]= 0.69096145, 0.54390208j
A[0][1]= 0.35559057, 0.12026953j
A[1][0]= 0.83805603, 0.27186403j
A[1][1]= 0.89833702, 0.06791132j
Now matrix is transposed
C[0][0]= 0.69096145, 0.54390208j
C[0][1]= 0.83805603, 0.27186403j
C[1][0]= 0.35559057, 0.12026953j
C[1][1]= 0.89833702, 0.06791132j
Got inside svd.c, just before zgesvd_ call.
jobu = A, jobv = A, m = 2, n = 2, LDA = 2, LDU = 2, LDVT = 2, LWORK = 20
Hey that seemed to work! Let's continue development...
The matrix U is
U[0][0]= -0.47469266, -0.36010181j
U[1][0]= 0.58398959, 0.55131635j
U[0][1]= -0.76057606, -0.25791011j
U[1][1]= -0.54044185, -0.25085661j
The matrix V is
V[0][0]= -0.79276742, 0j
V[1][0]= -0.58771384, 0.16159287j
V[0][1]= 0.60952425, 0j
V[1][1]= -0.76440008, 0.21017304j
The array S is
S[0]= 1.553263
S[1]= 0.29901511
However, when littletry is undefined, one uses the larger matrix 9 x 9 matrix. In the interest of saving space on this already voluminous post, the matrix A is transposed correctly. After the matrix C is printed out (all 81 elements) one gets:
Got inside svd.c, just before zgesvd_ call.
jobu = A, jobv = A, m = 9, n = 9, LDA = 9, LDU = 9, LDVT = 9, LWORK = 90
Segmentation fault
Any tips, clues, suggestions or otherwise would be greatly appreciated. If I can get this to work, then I can get on with the rest of the algorithm... I have tried increasing the size of LWORK by a factor of 10 and 100, but it did not help.

