I am testing zgemm_() subroutine and write the following code to compute C=A*B, of which all are 2*2 matrices.
--------------------------------------------------------------------------
#include<stdio.h>
#include<f2c.h>
#include<clapack.h>
#include<blaswrap.h>
int main(){
char transa = 'N';
char transb = 'N';
integer m = 2;
integer n = 2;
integer k = 2;
doublecomplex alpha;
doublecomplex a[2][2];
integer lda = 2;
doublecomplex b[2][2];
integer ldb = 2;
doublecomplex beta;
doublecomplex c[2][2];
integer ldc = 2;
int i, j;
a[0][0].r = 1;
a[0][1].r = 1;
a[1][0].r = 1;
a[1][1].r = 1;
b[0][0].r = 1;
b[0][1].r = 0;
b[1][0].r = 1;
b[1][1].r = 2;
alpha.r = 1; alpha.i = 0;
beta.r = 0; beta.i = 0;
// Print A, B, C before multiplication
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", a[i][j].r, a[i][j].i);
putchar('\n');
}
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", b[i][j].r, b[i][j].i);
putchar('\n');
}
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", c[i][j].r, c[i][j].i);
putchar('\n');
}
zgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
// Print A, B, C after multiplication
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", a[i][j].r, a[i][j].i);
putchar('\n');
}
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", b[i][j].r, b[i][j].i);
putchar('\n');
}
for(i=0;i<2;i++){
for(j=0;j<2;j++) printf("%+f i %+f ", c[i][j].r, c[i][j].i);
putchar('\n');
}
return 0;
}
--------------------------------------------------------------------------
The screen output is:
--------------------------------------------------------------------------
+1.000000 i +0.000000 +1.000000 i +0.000000 // A before
+1.000000 i +0.000000 +1.000000 i +0.000000
+1.000000 i +0.000000 +0.000000 i +0.000000 // B before
+1.000000 i +0.000000 +2.000000 i +0.000000
+0.000000 i +0.000000 +0.000000 i +0.000000 // C before
+0.000000 i +0.000000 +0.000000 i +0.000000
+1.000000 i +0.000000 +1.000000 i +0.000000 // A after
+1.000000 i +0.000000 +1.000000 i +0.000000
+1.000000 i +0.000000 +0.000000 i +0.000000 // B after
+1.000000 i +0.000000 +2.000000 i +0.000000
+1.000000 i +0.000000 +1.000000 i +0.000000 // C after
+3.000000 i +0.000000 +3.000000 i +0.000000
--------------------------------------------------------------------------
As can be seen, the returned C is clearly wrong. Which point am I missing?

