I am using Valgrind's memcheck to supervise the memory manipulation of my code. Yet, it is reporting a very strange invalid access from the LAPACK.
The following code depicts a minimally working example, which can be compiled as follows:
- Code: Select all
g++ -std=c++11 -c -g -Wall mwe.cc
gfortran -o mwe mwe.o $(HOME)/Libraries/lapack-3.4.1/liblapack.a
$(HOME)/Libraries/BLAS/libblas.a -lstdc++
The code creates a generator vector, its related Vandermonde matrix, and then performs a QR factorization of it. Once the factorization completes, we extract and print the Q matrix.
- Code: Select all
#include <iostream>
#include <iomanip>
#include <cmath>
#include <cstring>
#define VERBOSE 1 // Should I verbose the execution?
using namespace std;
extern "C" {
// External routines from the LAPACK:
// LAPACK can be installed from either:
// 1. http://www.netlib.org/lapack/
// 2. https://software.intel.com/en-us/non-commercial-software-development
// Double precision General QR Factorization to compute the null-space:
// Theory: http://www.netlib.org/lapack/lug/node69.html
// http://www.netlib.no/netlib/lapack/double/dgeqrf.f
void dgeqrf_(int *m,
int *n,
double *a,
int *lda,
double *tau,
double *work,
int *lwork,
int *info);
// dormqr_ overwrites the general real M-by-N matrix C with
//
// SIDE = 'L' SIDE = 'R'
// TRANS = 'N': Q * C C * Q
// TRANS = 'T': Q**T * C C * Q**T
//
// where Q is a real orthogonal matrix defined as the product of k
// elementary reflectors
//
// Q = H(1) H(2) . . . H(k)
//
// as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
// if SIDE = 'R'.
// http://www.netlib.no/netlib/lapack/double/dormqr.f
void dormqr_(char *side,
char *trans,
int *m,
int *n,
int *k,
double *a,
int *lda,
double *tau,
double *c,
int *ldc,
double *work,
int *lwork,
int *info);
}
// Used to compute a 1D index (idx) from a 2D set of indexes.
inline int idx(const int ii, const int offset, const int jj) {
return ii*offset + jj;
}
// Generates an identity matrix:
double* Identity(int num_rows, int num_cols, bool transpose) {
double* II {}; // Output identity matrix.
if (num_rows < 1 || num_cols < 1) {
return nullptr;
}
try {
II = new double[num_rows*num_cols];
memset (II, 0, sizeof(II[0])*num_rows*num_cols);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
if (transpose) {
for (auto ii = 0; ii < num_rows; ii++) {
for (auto jj = 0; jj < num_cols; jj++) {
II[idx(jj,num_rows,ii)] = (ii == jj)? 1.0: 0.0;
}
}
} else {
for (auto ii = 0; ii < num_rows; ii++) {
for (auto jj = 0; jj < num_cols; jj++) {
II[idx(ii,num_cols,jj)] = (ii == jj)? 1.0: 0.0;
}
}
}
return II;
}
// Transposes a matrix:
double* Transpose(double *AA, int rr, int cc) {
double* AT {}; // Output transpose matrix.
if (rr <= 0 || cc <= 0 || AA == nullptr) {
return nullptr;
}
try {
AT = new double[rr*cc];
memset(AT, 0, sizeof(AT[0])*(rr*cc));
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
for (auto ii = 0; ii < rr; ii++) {
for (auto jj = 0; jj < cc; jj++) {
AT[idx(jj,rr,ii)] = AA[idx(ii,cc,jj)];
}
}
return AT;
}
// Generates a Vandermonde matrix with a given generator vector gen:
double* VandermondeOrDie(const double *gen, // Given generator vector.
const int length_gen, // How long generator vector?
const int num_rows, // How many rows?
const int num_cols, // How many columns?
const bool transpose) { // Should create transposed?
double* TT {}; // Output Vandermonde matrix.
if (length_gen < 1 || num_rows < 1 || num_cols < 1 || length_gen > num_cols) {
return nullptr;
}
try {
TT = new double[num_rows*num_cols];
memset (TT, 0, sizeof(TT[0])*num_rows*num_cols);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
if (transpose) {
for (auto ii = 0; ii < num_rows; ii++) {
for (auto jj = 0; jj < num_cols; jj++) {
TT[idx(jj,num_rows,ii)] = pow(gen[jj], (double) ii);
}
}
} else {
for (auto ii = 0; ii < num_rows; ii++) {
for (auto jj = 0; jj < num_cols; jj++) {
TT[idx(ii,num_cols,jj)] = pow(gen[jj], (double) ii);
}
}
}
return TT;
}
int main () {
// Input:
int kk {8}; // Order of numerical accuracy of the operator.
// Variables:
char side {'L'}; // Side for overwriting Q.
char trans {'N'}; // Should it be transposed?
double* gg {}; // Spatial coordinates for the boundary points.
double* AA {}; // Boundary Vandermonde to get the null-space.
double* tau {}; // The scalar factors of elementary reflectors for QR.
double* work {}; // Work array for dgels_ solver and dgeqrf_ QR factor.
double* QQ {}; // Q matrix from qr(AA) containing AA's null-space.
int info {0}; // Information regarding solution of the system.
int num_bndy_approxs; // Required boundary points for uniform order accuracy.
int AA_num_rows; // Number rows for the boundary Vandermonde matrices.
int AA_num_cols; // Number cols for the boundary Vandermonde matrices.
int AA_ld; // Leading dimension of the boundary Vandermonde matrix.
int lwork {-1}; // Length of work array for the dgels_ solver.
int QQ_ld; // Leading dimension of the Q = qr(AA) matrix.
// Compute amount of boundary points required to achieve uniform accuracy:
num_bndy_approxs = (int) (3.0*((double) kk)/2.0);
// Generator vector for the next approximation:
try {
gg = new double[num_bndy_approxs];
memset(gg, 0, sizeof(gg[0])*num_bndy_approxs);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
gg[0] = -1.0/2.0;
for (auto ii = 1; ii < num_bndy_approxs; ii++) {
gg[ii] = gg[ii - 1] + 1.0;
}
// Before solving this and any other systems for the boundary matrices, we
// must obtain the null-space from the first matrix. For this we will follow
// recommendations given in:
//
// http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=5&t=4506
//
// We will compute the QR Factorization of the transpose, as in the
// following (MATLAB) pseudo-code:
//
// [Q,R] = qr(A'); % Full QR as defined in
// % http://www.stanford.edu/class/ee263/notes/qr_matlab.pdf
// null-space = Q(:, last (kk/2 - 1) columns of Q);
//
// However, given the nature of the Vandermonde matrices we will compute--
// we will compute dim_null of them--they will all posses the same
// null-space. Therefore, we impose the convention of computing the
// null-space of the first Vandermonde matrix only (for the west boundary).
// Vandermonde matrix (using boundary coordinates as the generator):
// We compute it NOT transposed, which is the ACTUAL transpose for
// LAPACK's QR factor.
AA_num_rows = kk + 1;
AA_num_cols = num_bndy_approxs;
AA = VandermondeOrDie(gg, num_bndy_approxs,
AA_num_rows, AA_num_cols, false);
if (AA == nullptr) {
cerr << "ERROR constructing matrix at line " << __LINE__ + 2 << endl;
cerr << "Exiting..." << endl;
return EXIT_FAILURE;
}
// Prepare to factorize: allocate and inquire for the value of lwork:
try {
tau = new double[1];
memset(tau, 0, sizeof(tau[0])*1);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
try {
work = new double[1];
memset(work, 0, sizeof(work[0])*1);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
AA_ld = AA_num_cols;
info = 0;
dgeqrf_(&AA_num_cols, &AA_num_rows, AA, &AA_ld, tau, work, &lwork, &info);
lwork = (int) work[0];
// Once we know lwork, we can actually invoke the factorization:
if (tau != nullptr) {
delete[] tau;
tau = nullptr;
}
try {
tau = new double[AA_num_cols];
memset(tau, 0, sizeof(tau[0])*AA_num_cols);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
if (work != nullptr) {
delete[] work;
work = nullptr;
}
try {
work = new double[lwork];
memset(work, 0, sizeof(work[0])*(lwork));
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
dgeqrf_(&AA_num_cols, &AA_num_rows, AA, &AA_ld, tau, work, &lwork, &info);
if (!info) {
if (VERBOSE) {
cout << "QR factorization successfully completed!" << endl << endl;
}
} else {
cerr << "Something went wrong factoring! info = " << info << endl;
}
// Generate the real matrix Q with orthonormal columns. This has to be done
// separately since the actual output of dgeqrf_(AA) represents the
// orthogonal matrix Q as a product of AA_num_cols elementary Householder
// reflectors:
QQ_ld = AA_num_cols;
try {
QQ = new double[QQ_ld*QQ_ld];
memset(QQ, 0, sizeof(QQ[0])*(QQ_ld*QQ_ld));
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
// QQ must be initialized to the identity matrix:
for (auto ii = 0; ii < QQ_ld; ii++) {
for (auto jj = 0; jj < QQ_ld; jj++) {
if (ii == jj) {
QQ[idx(ii,QQ_ld,jj)] = 1.0;
}
}
}
// To complete the recovery of the real QQ, we must re-inquire the new value
// for lwork that is going to be used by the dormqr_ routine we will use.
// Notice we leave the tau array alone... i.e. we only re-allocate the work
// array.
if (work != nullptr) {
delete[] work;
work = nullptr;
}
try {
work = new double[1];
memset(work, 0, sizeof(work[0])*1);
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
lwork = -1;
dormqr_(&side, &trans, &AA_num_cols, &AA_num_cols, &AA_num_cols, AA, &AA_ld,
tau, QQ, &QQ_ld, work, &lwork, &info);
lwork = (int) work[0];
if (work != nullptr) {
delete[] work;
work = nullptr;
}
try {
work = new double[lwork];
memset(work, 0, sizeof(work[0])*(lwork));
} catch (bad_alloc &memory_allocation_exception) {
cerr << "Memory allocation exception on line " << __LINE__ - 2 << endl;
cerr << memory_allocation_exception.what() << endl;
}
info = 0;
// WARNING: We get the following warning from Valgrind's memcheck, when
// invoking the following routine:
// ==14545== Invalid read of size 8
// ==14545== at 0x40BA4B: dorm2r_ (dorm2r.f:272)
// ==14545== by 0x4080F1: dormqr_ (dormqr.f:289)
// ==14545== by 0x402450: main (divrcrs.cc:480)
// ==14545== Address 0x5f79c78 is not stack'd, malloc'd or (recently) free'd
// ==14545==
// ==14545== Invalid write of size 8
// ==14545== at 0x40BA7A: dorm2r_ (dorm2r.f:273)
// ==14545== by 0x4080F1: dormqr_ (dormqr.f:289)
// ==14545== by 0x402450: main (divrcrs.cc:480)
// ==14545== Address 0x5f79c78 is not stack'd, malloc'd or (recently) free'd
// ==14545==
// ==14545== Invalid write of size 8
// ==14545== at 0x40BB4F: dorm2r_ (dorm2r.f:276)
// ==14545== by 0x4080F1: dormqr_ (dormqr.f:289)
// ==14545== by 0x402450: main (divrcrs.cc:480)
// ==14545== Address 0x5f79c78 is not stack'd, malloc'd or (recently) free'd
// ==14545==
// TODO: We should make a more profound debug, unless we can no longer
// depend on the matrix QQ (within the code flow).
dormqr_(&side, &trans, &AA_num_cols, &AA_num_cols, &AA_num_cols, AA, &AA_ld,
tau, QQ, &QQ_ld, work, &lwork, &info);
if (!info) {
if (VERBOSE) {
cout << "Q matrix successfully attained!" << endl << endl;
cout << "Q = " << endl;
for (auto ii = 0; ii < QQ_ld; ii++) {
for (auto jj = 0; jj < QQ_ld; jj++) {
cout << setw(12) << QQ[idx(ii,QQ_ld,jj)];
}
cout << endl;
}
cout << endl;
}
} else {
cerr << "Something went wrong building QQ! info = " << info << endl;
}
// Garbage collection :)
if (gg != nullptr) {
delete[] gg;
gg = nullptr;
}
if (AA != nullptr) {
delete[] AA;
AA = nullptr;
}
if (tau != nullptr) {
delete[] tau;
tau = nullptr;
}
if (work != nullptr) {
delete[] work;
work = nullptr;
}
if (QQ != nullptr) {
delete[] QQ;
QQ = nullptr;
}
}
Output:
- Code: Select all
ejspeiro@Eduardo-Alienware-14:~/Dropbox/mimops$ make run
./mwe
QR factorization successfully completed!
Q matrix successfully attained!
Q =
-0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675 -0.288675
-0.459933 -0.376309 -0.292685 -0.209061 -0.125436 -0.0418121 0.0418121 0.125436 0.209061 0.292685 0.376309 0.459933
0.501828 0.228104 0.00912415 -0.155111 -0.2646 -0.319345 -0.319345 -0.2646 -0.155111 0.00912415 0.228104 0.501828
0.459933 -0.0418121 -0.292685 -0.348434 -0.26481 -0.0975616 0.0975616 0.26481 0.348434 0.292685 0.0418121 -0.459933
-0.368767 0.301718 0.368767 0.145272 -0.134097 -0.312893 -0.312893 -0.134097 0.145272 0.368767 0.301718 -0.368767
-0.261608 0.451869 0.166478 -0.229898 -0.348811 -0.15855 0.15855 0.348811 0.229898 -0.166478 -0.451869 0.261608
0.164197 -0.462738 0.164197 0.373176 0.0597081 -0.298541 -0.298541 0.0597081 0.373176 0.164197 -0.462738 0.164197
-0.0904791 0.370142 -0.412914 -0.136541 0.335595 0.230311 -0.230311 -0.335595 0.136541 0.412914 -0.370142 0.0904791
0.0430767 -0.23888 0.466011 -0.254544 -0.289789 0.274124 0.274124 -0.289789 -0.254544 0.466011 -0.23888 0.0430767
-0.00874036 0.0583 -0.146954 0.141249 0.0486822 -0.156543 -0.130333 0.566054 -0.653529 0.386018 -0.119774 0.0155708
0.0117399 -0.0941426 0.322481 -0.603006 0.637649 -0.321721 -0.0245471 0.104354 -0.0153304 -0.0339244 0.0199423 -0.00349546
0.0106414 -0.0748491 0.203312 -0.217426 -0.109592 0.578839 -0.66744 0.29945 0.0592101 -0.129478 0.0558632 -0.00853182
It seems to be running fine! Except that when I go to Valgrind's memcheck, I get this output:
- Code: Select all
==21810== HEAP SUMMARY:
==21810== in use at exit: 0 bytes in 0 blocks
==21810== total heap usage: 30 allocs, 30 frees, 35,757 bytes allocated
==21810==
==21810== All heap blocks were freed -- no leaks are possible
==21810==
==21810== ERROR SUMMARY: 6 errors from 3 contexts (suppressed: 0 from 0)
==21810==
==21810== 2 errors in context 1 of 3:
==21810== Invalid write of size 8
==21810== at 0x40A0A1: dorm2r_ (dorm2r.f:276)
==21810== by 0x406B30: dormqr_ (dormqr.f:289)
==21810== by 0x4020C6: main (mwe.cc:399)
==21810== Address 0x5f79870 is not stack'd, malloc'd or (recently) free'd
==21810==
==21810==
==21810== 2 errors in context 2 of 3:
==21810== Invalid write of size 8
==21810== at 0x409FCC: dorm2r_ (dorm2r.f:273)
==21810== by 0x406B30: dormqr_ (dormqr.f:289)
==21810== by 0x4020C6: main (mwe.cc:399)
==21810== Address 0x5f79870 is not stack'd, malloc'd or (recently) free'd
==21810==
==21810==
==21810== 2 errors in context 3 of 3:
==21810== Invalid read of size 8
==21810== at 0x409F9D: dorm2r_ (dorm2r.f:272)
==21810== by 0x406B30: dormqr_ (dormqr.f:289)
==21810== by 0x4020C6: main (mwe.cc:399)
==21810== Address 0x5f79870 is not stack'd, malloc'd or (recently) free'd
==21810==
==21810== ERROR SUMMARY: 6 errors from 3 contexts (suppressed: 0 from 0)
Any hint? o.O Through DDD, I tracked the bug to the suggested line...
It seems that, the routine indexes the matrix at the very last value of it... namely, A(N,N). But what to do from C++11?
Feel free to suggest about my coding!
Thanks in advanced!


