Overview: CUDA 8.0, Pascal cards, Cray compiler wrappers (cc, CC and ftn), gcc/5.3.0 (also tried 6.1.0).
Code: Select all
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> srun nvidia-smi
Thu Jan 25 15:00:18 2018
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.59 Driver Version: 384.59 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... On | 00000000:02:00.0 Off | 0 |
| N/A 28C P0 25W / 250W | 0MiB / 16276MiB | 0% E. Process |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
Code: Select all
#//////////////////////////////////////////////////////////////////////////////
# -- MAGMA (version 2.3.0) --
# Univ. of Tennessee, Knoxville
# Univ. of California, Berkeley
# Univ. of Colorado, Denver
# @date November 2017
#//////////////////////////////////////////////////////////////////////////////
# GPU_TARGET contains one or more of Fermi, Kepler, Maxwell, Pascal, Volta
# to specify for which GPUs you want to compile MAGMA:
# Fermi - NVIDIA compute capability 2.x cards
# Kepler - NVIDIA compute capability 3.x cards
# Maxwell - NVIDIA compute capability 5.x cards
# Pascal - NVIDIA compute capability 6.x cards
# Volta - NVIDIA compute capability 7.x cards
# The default is "Kepler Maxwell Pascal".
# Note that NVIDIA no longer supports 1.x cards, as of CUDA 6.5.
# See http://developer.nvidia.com/cuda-gpus
#
#GPU_TARGET ?= Kepler Maxwell Pascal
# --------------------
# programs
# Compilers and archive tools. cc, CC and ftn are the Cray compiler wrappers.
CC = cc
CXX = CC
NVCC = nvcc
FORT = ftn
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
# --------------------
# flags
# Use -fPIC to make shared (.so) and static (.a) library;
# can be commented out if making only static library.
#FPIC = -fPIC
# '+=' keeps any CFLAGS/FFLAGS/F90FLAGS already supplied by the environment.
CFLAGS += -g -DADD_ -DMAGMA_SETAFFINITY
FFLAGS += -g -DADD_ -Wall -Wno-unused-dummy-argument
F90FLAGS += -g -DADD_ -Wall -Wno-unused-dummy-argument -x f95-cpp-input
NVCCFLAGS = -g -DADD_ -Xcompiler "-fno-strict-aliasing $(FPIC)"
# C++11 (gcc >= 4.7) is not required, but has benefits like atomic operations.
# CXXFLAGS is assigned with ':=' so it snapshots CFLAGS *before* -std=c99 is
# appended below. Because this assignment replaces CXXFLAGS outright, a
# separate 'CXXFLAGS += ...' line ahead of it would have no effect and is
# therefore not present.
CXXFLAGS := $(CFLAGS) -std=c++11
CFLAGS += -std=c99
# --------------------
# libraries
# Only the CUDA libraries are listed explicitly. No BLAS/LAPACK library is
# named here; presumably the Cray wrappers link one implicitly from the loaded
# modules -- TODO confirm which BLAS is actually picked up at link time.
LIB = -lcublas -lcusparse
# --------------------
# directories
# define library directories preferably in your environment, or here.
#OPENBLASDIR ?= /usr/local/openblas
# CUDADIR must not be empty: with an empty value the two settings below
# degrade to '-L/lib64' and '-I/include'. If the environment does not provide
# CUDADIR, fall back to CUDATOOLKIT_HOME.
# NOTE(review): CUDATOOLKIT_HOME is assumed to be exported by the Cray
# cudatoolkit module -- confirm on the target system. If it is unset the
# behaviour is the same as before this default was added.
CUDADIR ?= $(CUDATOOLKIT_HOME)
#-include make.check-openblas
#-include make.check-cuda
LIBDIR = -L$(CUDADIR)/lib64
INC = -I$(CUDADIR)/include
Code: Select all
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> module list
Currently Loaded Modulefiles:
1) modules/3.2.10.6 14) gni-headers/5.0.11-6.0.4.0_7.2__g7136988.ari
2) gcc/5.3.0 15) xpmem/2.2.2-6.0.4.0_3.1__g43b0535.ari
3) craype-haswell 16) job/2.2.2-6.0.4.0_8.2__g3c644b5.ari
4) craype-network-aries 17) dvs/2.7_2.2.32-6.0.4.1_7.1__ged1923a
5) craype/2.5.12 18) alps/6.4.1-6.0.4.0_7.2__g86d0f3d.ari
6) cray-mpich/7.6.0 19) rca/2.2.11-6.0.4.0_13.2__g84de67a.ari
7) slurm/17.02.9-1 20) atp/2.1.1
8) xalt/daint-2016.11 21) perftools-base/6.5.1
9) cray-libsci/17.06.1 22) PrgEnv-gnu/6.0.4
10) udreg/2.3.2-6.0.4.0_12.2__g2f9c3ee.ari 23) cray-libsci_acc/17.03.1
11) ugni/6.0.14-6.0.4.0_14.1__ge7db4a2.ari 24) cudatoolkit/8.0.61_2.4.3-6.0.4.0_3.1__gb475d12
12) pmi/5.0.12 25) craype-accel-nvidia60
13) dmapp/7.1.1-6.0.4.0_46.2__gb8abda2.ari
Code: Select all
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2016 NVIDIA Corporation
Built on Tue_Jan_10_13:22:03_CST_2017
Cuda compilation tools, release 8.0, V8.0.61
Code: Select all
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0/testing> ./run_tests.py -s testing_dgemm
****************************************************************************************************
srun -n 1 ./testing_dgemm -l -NN -c -n 1:20:1 -n 30 -n 31 -n 32 -n 33 -n 34 -n 62 -n 63 -n 64 -n 65 -n 66 -n 94 -n 95 -n 96 -n 97 -n 98 -n 126 -n 127 -n 128 -n 129 -n 130 -n 254 -n 255 -n 256 -n 257 -n 258 -n 2,1 -n 3,1 -n 4,2 -n 20,19 -n 20,10 -n 20,2 -n 20,1 -n 200,199 -n 200,100 -n 200,20 -n 200,10 -n 200,1 -n 1,2 -n 1,3 -n 2,4 -n 19,20 -n 10,20 -n 2,20 -n 1,20 -n 199,200 -n 100,200 -n 20,200 -n 10,200 -n 1,200 -n 1,2,3 -n 2,1,3 -n 1,3,2 -n 2,3,1 -n 3,1,2 -n 3,2,1 -n 10,20,30 -n 20,10,30 -n 10,30,20 -n 20,30,10 -n 30,10,20 -n 30,20,10 -n 100,200,300 -n 200,100,300 -n 100,300,200 -n 200,300,100 -n 300,100,200 -n 300,200,100
****************************************************************************************************
srun: error: nid00038: task 0: Segmentation fault
srun: Terminating job step 658781.31
^CTraceback (most recent call last):
File "./run_tests.py", line 1711, in <module>
(okay, fail, error, status) = run( cmd_args )
File "./run_tests.py", line 1601, in run
line = p.stdout.readline()
KeyboardInterrupt
Code: Select all
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include "flops.h"
#include "magma_v2.h"
#include "magma_lapack.h"
#include "magma_operators.h"
#include "testings.h"
// Minimal reproducer derived from a MAGMA tester.
//
// The program initializes MAGMA, prints the MAGMA environment banner and
// returns 0. When compiled with -D_PROVOKE_BUG it additionally declares a
// magma_opts object that is never used afterwards; that declaration is the
// only difference between the two build variants.
//
// argc/argv are only referenced by the commented-out parse_opts() call.
int main(int argc, char **argv)
{
// Accessor macros for dA/dX/dY. They are defined in two forms depending on
// HAVE_clBLAS, but nothing in this reduced program expands them (dA, dX, dY
// and ldda are not declared here).
#ifdef HAVE_clBLAS
#define dA(i_, j_) dA, ((i_) + (j_)*ldda)
#define dX(i_) dX, ((i_))
#define dY(i_) dY, ((i_))
#else
#define dA(i_, j_) (dA + (i_) + (j_)*ldda)
#define dX(i_) (dX + (i_))
#define dY(i_) (dY + (i_))
#endif
// TESTING_CHECK comes from testings.h; presumably it aborts the test when
// magma_init() reports an error -- verify against that header.
TESTING_CHECK( magma_init() );
magma_print_environment();
int status = 0;
// Toggle for the failing variant: merely declaring the object (which runs
// its constructor, if it has one) is the whole change.
// NOTE(review): identifiers starting with '_' followed by an uppercase letter
// are reserved for the implementation; a name such as PROVOKE_BUG would be
// safer, but it must be changed together with the -D flag on the build line.
#if _PROVOKE_BUG
magma_opts opts;
#endif
// Option parsing is intentionally disabled so that 'opts' is never touched.
// opts.parse_opts( argc, argv );
// NOTE(review): magma_init() has no matching magma_finalize() in this
// reduced program.
return status;
}
Code: Select all
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> CC -g -DADD_ -std=c++11 -I/include -I./include -I./testing -c -o testing/testing_junk.o testing/testing_junk.cpp
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> CC -O3 -Wl,-rpath,../lib -o testing/testing_junk testing/testing_junk.o -L./testing -ltest -L./lib -lmagma -L/lib64 -lcublas -lcusparse
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> srun -n 1 ./testing/testing_junk
% MAGMA 2.3.0 compiled for CUDA capability >= 6.0, 32-bit magma_int_t, 64-bit pointer.
% CUDA runtime 8000, driver 9000. MAGMA not compiled with OpenMP.
% device 0: Tesla P100-PCIE-16GB, 1328.5 MHz clock, 16276.2 MiB memory, capability 6.0
% Thu Jan 25 14:45:31 2018
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> CC -g -DADD_ -D_PROVOKE_BUG -std=c++11 -I/include -I./include -I./testing -c -o testing/testing_junk.o testing/testing_junk.cpp
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> CC -O3 -Wl,-rpath,../lib -o testing/testing_junk testing/testing_junk.o -L./testing -ltest -L./lib -lmagma -L/lib64 -lcublas -lcusparse
dom101/apps/daint/UES/6.0.UP04/sandboxes/wsawyer/magma-2.3.0> srun -n 1 ./testing/testing_junk
srun: error: nid00038: task 0: Segmentation fault
srun: Terminating job step 658781.28
Addendum: My colleague has successfully built and run MAGMA with MKL + Intel; I can confirm that this build runs and has passed all tests so far. However, we are still interested in the cray-libsci_acc + GNU combination, and would like to learn what the problem is.
Many thanks for your time, --Will