@comment{
  Bibliography database: linear-algebra software publications (BLAS, ATLAS,
  ScaLAPACK, self-adapting numerical software).

  Conventions used in this file:
    - one field per line, brace-delimited values, lowercase entry types;
    - authors in "Last, First" form, separated by " and ";
    - month given as the unquoted three-letter macro (jan .. dec);
    - page and issue ranges written with a double hyphen;
    - doi stored bare, without a resolver prefix;
    - acronyms in titles brace-protected so styles cannot lowercase them.

  Citation keys are kept exactly as exported so existing cite commands
  continue to resolve.
}

@comment{
  icl:238 -- the export carried a placeholder month with no month number;
  it is omitted here.  The venue was stored in a journal field, which the
  inproceedings type does not read, so it is moved to booktitle and the
  "to appear" status is moved to note.
  NOTE(review): year and final venue details should be re-checked once the
  published version is known.
}
@inproceedings{icl:238,
  author    = {Demmel, James and Dongarra, Jack and Eijkhout, Victor and
               Fuentes, Erika and Petitet, Antoine and Vuduc, Rich and
               Whaley, Clint and Yelick, Katherine},
  title     = {Self Adapting Linear Algebra Algorithms and Software},
  booktitle = {{IEEE} Proceedings},
  year      = {2004},
  note      = {To appear},
  keywords  = {salsa, sans},
}

@article{icl:125,
  author  = {Blackford, Susan and Demmel, James and Dongarra, Jack and
             Duff, Iain and Hammarling, Sven and Henry, Greg and
             Heroux, Michael and Kaufman, Linda and Lumsdaine, Andrew and
             Petitet, Antoine and Pozo, Roldan and Remington, Karin and
             Whaley, Clint},
  title   = {An Updated Set of Basic Linear Algebra Subprograms ({BLAS})},
  journal = {ACM Transactions on Mathematical Software},
  volume  = {28},
  number  = {2},
  pages   = {135--151},
  year    = {2002},
  month   = dec,
  doi     = {10.1145/567806.567807},
}

@article{icl:85,
  author   = {Whaley, Clint and Petitet, Antoine and Dongarra, Jack},
  title    = {Automated Empirical Optimization of Software and the {ATLAS} Project},
  journal  = {Parallel Computing},
  volume   = {27},
  number   = {1--2},
  pages    = {3--25},
  year     = {2001},
  month    = jan,
  keywords = {atlas},
}

@comment{
  icl:6 -- the export stored "(an update), submitted to ACM TOMS" as the
  journal name.  That is a submission status, not a venue, so the entry is
  typed as unpublished and the status lives in note.
  NOTE(review): same author list as icl:125; this looks like the
  pre-publication record of that article -- confirm and consider merging.
}
@unpublished{icl:6,
  author = {Blackford, Susan and Demmel, James and Dongarra, Jack and
            Duff, Iain and Hammarling, Sven and Henry, Greg and
            Heroux, Michael and Kaufman, Linda and Lumsdaine, Andrew and
            Petitet, Antoine and Pozo, Roldan and Remington, Karin and
            Whaley, Clint},
  title  = {Basic Linear Algebra Subprograms ({BLAS})},
  note   = {An update; submitted to {ACM} {TOMS}},
  year   = {2001},
  month  = feb,
}

@comment{
  icl:49 -- the issuing body was stored in a journal field (with a stray
  trailing comma); technical reports read institution, type and number.
}
@techreport{icl:49,
  author      = {Whaley, Clint and Petitet, Antoine and Dongarra, Jack},
  title       = {Automated Empirical Optimizations of Software and the {ATLAS}
                 Project ({LAPACK} Working Note 147)},
  institution = {University of Tennessee Computer Science Department},
  type        = {Technical Report},
  number      = {UT-CS-00-448},
  year        = {2000},
  month       = sep,
  keywords    = {atlas},
}

@comment{
  icl:229 -- the venue is a meeting and the record carries a location, so
  it is typed as a conference contribution with the meeting in booktitle.
}
@inproceedings{icl:229,
  author    = {Petitet, Antoine and Casanova, Henri and Whaley, Clint and
               Dongarra, Jack and Robert, Yves},
  title     = {A Numerical Linear Algebra Problem Solving Environment
               Designer{\textquoteright}s Perspective ({LAPACK} Working Note 139)},
  booktitle = {{SIAM} Annual Meeting},
  address   = {Atlanta, GA},
  year      = {1999},
  month     = may,
}

@comment{
  icl:75 -- NOTE(review): the venue reads like a book title rather than a
  journal.  If so, this should become an incollection entry with booktitle
  and publisher; the publisher is not present in the export, so the entry
  type is left unchanged until that is confirmed.
}
@article{icl:75,
  author  = {Petitet, Antoine and Casanova, Henri and Dongarra, Jack and
             Robert, Yves and Whaley, Clint},
  title   = {Parallel and Distributed Scientific Computing: A Numerical Linear
             Algebra Problem Solving Environment Designer{\textquoteright}s Perspective},
  journal = {Handbook on Parallel and Distributed Processing},
  year    = {1999},
  month   = jan,
}

@inproceedings{967,
  author       = {Whaley, Clint and Dongarra, Jack},
  title        = {Automatically Tuned Linear Algebra Software},
  booktitle    = {1998 ACM/IEEE conference on Supercomputing (SC {\textquoteright}98)},
  publisher    = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address      = {Orlando, FL},
  year         = {1998},
  month        = nov,
  isbn         = {0-89791-984-X},
  keywords     = {BLAS, code generation, high performance, linear algebra, optimization, Tuning},
  abstract     = {This paper describes an approach for the automatic generation and optimization of numerical software for processors with deep memory hierarchies and pipelined functional units. The production of such software for machines ranging from desktop workstations to embedded processors can be a tedious and time consuming process. The work described here can help in automating much of this process. We will concentrate our efforts on the widely used linear algebra kernels called the Basic Linear Algebra Subroutines (BLAS). In particular, the work presented here is for general matrix multiply, DGEMM. However much of the technology and approach developed here can be applied to the other Level 3 BLAS and the general strategy can have an impact on basic linear algebra operations in general and may be extended to other important kernel operations.},
}

@article{1467,
  author   = {Choi, Jaeyoung and Demmel, Jim and Dhillon, Inderjit and
              Dongarra, Jack and Ostrouchov, Susan and Petitet, Antoine and
              Stanley, Kendall and Walker, David and Whaley, Clint},
  title    = {{ScaLAPACK}: A Portable Linear Algebra Library for Distributed
              Memory Computers - Design Issues and Performance},
  journal  = {Computer Physics Communications},
  volume   = {97},
  pages    = {1--15},
  year     = {1996},
  month    = aug,
  doi      = {10.1016/0010-4655(96)00017-3},
  abstract = {This paper outlines the content and performance of ScaLAPACK, a collection of mathematical software for linear algebra computations on distributed memory computers. The importance of developing standards for computational and message passing interfaces is discussed. We present the different components and building blocks of ScaLAPACK. This paper outlines the difficulties inherent in producing correct codes for networks of heterogeneous processors. We define a theoretical model of parallel computers dedicated to linear algebra applications: the Distributed Linear Algebra Machine (DLAM). This model provides a convenient framework for developing parallel algorithms and investigating their scalability, performance and programmability. Extensive performance results on various platforms are presented and analyzed with the help of the DLAM. Finally, this paper briefly describes future directions for the ScaLAPACK library and concludes by suggesting alternative approaches to mathematical libraries, explaining how ScaLAPACK could be integrated into efficient and user-friendly distributed systems.},
}