% Chapter by Abdelfattah, Tomov and Dongarra in an edited Springer volume
% (LNCS 13350). The chapter authors differ from the volume editors, so the
% entry type is incollection; the series name lives in "series".
% NOTE(review): booktitle was filled in from the publisher record for this
% DOI -- please confirm. The former "organization" field duplicated
% "publisher" verbatim and was dropped.
@incollection{abdelfattah2022batchqr,
  author    = {Abdelfattah, Ahmad and Tomov, Stanimire and Dongarra, Jack},
  editor    = {Groen, Derek and de Mulatier, C{\'e}lia and Paszy{\'n}ski, Maciej and Krzhizhanovskaya, Valeria V. and Dongarra, Jack J. and Sloot, Peter M. A.},
  title     = {Batch {QR} Factorization on {GPUs}: Design, Optimization, and Tuning},
  booktitle = {Computational Science -- {ICCS} 2022},
  series    = {Lecture Notes in Computer Science},
  volume    = {13350},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2022},
  month     = jun,
  isbn      = {978-3-031-08750-9},
  doi       = {10.1007/978-3-031-08751-6_5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-08751-6_5},
  keywords  = {Batch linear algebra, GPU computing, QR factorization},
  abstract  = {QR factorization of dense matrices is a ubiquitous tool in high performance computing (HPC). From solving linear systems and least squares problems to eigenvalue problems, and singular value decompositions, the impact of a high performance QR factorization is fundamental to computer simulations and many applications. More importantly, the QR factorization on a batch of relatively small matrices has acquired a lot of attention in sparse direct solvers and low-rank approximations for Hierarchical matrices. To address this interest and demand, we developed and present a high performance batch QR factorization for Graphics Processing Units (GPUs). We present a multi-level blocking strategy that adjusts various algorithmic designs to the size of the input matrices. We also show that following the LAPACK QR design convention, while still useful, is significantly outperformed by unconventional code structures that increase data reuse. The performance results show multi-fold speedups against the state of the art libraries on the latest GPU architectures from both NVIDIA and AMD.},
}

% Journal article in Journal of Computational Science, vol. 62.
% "pages" carries the article number (101745), not a page range.
@article{kovalchuk2022computational,
  author  = {Kovalchuk, Sergey V. and Krzhizhanovskaya, Valeria V. and Paszy{\'n}ski, Maciej and Kranzlm{\"u}ller, Dieter and Dongarra, Jack and Sloot, Peter M. A.},
  title   = {Computational science for a better future},
  journal = {Journal of Computational Science},
  volume  = {62},
  pages   = {101745},
  year    = {2022},
  month   = jul,
  issn    = {1877-7503},
  doi     = {10.1016/j.jocs.2022.101745},
  url     = {https://www.sciencedirect.com/science/article/pii/S1877750322001351},
}