@article {, title = {Accelerating Restarted GMRES with Mixed Precision Arithmetic}, journal = {IEEE Transactions on Parallel and Distributed Systems}, year = {2021}, month = {2021-06}, abstract = {The generalized minimum residual method (GMRES) is a commonly used iterative Krylov solver for sparse, non-symmetric systems of linear equations. Like other iterative solvers, data movement dominates its run time. To improve this performance, we propose running GMRES in reduced precision with key operations remaining in full precision. Additionally, we provide theoretical results linking the convergence of finite precision GMRES with classical Gram-Schmidt with reorthogonalization (CGSR) and its infinite precision counterpart which helps justify the convergence of this method to double-precision accuracy. We tested the mixed-precision approach with a variety of matrices and preconditioners on a GPU-accelerated node. Excluding the incomplete LU factorization without fill in (ILU(0)) preconditioner, we achieved average speedups ranging from 8 to 61 percent relative to comparable double-precision implementations, with the simpler preconditioners achieving the higher speedups. }, keywords = {Convergence, Error correction, iterative methods, Kernel, linear systems, Stability analysis}, doi = {10.1109/TPDS.2021.3090757}, author = {Neil Lindquist and Piotr Luszczek and Jack Dongarra} } @conference {, title = {Replacing Pivoting in Distributed Gaussian Elimination with Randomized Techniques}, booktitle = {2020 IEEE/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA)}, year = {2020}, month = {2020-11}, publisher = {IEEE}, organization = {IEEE}, address = {Atlanta, GA}, abstract = {Gaussian elimination is a key technique for solving dense, non-symmetric systems of linear equations. Pivoting is used to ensure numerical stability but can introduce significant overheads. We propose replacing pivoting with recursive butterfly transforms (RBTs) and iterative refinement. RBTs use an FFT-like structure and randomized elements to provide an efficient, two-sided preconditioner for factoring. This approach was implemented and tested using Software for Linear Algebra Targeting Exascale (SLATE). In numerical experiments, our implementation was more robust than Gaussian elimination with no pivoting (GENP) but failed to solve all the problems solvable with Gaussian elimination with partial pivoting (GEPP). Furthermore, the proposed solver was able to outperform GEPP when distributed on GPU-accelerated nodes.}, keywords = {linear systems, Randomized algorithms}, author = {Neil Lindquist and Piotr Luszczek and Jack Dongarra} }