@techreport{murray2022randnla,
  title = {Randomized Numerical Linear Algebra: A Perspective on the Field with an Eye to Software},
  institution = {University of California, Berkeley},
  type = {EECS Technical Report},
  number = {UCB/EECS-2022-258},
  year = {2022},
  month = {2022-11},
  abstract = {Randomized numerical linear algebra {\textendash} RandNLA, for short {\textendash} concerns the use of randomization as a resource to develop improved algorithms for large-scale linear algebra computations. The origins of contemporary RandNLA lay in theoretical computer science, where it blossomed from a simple idea: randomization provides an avenue for computing approximate solutions to linear algebra problems more efficiently than deterministic algorithms. This idea proved fruitful in and was largely driven by the development of scalable algorithms for machine learning and statistical data analysis applications. However, the true potential of RandNLA only came into focus once it began to integrate with the fields of numerical analysis and {\textquotedblleft}classical{\textquotedblright} numerical linear algebra. Through the efforts of many individuals, randomized algorithms have been developed that provide full control over the accuracy of their solutions and that are every bit as reliable as algorithms that might be found in libraries such as LAPACK. The spectrum of possibilities offered by RandNLA has created a virtuous cycle of contributions by numerical analysts, statisticians, theoretical computer scientists, and the machine learning community. Recent years have even seen the incorporation of certain RandNLA methods into MATLAB, the NAG Library, and NVIDIA{\textquoteright}s cuSOLVER. In view of these developments, we believe the time is ripe to accelerate the adoption of RandNLA in the scientific community. In particular, we believe the community stands to benefit significantly from a suitably defined {\textquotedblleft}RandBLAS{\textquotedblright} and {\textquotedblleft}RandLAPACK,{\textquotedblright} to serve as standard libraries for RandNLA, in much the same way that BLAS and LAPACK serve as standards for deterministic linear algebra. This monograph surveys the field of RandNLA as a step toward building meaningful RandBLAS and RandLAPACK libraries. Section 1 begins by setting scope and design principles for RandLAPACK and summarizing subsequent sections of the monograph. Section 2 focuses on RandBLAS, which is to be responsible for sketching. Details of functionality suitable for RandLAPACK are covered in the five sections that follow. Specifically, Sections 3 to 5 cover least squares and optimization, low-rank approximation, and other select problems that are well-understood in how they benefit from randomized algorithms. The remaining sections {\textendash} on statistical leverage scores (Section 6) and tensor computations (Section 7) {\textendash} read more like traditional surveys. The different flavor of these latter sections reflects how, in our assessment, the literature on these topics is still maturing. We provide a substantial amount of pseudo-code and supplementary material over the course of five appendices. Much of the pseudo-code has been tested via publicly available MATLAB and Python implementations.},
  keywords = {Randomized algorithms},
  doi = {10.48550/arXiv.2302.11474},
  url = {https://www2.eecs.berkeley.edu/Pubs/TechRpts/2022/EECS-2022-258.html},
  author = {Riley Murray and James Demmel and Michael W. Mahoney and N. Benjamin Erichson and Maksim Melnichenko and Osman Asif Malik and Laura Grigori and Piotr Luszczek and Micha{\l} Derezi{\'n}ski and Miles E. Lopes and Tianyu Liang and Hengrui Luo and Jack Dongarra}
}

@conference{lindquist2020pivoting,
  title = {Replacing Pivoting in Distributed Gaussian Elimination with Randomized Techniques},
  booktitle = {2020 IEEE/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA)},
  year = {2020},
  month = {2020-11},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Atlanta, GA},
  abstract = {Gaussian elimination is a key technique for solving dense, non-symmetric systems of linear equations. Pivoting is used to ensure numerical stability but can introduce significant overheads. We propose replacing pivoting with recursive butterfly transforms (RBTs) and iterative refinement. RBTs use an FFT-like structure and randomized elements to provide an efficient, two-sided preconditioner for factoring. This approach was implemented and tested using Software for Linear Algebra Targeting Exascale (SLATE). In numerical experiments, our implementation was more robust than Gaussian elimination with no pivoting (GENP) but failed to solve all the problems solvable with Gaussian elimination with partial pivoting (GEPP). Furthermore, the proposed solver was able to outperform GEPP when distributed on GPU-accelerated nodes.},
  keywords = {linear systems, Randomized algorithms},
  author = {Neil Lindquist and Piotr Luszczek and Jack Dongarra}
}

@article{821,
  title = {An Efficient Distributed Randomized Algorithm for Solving Large Dense Symmetric Indefinite Linear Systems},
  journal = {Parallel Computing},
  volume = {40},
  year = {2014},
  month = {2014-07},
  pages = {213-223},
  abstract = {Randomized algorithms are gaining ground in high-performance computing applications as they have the potential to outperform deterministic methods, while still providing accurate results. We propose a randomized solver for distributed multicore architectures to efficiently solve large dense symmetric indefinite linear systems that are encountered, for instance, in parameter estimation problems or electromagnetism simulations. The contribution of this paper is to propose efficient kernels for applying random butterfly transformations and a new distributed implementation combined with a runtime (PaRSEC) that automatically adjusts data structures, data mappings, and the scheduling as systems scale up. Both the parallel distributed solver and the supporting runtime environment are innovative. To our knowledge, the randomization approach associated with this solver has never been used in public domain software for symmetric indefinite systems. The underlying runtime framework allows seamless data mapping and task scheduling, mapping its capabilities to the underlying hardware features of heterogeneous distributed architectures. The performance of our software is similar to that obtained for symmetric positive definite systems, while requiring only half the execution time and half the amount of data storage of a general dense solver.},
  keywords = {Distributed linear algebra solvers, LDLT factorization, PaRSEC runtime, PLASMA, Randomized algorithms, Symmetric indefinite systems},
  doi = {10.1016/j.parco.2013.12.003},
  author = {Marc Baboulin and Dulceneia Becker and George Bosilca and Anthony Danalis and Jack Dongarra}
}