% Conference paper from the SIAM PP 2024 proceedings.
% The citation key is kept unchanged so existing citations still resolve.
% Copyright and conference-event details are stored in their own fields
% (copyright, eventtitle, eventdate) rather than in a printed note.
@inproceedings{2e252ebff05145ac94d0d6ce6beb070b,
  author     = {Boukaram, Wajih and Zampini, Stefano and Turkiyyah, George and Keyes, David},
  title      = {{Cholesky} Factorization of Tile Low Rank Matrices on {GPUs}},
  booktitle  = {2024 {SIAM} Conference on Parallel Processing for Scientific Computing, {PP} 2024},
  editor     = {Bader, Michael and Dubey, Anshu and Lusch, Bethany},
  pages      = {65--77},
  publisher  = {Society for Industrial and Applied Mathematics Publications},
  address    = {Philadelphia, PA, USA},
  year       = {2024},
  language   = {english},
  eventtitle = {22nd {SIAM} Conference on Parallel Processing for Scientific Computing, {PP} 2024},
  eventdate  = {2024-03-05/2024-03-08},
  copyright  = {Copyright {\textcopyright} 2024 by SIAM. Unauthorized reproduction of this article is prohibited.},
  abstract   = {Tile low rank (TLR) representations of dense matrices partition them into blocks of roughly uniform size, where off-diagonal tiles are compressed and stored in low rank factorizations. They offer an attractive representation for many data-sparse dense operators that appear in practical applications, since substantial compression and a much smaller memory footprint can be achieved. Despite their utility, however, there are currently only a few high performance algorithms that can generate their Cholesky factorizations and operate on them efficiently, especially on GPUs. The difficulties in achieving high performance when factoring TLR matrices come from the expensive compression operations that must be performed during the factorization process and the irregular rank distribution of the tiles that requires an adaptive work pattern for the processing cores. In this work, we describe an algorithm that overcomes these limitations. Our algorithm has several new features. It always works in the compressed representation of the tiles. It compresses every tile in the output once only. It uses GEMM-rich adaptive randomized approximation for the compression. It also uses dynamic batched operations on the GPU to manage the irregular workload due to differing ranks among the output tiles. The resulting algorithm achieves substantial performance, as we demonstrate on sample matrices.},
}