% Conference paper "DONNA" in the EDGE 2024 proceedings (IEEE), pages 149--156.
% The citation key is the exporter-generated identifier and is kept unchanged so
% that existing \cite commands keep resolving.
% Names are stored as "Last, First" without extra braces so styles can
% abbreviate given names; acronyms in the title are brace-protected against
% sentence-casing styles.
% NOTE(review): "address" holds a country, not the publisher's city -- confirm
% the intended value before relying on it in a rendered bibliography.
% NOTE(review): "month" is taken from the conference date recorded in "note".
@inproceedings{e110fa8880d5427c84e0c577c1ea3b93,
  author    = {Alshams, Mojtaba F. and Smagulova, Kamilya S. and Fahmy, Suhaib A. and Fouda, Mohammed E. and Eltawil, Ahmed M.},
  title     = {{DONNA}: Distributed Optimized Neural Network Allocation on {CIM}-Based Heterogeneous Accelerators},
  booktitle = {Proceedings - 2024 {IEEE} International Conference on Edge Computing and Communications, {EDGE} 2024},
  editor    = {Chang, Rong N. and Chang, Carl K. and Yang, Jingwei and Jin, Zhi and Sheng, Michael and Fan, Jing and Fletcher, Kenneth K. and He, Qiang and Atukorala, Nimanthi and Wu, Hongyue and Wang, Shiqiang and Deng, Shuiguang and Desai, Nirmit and Pingali, Gopal and Taheri, Javid and Subramaniam, K. V. and Awaysheh, Feras and {El Maghaouri}, Kaouta and Wang, Yingjie},
  series    = {Proceedings - {IEEE} International Conference on Edge Computing},
  pages     = {149--156},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2024},
  month     = jul,
  doi       = {10.1109/EDGE62653.2024.00027},
  language  = {English (US)},
  keywords  = {CIM, Compute-in-memory, CPU, Distributed Inference, GPU, Heterogeneous Devices, Heterogeneous Hardware, Model splitting, ReRAM},
  abstract  = {The continued development of neural network architectures continues to drive demand for computing power. While data center scaling continues, inference away from the cloud will increasingly rely on distributed inference on multiple devices. Most prior efforts have focused on optimizing single-device inference or partitioning models to enhance inference throughput. Meanwhile, energy consumption continues to grow in importance as a factor of consideration. This work proposes a framework that searches for optimal model splits and distributes the partitions across the combination of devices taking into account throughput and energy. Participating devices are strategically grouped into homogeneous and heterogeneous clusters consisting of general-purpose CPU and GPU architectures, as well as emerging Compute-In-Memory (CIM) accelerators. The framework simultaneously optimizes inference throughput and energy consumption. It is able to demonstrate up to 4$\times$ speedup with approximately 4$\times$ per-device energy reduction in a heterogeneous setup compared to single GPU inference. The algorithm also finds a smooth Pareto-like curve in the energy-throughput space for CIM devices.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE; 8th IEEE International Conference on Edge Computing and Communications, EDGE 2024; Conference date: 07-07-2024 through 13-07-2024},
}