% Journal article (IEEE TPDS, vol. 30, 2019) on a parallel solver for linear
% Diophantine systems. The DOI is stored bare (no resolver prefix) so that
% DOI-aware styles can build the link themselves; the month uses the standard
% three-letter macro so styles can localise it.
@article{1237,
  author   = {Zaitsev, Dmitry and Tomov, Stanimire and Dongarra, Jack},
  title    = {Solving Linear {Diophantine} Systems on Parallel Architectures},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  volume   = {30},
  pages    = {1158--1169},
  year     = {2019},
  month    = may,
  doi      = {10.1109/TPDS.2018.2873354},
  url      = {https://ieeexplore.ieee.org/document/8482295},
  keywords = {Mathematical model, Matrix decomposition, Parallel architectures, Petri nets, Software algorithms, Sparse matrices, Task analysis},
  abstract = {Solving linear Diophantine systems of equations is applied in discrete-event systems, model checking, formal languages and automata, logic programming, cryptography, networking, signal processing, and chemistry. For modeling discrete systems with Petri nets, a solution in non-negative integer numbers is required, which represents an intractable problem. For this reason, solving such kinds of tasks with significant speedup is highly appreciated. In this paper we design a new solver of linear Diophantine systems based on the parallel-sequential composition of the system clans. The solver is studied and implemented to run on parallel architectures using a two-level parallelization concept based on MPI and OpenMP. A decomposable system is usually represented by a sparse matrix; a minimal clan size of the decomposition restricts the granulation of the technique. MPI is applied for solving systems for clans using a parallel-sequential composition on distributed-memory computing nodes, while OpenMP is applied in solving a single indecomposable system on a single node using multiple cores. A dynamic task-dispatching subsystem is developed for distributing systems on nodes in the process of compositional solution. Computational speedups are obtained on a series of test examples, e.g., illustrating that the best value constitutes up to 45 times speedup obtained on 5 nodes with 20 cores each.},
}
% Workshop paper (2017) on co-scheduling for cache-partitioned systems.
% Uses the canonical inproceedings entry type (conference is only a legacy
% alias). The publisher is recorded once: the exported organization field was
% a verbatim copy of publisher and made standard styles print it twice.
% NOTE(review): address is kept as exported; confirm whether Orlando, FL is
% the publisher's city or the workshop venue.
@inproceedings{1094,
  author    = {Aupy, Guillaume and Benoit, Anne and Pottier, Lo{\"\i}c and Raghavan, Padma and Robert, Yves and Shantharam, Manu},
  title     = {Co-Scheduling Algorithms for Cache-Partitioned Systems},
  booktitle = {19th Workshop on Advances in Parallel and Distributed Computational Models},
  publisher = {IEEE Computer Society Press},
  address   = {Orlando, FL},
  year      = {2017},
  month     = may,
  doi       = {10.1109/IPDPSW.2017.60},
  keywords  = {Computational modeling, Degradation, Interference, Mathematical model, Program processors, Supercomputers, Throughput},
  abstract  = {Cache-partitioned architectures allow subsections of the shared last-level cache (LLC) to be exclusively reserved for some applications. This technique dramatically limits interactions between applications that are concurrently executing on a multicore machine. Consider n applications that execute concurrently, with the objective to minimize the makespan, defined as the maximum completion time of the n applications. Key scheduling questions are: (i) which proportion of cache and (ii) how many processors should be given to each application? Here, we assign rational numbers of processors to each application, since they can be shared across applications through multi-threading. In this paper, we provide answers to (i) and (ii) for perfectly parallel applications. Even though the problem is shown to be NP-complete, we give key elements to determine the subset of applications that should share the LLC (while remaining ones only use their smaller private cache). Building upon these results, we design efficient heuristics for general applications. Extensive simulations demonstrate the usefulness of co-scheduling when our efficient cache partitioning strategies are deployed.},
}