% Publication list, one entry per work, newest first. Citation keys (icl:NNN) are
% stable identifiers and must not be renamed.
%
% Conventions used in this file:
%   - one field per line, brace-delimited values, trailing comma after the last field
%   - month uses the standard three-letter macros (unquoted); entries whose source
%     record had no month (a "-00" placeholder) carry no month field
%   - page and issue ranges use "--"
%   - doi is stored bare (no resolver prefix)
%   - acronyms and product names in titles are brace-protected so that styles which
%     sentence-case titles keep their capitalisation
%   - publication status ("To appear", "Submitted") lives in note, not in the venue name

@article{icl:538,
  title = {Enabling Workflows in {GridSolve}: Request Sequencing and Service Trading},
  journal = {Journal of Supercomputing},
  volume = {64},
  year = {2013},
  month = jun,
  pages = {1133--1152},
  chapter = {1133},
  abstract = {GridSolve employs a RPC-based client-agent-server model for solving computational problems. There are two deficiencies associated with GridSolve when a computational problem essentially forms a workflow consisting of a sequence of tasks with data dependencies between them. First, intermediate results are always passed through the client, resulting in unnecessary data transport. Second, since the execution of each individual task is a separate RPC session, it is difficult to enable any potential parallelism among tasks. This paper presents a request sequencing technique that addresses these deficiencies and enables workflow executions. Building on the request sequencing work, one way to generate workflows is by taking higher level service requests and decomposing them into a sequence of simpler service requests using a technique called service trading. A service trading component is added to GridSolve to take advantage of the new dynamic request sequencing. The features described here include automatic DAG construction and data dependency analysis, direct interserver data transfer, parallel task execution capabilities, and a service trading component.},
  keywords = {grid computing, gridpac, netsolve, service trading, workflow applications},
  issn = {1573-0484},
  doi = {10.1007/s11227-010-0549-1},
  author = {Yinan Li and Asim YarKhan and Jack Dongarra and Keith Seymour and Aur{\'e}lie Hurault},
}

@article{icl:566,
  title = {{SmartGridRPC}: The new {RPC} model for high performance Grid Computing and Its Implementation in {SmartGridSolve}},
  journal = {Concurrency and Computation: Practice and Experience},
  year = {2010},
  month = jan,
  note = {To appear},
  keywords = {netsolve},
  author = {Thomas Brady and Alexey Lastovetsky and Keith Seymour and Michele Guidolin and Jack Dongarra},
}

@article{icl:500,
  title = {Paravirtualization Effect on Single- and Multi-threaded Memory-Intensive Linear Algebra Software},
  journal = {Cluster Computing Journal: Special Issue on High Performance Distributed Computing},
  volume = {12},
  number = {2},
  year = {2009},
  pages = {101--122},
  publisher = {Springer Netherlands},
  author = {Lamia Youseff and Keith Seymour and Haihang You and Dmitrii Zagorodnov and Jack Dongarra and Rich Wolski},
}

@incollection{icl:515,
  title = {Transparent Cross-Platform Access to Software Services using {GridSolve} and {GridRPC}},
  booktitle = {Cloud Computing and Software Services: Theory and Techniques},
  year = {2009},
  publisher = {CRC Press},
  note = {To appear},
  keywords = {netsolve},
  author = {Keith Seymour and Asim YarKhan and Jack Dongarra},
  editor = {Syed Ahson and Mohammad Ilyas},
}

@inproceedings{icl:418,
  title = {A Comparison of Search Heuristics for Empirical Code Optimization},
  booktitle = {The 3rd international Workshop on Automatic Performance Tuning},
  year = {2008},
  month = oct,
  address = {Tsukuba, Japan},
  keywords = {gco},
  author = {Keith Seymour and Haihang You and Jack Dongarra},
}

@incollection{icl:409,
  title = {High Performance {GridRPC} Middleware},
  booktitle = {Recent developments in Grid Technology and Applications},
  year = {2008},
  publisher = {Nova Science Publishers},
  keywords = {netsolve},
  author = {Yves Caniou and Eddy Caron and Frederic Desprez and Hidemoto Nakada and Yoshio Tanaka and Keith Seymour},
  editor = {George A. Gravvanis and John P. Morrison and Hamid R. Arabnia and D. A. Power},
}

@inproceedings{icl:410,
  title = {The Impact of Paravirtualized Memory Hierarchy on Linear Algebra Computational Kernels and Software},
  booktitle = {ACM/IEEE International Symposium on High Performance Distributed Computing},
  year = {2008},
  month = jun,
  address = {Boston, MA},
  keywords = {gco, netsolve},
  author = {Lamia Youseff and Keith Seymour and Haihang You and Jack Dongarra and Rich Wolski},
}

@article{icl:408,
  title = {Interactive Grid-Access Using {Gridsolve} and {Giggle}},
  journal = {Computing and Informatics},
  volume = {27},
  number = {2},
  year = {2008},
  pages = {233--248},
  issn = {1335-9150},
  keywords = {netsolve},
  author = {Marcus Hardt and Keith Seymour and Jack Dongarra and Michael Zapf and Nicole Ruiter},
}

@article{icl:462,
  title = {{PERI} Auto-tuning},
  journal = {Proc. SciDAC 2008},
  volume = {125},
  year = {2008},
  month = jan,
  publisher = {Journal of Physics},
  address = {Seattle, Washington},
  keywords = {gco},
  author = {David Bailey and Jacqueline Chame and Chun Chen and Jack Dongarra and Mary Hall and Jeffrey K. Hollingsworth and Paul D. Hovland and Shirley Moore and Keith Seymour and Jaewook Shin and Ananta Tiwari and Sam Williams and Haihang You},
}

@inproceedings{icl:419,
  title = {Request Sequencing: Enabling Workflow for Efficient Problem Solving in {GridSolve}},
  booktitle = {International Conference on Grid and Cooperative Computing (GCC 2008)},
  year = {2008},
  month = oct,
  address = {Shenzhen, China},
  note = {Submitted},
  author = {Yinan Li and Jack Dongarra and Keith Seymour and Asim YarKhan},
}

@techreport{icl:336,
  title = {Automated Empirical Tuning of a Multiresolution Analysis Kernel},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-07-01},
  year = {2007},
  month = jan,
  pages = {10},
  keywords = {gco},
  author = {Haihang You and Keith Seymour and Jack Dongarra and Shirley Moore},
}

@techreport{icl:338,
  title = {Empirical Tuning of a Multiresolution Analysis Kernel using a Specialized Code Generator},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-07-02},
  year = {2007},
  month = jan,
  keywords = {gco},
  author = {Haihang You and Keith Seymour and Jack Dongarra and Shirley Moore},
}

@inproceedings{icl:339,
  title = {{GridSolve}: The Evolution of Network Enabled Solver},
  booktitle = {Grid-Based Problem Solving Environments: IFIP TC2/WG 2.5 Working Conference on Grid-Based Problem Solving Environments (Prescott, AZ, July 2006)},
  year = {2007},
  pages = {215--226},
  publisher = {Springer},
  keywords = {netsolve},
  author = {Asim YarKhan and Jack Dongarra and Keith Seymour},
  editor = {Patrick Gaffney},
}

@article{icl:403,
  title = {Improved Runtime and Transfer Time Prediction Mechanisms in a Network Enabled Servers Middleware},
  journal = {Parallel Processing Letters},
  volume = {17},
  number = {1},
  year = {2007},
  month = mar,
  pages = {47--59},
  author = {Emmanuel Jeannot and Keith Seymour and Asim YarKhan and Jack Dongarra},
}

@techreport{icl:313,
  title = {{ATLAS} on the {BlueGene/L} {\textendash} Preliminary Results},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-06-10},
  year = {2006},
  month = jan,
  keywords = {gco},
  author = {Keith Seymour and Haihang You and Jack Dongarra},
}

% NOTE(review): icl:340 shares journal, volume, number, pages and authors with
% icl:403 and differs only in the title wording and the year (2006 vs 2007).
% It looks like a duplicate record of the same paper; confirm which title/year is
% correct. Both keys are kept so that existing citations keep resolving.
@article{icl:340,
  title = {Improved Runtime and Transfer Time Prediction Mechanisms in a Network Enabled Server},
  journal = {Parallel Processing Letters},
  volume = {17},
  number = {1},
  year = {2006},
  month = mar,
  pages = {47--59},
  keywords = {netsolve},
  author = {Emmanuel Jeannot and Keith Seymour and Asim YarKhan and Jack Dongarra},
}

@article{icl:294,
  title = {Recent Developments in {GridSolve}},
  journal = {International Journal of High Performance Computing Applications (Special Issue: Scheduling for Large-Scale Heterogeneous Platforms)},
  volume = {20},
  number = {1},
  year = {2006},
  publisher = {Sage Science Press},
  keywords = {netsolve},
  author = {Asim YarKhan and Keith Seymour and Kiran Sagi and Zhiao Shi and Jack Dongarra},
  editor = {Yves Robert},
}

@article{icl:332,
  title = {Self Adapting Numerical Software {SANS} Effort},
  journal = {IBM Journal of Research and Development},
  volume = {50},
  number = {2/3},
  year = {2006},
  month = jan,
  pages = {223--238},
  keywords = {gco},
  author = {George Bosilca and Zizhong Chen and Jack Dongarra and Victor Eijkhout and Graham Fagg and Erika Fuentes and Julien Langou and Piotr Luszczek and Jelena Pjesivac-Grbovic and Keith Seymour and Haihang You and Sathish Vadhiyar},
}

@techreport{icl:255,
  title = {An Effective Empirical Search Method for Automatic Software Tuning},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-05-02},
  year = {2005},
  month = jan,
  keywords = {gco},
  author = {Haihang You and Keith Seymour and Jack Dongarra},
}

@incollection{icl:276,
  title = {{NetSolve}: Grid Enabling Scientific Computing Environments},
  booktitle = {Grid Computing and New Frontiers of High Performance Processing},
  number = {14},
  year = {2005},
  publisher = {Elsevier},
  keywords = {netsolve},
  author = {Keith Seymour and Asim YarKhan and Sudesh Agrawal and Jack Dongarra},
  editor = {Lucio Grandinetti},
}

@inproceedings{icl:197,
  title = {Accurate Cache and {TLB} Characterization Using Hardware Counters},
  booktitle = {International Conference on Computational Science (ICCS 2004)},
  year = {2004},
  month = jun,
  publisher = {Springer},
  organization = {Springer},
  address = {Krakow, Poland},
  abstract = {We have developed a set of microbenchmarks for accurately determining the structural characteristics of data cache memories and TLBs. These characteristics include cache size, cache line size, cache associativity, memory page size, number of data TLB entries, and data TLB associativity. Unlike previous microbenchmarks that used time-based measurements, our microbenchmarks use hardware event counts to more accurately and quickly determine these characteristics while requiring fewer limiting assumptions.},
  keywords = {gco, lacsi, papi},
  doi = {10.1007/978-3-540-24688-6_57},
  author = {Jack Dongarra and Shirley Moore and Phil Mucci and Keith Seymour and Haihang You},
}

@inproceedings{icl:235,
  title = {Automatic Blocking of {QR} and {LU} Factorizations for Locality},
  booktitle = {2nd ACM SIGPLAN Workshop on Memory System Performance (MSP 2004)},
  year = {2004},
  month = jun,
  publisher = {ACM},
  organization = {ACM},
  address = {Washington, DC},
  abstract = {QR and LU factorizations for dense matrices are important linear algebra computations that are widely used in scientific applications. To efficiently perform these computations on modern computers, the factorization algorithms need to be blocked when operating on large matrices to effectively exploit the deep cache hierarchy prevalent in today{\textquoteright}s computer memory systems. Because both QR (based on Householder transformations) and LU factorization algorithms contain complex loop structures, few compilers can fully automate the blocking of these algorithms. Though linear algebra libraries such as LAPACK provides manually blocked implementations of these algorithms, by automatically generating blocked versions of the computations, more benefit can be gained such as automatic adaptation of different blocking strategies. This paper demonstrates how to apply an aggressive loop transformation technique, dependence hoisting, to produce efficient blockings for both QR and LU with partial pivoting. We present different blocking strategies that can be generated by our optimizer and compare the performance of auto-blocked versions with manually tuned versions in LAPACK, both using reference BLAS, ATLAS BLAS and native BLAS specially tuned for the underlying machine architectures.},
  keywords = {gco, papi, sans},
  doi = {10.1145/1065895.1065898},
  author = {Qing Yi and Ken Kennedy and Haihang You and Keith Seymour and Jack Dongarra},
}

@article{icl:156,
  title = {Automatic Translation of {Fortran} to {JVM} Bytecode},
  journal = {Concurrency and Computation: Practice and Experience},
  volume = {15},
  number = {3--5},
  year = {2003},
  pages = {202--207},
  keywords = {f2j},
  author = {Keith Seymour and Jack Dongarra},
}

@incollection{icl:143,
  title = {{NetSolve}: Past, Present, and Future - A Look at a Grid Enabled Server},
  booktitle = {Making the Global Infrastructure a Reality},
  year = {2003},
  publisher = {Wiley Publishing},
  keywords = {netsolve},
  author = {Sudesh Agrawal and Jack Dongarra and Keith Seymour and Sathish Vadhiyar},
  editor = {Francine Berman and Geoffrey Fox and Anthony Hey},
}

@techreport{icl:97,
  title = {{GridRPC}: A Remote Procedure Call {API} for Grid Computing},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-02-06},
  year = {2002},
  month = nov,
  author = {Keith Seymour and Hidemoto Nakada and Satoshi Matsuoka and Jack Dongarra and Craig Lee and Henri Casanova},
}

@article{icl:116,
  title = {{JLAPACK} - Compiling {LAPACK} {Fortran} to {Java}},
  journal = {Scientific Programming},
  volume = {7},
  number = {2},
  year = {2002},
  month = oct,
  pages = {111--138},
  keywords = {f2j},
  author = {David Doolin and Jack Dongarra and Keith Seymour},
}

@inproceedings{icl:187,
  title = {Overview of {GridRPC}: A Remote Procedure Call {API} for Grid Computing},
  booktitle = {Proceedings of the Third International Workshop on Grid Computing},
  year = {2002},
  month = jan,
  pages = {274--278},
  author = {Keith Seymour and Hidemoto Nakada and Satoshi Matsuoka and Jack Dongarra and Craig Lee and Henri Casanova},
  editor = {Manish Parashar},
}

@techreport{icl:96,
  title = {Users{\textquoteright} Guide to {NetSolve} v1.4.1},
  type = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number = {ICL-UT-02-05},
  year = {2002},
  month = jun,
  keywords = {netsolve},
  author = {Sudesh Agrawal and Dorian Arnold and Susan Blackford and Jack Dongarra and Michelle Miller and Kiran Sagi and Zhiao Shi and Keith Seymour and Sathish Vadhiyar},
}

@inproceedings{icl:22,
  title = {Automatic Translation of {Fortran} to {JVM} Bytecode},
  booktitle = {Joint ACM Java Grande - ISCOPE 2001 Conference},
  year = {2001},
  month = jun,
  address = {Stanford University, California},
  note = {Submitted},
  keywords = {f2j},
  author = {Keith Seymour and Jack Dongarra},
}

@inproceedings{icl:15,
  title = {End-user Tools for Application Performance Analysis, Using Hardware Counters},
  booktitle = {International Conference on Parallel and Distributed Computing Systems},
  year = {2001},
  month = aug,
  address = {Dallas, TX},
  abstract = {One purpose of the end-user tools described in this paper is to give users a graphical representation of performance information that has been gathered by instrumenting an application with the PAPI library. PAPI is a project that specifies a standard API for accessing hardware performance counters available on most modern microprocessors. These counters exist as a small set of registers that count ``events'', which are occurrences of specific signals and states related to a processor{\textquoteright}s function. Monitoring these events facilitates correlation between the structure of source/object code and the efficiency of the mapping of that code to the underlying architecture. The perfometer tool developed by the PAPI project provides a graphical view of this information, allowing users to quickly see where performance bottlenecks are in their application. Only one function call has to be added by the user to their program to take advantage of perfometer. This makes it quick and simple to add and remove instrumentation from a program. Also, perfometer allows users to change the ``event'' they are monitoring. Add the ability to monitor parallel applications, set alarms and a Java front-end that can run anywhere, and this gives the user a powerful tool for quickly discovering where and why a bottleneck exists. A number of third-party tools for analyzing performance of message-passing and/or threaded programs have also incorporated support for PAPI so as to be able to display and analyze hardware counter data from their interfaces.},
  keywords = {papi},
  author = {Kevin London and Jack Dongarra and Shirley Moore and Phil Mucci and Keith Seymour and T. Spencer},
}

@inproceedings{icl:16,
  title = {The {PAPI} Cross-Platform Interface to Hardware Performance Counters},
  booktitle = {Department of Defense Users{\textquoteright} Group Conference Proceedings},
  year = {2001},
  month = jun,
  address = {Biloxi, Mississippi},
  abstract = {The purpose of the PAPI project is to specify a standard API for accessing hardware performance counters available on most modern microprocessors. These counters exist as a small set of registers that count ``events,'' which are occurrences of specific signals and states related to the processor{\textquoteright}s function. Monitoring these events facilitates correlation between the structure of source/object code and the efficiency of the mapping of that code to the underlying architecture. This correlation has a variety of uses in performance analysis and tuning. The PAPI project has developed a standard set of hardware events and a standard cross-platform library interface to the underlying counter hardware. The PAPI library has been implemented for a number of Shared Resource Center platforms. The PAPI project is developing end-user tools for dynamically selecting and displaying hardware counter performance data. PAPI support is also being incorporated into a number of third-party tools.},
  keywords = {papi},
  author = {Kevin London and Shirley Moore and Phil Mucci and Keith Seymour and Richard Luczak},
}