#include <stdio.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/param.h>
#include "utility.h"
#include "problem.h"
#include "comm_protocol.h"
#include "comm_basics.h"
#include "comm_data.h"
#include "comm_encode.h"
#include "gs_pm_model.h"
Go to the source code of this file.
Classes | |
| struct | gs_service_info_t |
Defines | |
| #define | REQUEST_ID_LEN 64 |
| #define | REQUEST_ID_TEMPLATE "gsrequest_%s_%d_XXXXXXXXXXXX" |
Functions | |
| int | gs_read_server_from_file (char *, gs_server_t *) |
| int | gs_service_read_coeff (gs_service_info_t *, gs_server_t *) |
| int | gs_problem_service (gs_problem_t *) |
| int | gs_service_blocking_request (gs_service_info_t *) |
| int | gs_service_nonblocking_request (gs_service_info_t *) |
| int | gs_service_batch_request (gs_service_info_t *) |
| void | gs_dummy_signal_handler (int) |
| void | gs_service_sigterm_handler (int) |
| double | gs_read_service_et (char *) |
| double | gs_pm_problem_service (gs_service_info_t *) |
| double | gs_agent_get_server_score (gs_problem_t *, gs_server_t *) |
| int | service_template (int argc, char *argv[]) |
| void | gs_batch_service_sigterm_handler (int sig) |
| int | gs_exec_batch_service (gs_service_info_t *s) |
| int | gs_wait_for_batch_job_completion (gs_service_info_t *s) |
| int | gs_get_category_names (gs_pm_model_t *model, gs_problem_t *prob, char ***arr) |
| int | gs_get_param_exprs (gs_pm_model_t *model, char *comp_model, char ***arr) |
| int | gs_gen_expr (int i, int numrows, char **cat_names, char **param_expr, double **cat_mat, double **coef_mat, gs_pm_model_t *model, FILE *cf) |
| int | gs_generate_pm_expr (gs_pm_model_t *model, char *comp_model, gs_problem_t *prob, FILE *cf) |
| int | gs_update_perf_model (gs_service_info_t *s, char *model_fname, char *coef_fname, double elapsed_time) |
Variables | |
| pid_t | gs_service_pid = 0 |
This file contains a generic service template for the end service.
Definition in file service_template.c.
| #define REQUEST_ID_LEN 64 |
Definition at line 35 of file service_template.c.
| #define REQUEST_ID_TEMPLATE "gsrequest_%s_%d_XXXXXXXXXXXX" |
Definition at line 36 of file service_template.c.
| double gs_agent_get_server_score | ( | gs_problem_t * | problem, | |
| gs_server_t * | server | |||
| ) |
Gets the server computation time estimate for the given problem.
Definition at line 178 of file agent_scheduler_eval.c.
{
  /* Ask the performance model first; when its score is negative,
   * use the complexity-based score instead. */
  double estimate = gs_agent_perf_model_score(problem, server);

  if(estimate < 0)
    return gs_agent_complexity_score(problem, server);

  return estimate;
}


| void gs_batch_service_sigterm_handler | ( | int | sig | ) |
Signal handler for the SIGTERM delivered when the batch service is cancelled. Here we want to kill the real service process, which will depend on what kind of batch system we're using on the back-end.
| sig | -- the signal caught |
Definition at line 417 of file service_template.c.
{
  /* No child pid has been recorded yet, so there is nothing to signal. */
  if(gs_service_pid <= 0)
    return;

  /* Relay the signal we caught to the recorded batch service process. */
  if(kill(gs_service_pid, sig) < 0) {
    ERRPRINTF("Failed to kill batch service process [pid = %d]\n", gs_service_pid);
  }
}

| void gs_dummy_signal_handler | ( | int | sig | ) |
Signal handler for the SIGCHLD delivered when the service terminates. Originally I tried just ignoring the signal, but then on certain systems the subsequent call to waitpid() failed.
| sig | -- the signal caught |
Definition at line 383 of file service_template.c.
{
  /* Deliberately does nothing: the handler only has to exist so the
   * signal is caught rather than ignored. */
  (void) sig;
}

| int gs_exec_batch_service | ( | gs_service_info_t * | s | ) |
Exec a batch request.
Definition at line 808 of file service_template.c.
{
  /* Submits the batch service for the current request by running the
   * problem's gs_submit script through system().  The script's stdout
   * is redirected into the file "gs_batch_id" in the current directory.
   *
   * On non-Cygwin builds a symlink to the batch service executable is
   * first created inside the request directory and that link is what
   * gets submitted.
   *
   * Returns 0 when the submit command exited with status 0, -1 otherwise.
   */
  int status, rc = -1;
  char *cmd = NULL, *orig_exe = NULL, *new_exe = NULL;

  /* remove any stale id file left over from a previous submission */
  unlink("gs_batch_id");

#ifdef __CYGWIN__
  cmd = dstring_sprintf("%s/service/%s/gs_submit %s/service/%s/%s_batch_service > gs_batch_id",
    s->gridsolve_root, s->problem_name, s->gridsolve_root, s->problem_name, s->problem_name);
#else
  new_exe = dstring_sprintf("%s/%s/%s_batch_service", s->cwd,
    s->request_id, s->problem_name);

  orig_exe = dstring_sprintf("%s/service/%s/%s_batch_service",
    s->gridsolve_root, s->problem_name, s->problem_name);

  /* symlink() must not be handed a NULL path */
  if(!new_exe || !orig_exe) {
    ERRPRINTF("failed to create command string\n");
    goto cleanup;
  }

  if(symlink(orig_exe, new_exe) < 0) {
    ERRPRINTF("failed to create symlink (%s -> %s)\n",
      new_exe, orig_exe);
    goto cleanup;
  }

  cmd = dstring_sprintf("%s/service/%s/gs_submit %s > gs_batch_id",
    s->gridsolve_root, s->problem_name, new_exe);
#endif

  if(!cmd) {
    ERRPRINTF("failed to create command string\n");
    goto cleanup;
  }

  DBGPRINTF("cmd: %s\n", cmd);

  status = system(cmd);

  /* WEXITSTATUS is only meaningful when the shell exited normally;
   * a shell killed by a signal must not be mistaken for success. */
  if((status < 0) || !WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
    ERRPRINTF("command failed: '%s'\n", cmd);
    goto cleanup;
  }

  rc = 0;

cleanup:
  free(cmd);
  free(orig_exe);
  free(new_exe);
  return rc;
}


| int gs_gen_expr | ( | int | i, | |
| int | numrows, | |||
| char ** | cat_names, | |||
| char ** | param_expr, | |||
| double ** | cat_mat, | |||
| double ** | coef_mat, | |||
| gs_pm_model_t * | model, | |||
| FILE * | cf | |||
| ) |
Definition at line 1122 of file service_template.c.
{
  /* Writes, starting at row i, a nested conditional expression of the
   * form
   *     (categories match row i)?(sum of coef*param for row i):(rest)
   * to cf, recursing for each following row.  Once i reaches numrows
   * the chain is terminated with the literal -1.  Always returns 0.
   */
  int col;

  if(i == numrows) {
    fputs("-1", cf);
    return 0;
  }

  /* condition: every category equals this row's value */
  fputs("(", cf);
  for(col = 0; col < model->nb_categories; col++) {
    if(col > 0)
      fputs(" && ", cf);
    fprintf(cf, "(%s == %g)", cat_names[col], cat_mat[i][col]);
  }
  fputs(")?(", cf);

  /* value: linear combination of the parameter expressions */
  for(col = 0; col < model->nb_params; col++) {
    if(col > 0)
      fputs(" + ", cf);
    fprintf(cf, " (%g * (%s)) ", coef_mat[i][col], param_expr[col]);
  }
  fputs("):(", cf);

  /* otherwise: whatever the remaining rows produce */
  gs_gen_expr(i + 1, numrows, cat_names, param_expr, cat_mat, coef_mat, model, cf);
  fputs(")", cf);

  return 0;
}


| int gs_generate_pm_expr | ( | gs_pm_model_t * | model, | |
| char * | comp_model, | |||
| gs_problem_t * | prob, | |||
| FILE * | cf | |||
| ) |
Definition at line 1160 of file service_template.c.
{
  /* Writes the performance-model expression for the given model to cf.
   *
   * The category/coefficient matrices come from gs_pm_all_models(),
   * the category names from the problem's enum arguments and the
   * parameter expressions from the ';'-separated comp_model string.
   * Nothing is written when gs_pm_all_models() reports no rows.
   *
   * Returns 0 on success, -1 if the names or expressions could not be
   * obtained.
   */
  char **cat_names = NULL, **param_expr = NULL;
  double **cat_mat, **coef_mat;
  int numrows;

  /* NOTE(review): ownership of cat_mat/coef_mat is not visible here;
   * they are not released by this function -- confirm against
   * gs_pm_all_models(). */
  numrows = gs_pm_all_models(model, &cat_mat, &coef_mat);

  if(gs_get_category_names(model, prob, &cat_names) < 0) {
    ERRPRINTF("Error getting category names\n");
    return -1;
  }

  if(gs_get_param_exprs(model, comp_model, &param_expr) < 0) {
    ERRPRINTF("Error getting parameter expressions\n");
    free(cat_names);
    return -1;
  }

  if(numrows > 0) {
    gs_gen_expr(0, numrows, cat_names, param_expr, cat_mat, coef_mat, model, cf);
    fprintf(cf, "\n");
  }

  free(cat_names);

  /* gs_get_param_exprs() stores its strdup()ed working copy of
   * comp_model in slot 0 (the other slots point into that same
   * buffer), so it has to be released before the array itself. */
  free(param_expr[0]);
  free(param_expr);

  return 0;
}


| int gs_get_category_names | ( | gs_pm_model_t * | model, | |
| gs_problem_t * | prob, | |||
| char *** | arr | |||
| ) |
Definition at line 1063 of file service_template.c.
{
  /* Builds an array holding the names of the problem's enum arguments
   * (the arguments whose arg_enum is set), in argument-list order.
   *
   * The array has model->nb_categories entries.  The name pointers are
   * borrowed from the argument structs; only the array itself is
   * allocated here and must be free()d by the caller.
   *
   * Returns 0 and stores the array in *arr on success.  Returns -1
   * (leaving *arr untouched) on allocation failure or when the number
   * of enum arguments does not equal model->nb_categories.
   */
  gs_argument_t *argptr;
  char **cat_names;
  int count = 0;

  /* always ask for at least one slot so that a model without
   * categories does not depend on what malloc(0) returns */
  cat_names = (char **)calloc(model->nb_categories > 0 ? model->nb_categories : 1,
    sizeof(char *));
  if(!cat_names)
    return -1;

  for(argptr=prob->arglist; argptr != NULL; argptr=argptr->next) {
    if(!argptr->arg_enum)
      continue;

    /* never write past the end of the array */
    if(count >= model->nb_categories) {
      ERRPRINTF("problem has more enum arguments than model categories\n");
      free(cat_names);
      return -1;
    }

    cat_names[count++] = argptr->name;
  }

  /* callers index all nb_categories entries, so a short list would
   * leave them reading unset names */
  if(count != model->nb_categories) {
    ERRPRINTF("problem has fewer enum arguments than model categories\n");
    free(cat_names);
    return -1;
  }

  *arr = cat_names;

  return 0;
}

| int gs_get_param_exprs | ( | gs_pm_model_t * | model, | |
| char * | comp_model, | |||
| char *** | arr | |||
| ) |
Definition at line 1088 of file service_template.c.
{
  /* Splits the ';'-separated comp_model string into its individual
   * expressions.
   *
   * A private copy of comp_model is made and cut up in place; the
   * returned array has model->nb_params entries, each pointing into
   * that copy.  Entry 0 is the start of the copy, so the caller
   * releases everything with free((*arr)[0]) followed by free(*arr).
   *
   * Returns 0 and stores the array in *arr on success.  Returns -1
   * (leaving *arr untouched) on allocation failure or when the number
   * of expressions does not equal model->nb_params.
   */
  char *cm_copy, *cp, **pexp;
  int count = 0;

  /* the split below always yields at least one piece */
  if(model->nb_params <= 0)
    return -1;

  cm_copy = strdup(comp_model);
  pexp = (char **)malloc(model->nb_params * sizeof(char *));

  if(!cm_copy || !pexp) {
    free(cm_copy);
    free(pexp);
    return -1;
  }

  cp = cm_copy;

  while(cp) {
    /* never write past the end of the array */
    if(count >= model->nb_params) {
      ERRPRINTF("complexity model has more expressions than model parameters\n");
      free(cm_copy);
      free(pexp);
      return -1;
    }

    pexp[count++] = cp;

    /* terminate this piece and step to the start of the next one */
    cp = strchr(cp, ';');
    if(cp)
      *cp++ = '\0';
  }

  /* callers index all nb_params entries, so a short list would leave
   * them reading unset pointers */
  if(count != model->nb_params) {
    ERRPRINTF("complexity model has fewer expressions than model parameters\n");
    free(cm_copy);
    free(pexp);
    return -1;
  }

  *arr = pexp;

  return 0;
}

| double gs_pm_problem_service | ( | gs_service_info_t * | s | ) |
Runs the service in timed mode and updates the performance model if it exists.
| s | - pointer to service info struct |
Definition at line 1350 of file service_template.c.
{
  /* Calls gs_problem_service() on s->problem and measures it with
   * usertime().  Returns the measured elapsed time, or -1.0 if a file
   * name for the model update could not be built.
   */
  double t_begin, elapsed_time;

  t_begin = usertime();
  gs_problem_service(s->problem);
  elapsed_time = usertime() - t_begin;

#ifdef GS_PM_DISABLE
  /* NOTE(review): this update is compiled in only when GS_PM_DISABLE
   * IS defined, which reads as the opposite of what the macro name
   * suggests -- confirm whether #ifndef was intended.
   *
   * An info dir of "-" skips the update, and runs with a zero elapsed
   * time are not worth recording.
   */
  if((strcmp(s->infodir, "-") != 0) && (elapsed_time > 0.0)) {
    char *model_fname, *coef_fname;

    model_fname = dstring_sprintf("%s/%s.mdl", s->infodir, s->problem->name);
    if(!model_fname)
      return -1.0;

    coef_fname = dstring_sprintf("%s/%s.coe", s->infodir, s->problem->name);
    if(!coef_fname) {
      free(model_fname);
      return -1.0;
    }

    gs_update_perf_model(s, model_fname, coef_fname, elapsed_time);

    free(model_fname);
    free(coef_fname);
  }
#endif

  return elapsed_time;
}


| int gs_problem_service | ( | gs_problem_t * | ) |

| int gs_read_server_from_file | ( | char * | , | |
| gs_server_t * | ||||
| ) |
| double gs_read_service_et | ( | char * | file | ) |
Reads the elapsed time written on the second line of the specified file. Normally this will be the "done" timestamp file.
| file | -- the filename |
Definition at line 782 of file service_template.c.
{
  /* Returns the number found on the second line of the file, or 0.0
   * when the file cannot be opened or has no second line.
   */
  char line[128];
  double service_et = 0.0;
  FILE *dfile = fopen(file, "r");

  if(!dfile)
    return service_et;

  /* the first read consumes the timestamp; its content is discarded */
  fgets(line, 128, dfile);

  if(fgets(line, 128, dfile))
    service_et = atof(line);

  fclose(dfile);

  return service_et;
}


| int gs_service_batch_request | ( | gs_service_info_t * | s | ) |
Services batch requests (PBS, LSF, etc.).
| s | - pointer to service info struct |
Definition at line 915 of file service_template.c.
{
  /* Services a request through the batch system:
   *   1. installs the SIGCHLD/SIGTERM handlers,
   *   2. writes the encoded problem to GS_BATCH_XML and the input
   *      arguments to the file "input",
   *   3. forks a child that runs gs_exec_batch_service(),
   *   4. waits for that child, then for the batch job itself,
   *   5. for blocking requests, reads the outputs back from "data" and
   *      sends them to the client,
   *   6. decrements the job count and notifies the agent.
   *
   * Returns 0 on success.  On failure returns -1 with s->err set to
   * the GS_SVC_ERR_* code describing what went wrong.
   */
  char *problemstr = NULL;
  double service_et;
  FILE *xmlfile;
  pid_t pid;

  gs_service_pid = 0;

  if((gs_signal(SIGCHLD, gs_dummy_signal_handler) == SIG_ERR) ||
     (gs_signal(SIGTERM, gs_batch_service_sigterm_handler) == SIG_ERR)) {
    ERRPRINTF("Error: could not set signal handlers\n");
    s->err = GS_SVC_ERR_SIGNALS;
    return -1;
  }

  /* first save the problem struct to a file */
  xmlfile = fopen(GS_BATCH_XML, "w");
  if(!xmlfile) {
    ERRPRINTF("Could not create xml file.\n");
    s->err = GS_SVC_ERR_CREATE_XML;
    return -1;
  }

  if(gs_encode_problem(&problemstr, s->problem) < 0) {
    ERRPRINTF("Could not encode problem.\n");
    fclose(xmlfile);   /* don't leak the stream on this error path */
    s->err = GS_SVC_ERR_PROBLEM_ENC;
    return -1;
  }

  fprintf(xmlfile, "%s\n", problemstr);
  fclose(xmlfile);

  /* then save the args */
  if(gs_save_input_args_to_file("input", s->problem, s->my_dsig, GS_CALL_FROM_C,
       s->problem->major) < 0) {
    ERRPRINTF("Error saving input args.\n");
    s->err = GS_SVC_ERR_CREAT_DATA_FILE;
    return -1;
  }

  /* fork a child process to execute the batch service */
  pid = fork();

  if(pid == -1) {
    ERRPRINTF("Failed to fork\n");
    s->err = GS_SVC_ERR_FORK;
    return -1;
  }

  if(pid == 0) {
    /* this is the child: submit the job and report via exit status */
    if(gs_exec_batch_service(s) < 0)
      _exit(s->err);

    _exit(0);
  }
  else {
    int cstat_loc, status;
    pid_t child;

    /* recorded so the SIGTERM handler knows which process to signal */
    gs_service_pid = pid;

    /* this is the parent. */
    child = waitpid(pid, &cstat_loc, 0);

    if(child < 0) {
      ERRPRINTF("Error waiting for batch service process %d.\n", (int)pid);
      s->err = GS_SVC_ERR_WAITPID;
      return -1;
    }

    if(WIFEXITED(cstat_loc) == 0) {
      ERRPRINTF("batch service process %d did not terminate.\n", (int)pid);
      s->err = GS_SVC_ERR_ABNORMAL_EXIT;
      return -1;
    }

    status = WEXITSTATUS(cstat_loc);

    if(status != 0) {
      ERRPRINTF("batch service process %d terminated abnormally (status %d).\n",
         (int)pid, (char)status);
      /* the child passes its error code back through the exit status */
      s->err = (char)status > 0 ? (char)status : GS_SVC_ERR_UNSPECIFIED;
      return -1;
    }

    if(gs_wait_for_batch_job_completion(s) < 0) {
      ERRPRINTF("Failed to wait for job completion.\n");
      s->err = GS_SVC_ERR_WAITPID;
      return -1;
    }

    if(s->blocking) {
      int fd;

      if((fd = open("data", O_RDONLY)) == -1) {
        ERRPRINTF("failed to open output data\n");
        s->err = GS_SVC_ERR_OPEN_DATA_FILE;
        return -1;
      }

      if(gs_restore_output_args_from_file(fd, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("failed to restore output data from disk\n");
        close(fd);
        s->err = GS_SVC_ERR_RESTORE_ARGS;
        return -1;
      }

      close(fd);

      if(gs_send_tag(s->sock, GS_PROT_OK) < 0) {
        ERRPRINTF("Error sending tag.\n");
        s->err = GS_SVC_ERR_IO;
        return -1;
      }

      if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("Error sending output args.\n");
        s->err = GS_SVC_ERR_IO;
        return -1;
      }

      if(gs_create_timestamp_file(".", "retrieved", 0.0))
        ERRPRINTF("Warning: failed to create 'retrieved' file.\n");
    }

    service_et = gs_read_service_et("done");

    if(gs_decrement_job_count(s->srv_job_count) < 0)
      ERRPRINTF("Warning: failed to decrement job count.\n");

    if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
         s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
         s->agent_taskid, service_et) < 0)
      ERRPRINTF("Warning: failed sending problem solve notification.\n");
  }

  return 0;
}


| int gs_service_blocking_request | ( | gs_service_info_t * | s | ) |
services blocking requests.
| s | - pointer to service info struct |
Definition at line 516 of file service_template.c.
{
  /* Services a blocking request: runs the (timed) service in this
   * process, returns to the request directory, sends GS_PROT_OK and
   * the output arguments to the client, then notifies the agent and
   * writes the 'retrieved' timestamp file.
   *
   * Returns 0 on success and -1 on failure.  If the request directory
   * cannot be re-entered and the process is not already sitting in it,
   * the process exits instead of returning.
   */
  double service_et;

  service_et = gs_pm_problem_service(s);

  if(gs_decrement_job_count(s->srv_job_count) < 0)
    ERRPRINTF("Warning: failed to decrement job count.\n");

  /* since the service might have changed the working directory
   * try to chdir back to the request subdirectory
   */
  if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
    char *origcwd, *newcwd;
    int same_dir;

    ERRPRINTF("Could not cd back to request directory '%s/%s'.\n",
      s->cwd, s->request_id);
    gs_send_tag(s->sock, GS_SVC_ERR_CHDIR);

    /* check whether working directory has changed.  if not,
     * return so that the caller can attempt to write the
     * cancelled file.  otherwise, just exit without
     * writing since we'd be writing it in the wrong location.
     */
    origcwd = dstring_sprintf("%s/%s", s->cwd, s->request_id);
    newcwd = getcwd(NULL, MAXPATHLEN);

    /* either call may have failed; if we cannot tell where we are,
     * treat it as "wrong location" rather than comparing NULL */
    same_dir = origcwd && newcwd && (strcmp(newcwd, origcwd) == 0);

    free(origcwd);
    free(newcwd);

    if(same_dir)
      return -1;

    exit(-1);
  }

  if(gs_send_tag(s->sock, GS_PROT_OK) < 0) {
    ERRPRINTF("Error sending tag.\n");
    return -1;
  }

#ifdef GS_SMART_GRIDSOLVE
  int pid;

  if(s->problem->has_smart_arg_comm==1){
    if(gs_smart_send_output_args_to_client(s->sock ,s->problem, s->my_dsig)<0){
      ERRPRINTF("SMART : Error sending smart sending arguments\n");
      return -1;
    }

    pid=fork();
    if(pid==-1){
      ERRPRINTF("SMART: Out of memory could not fork\n");
      return -1;
    }
    if(pid==0){
      /* child: forwards the outputs to remote destinations.
       * NOTE(review): the error path below returns from the child into
       * the caller's flow instead of calling _exit(), and the calloc()
       * result is not checked -- confirm whether that is intended. */
      gs_server_t * src_server = (gs_server_t *)calloc(1,sizeof(gs_server_t));
      if(gs_service_read_coeff(s, src_server) < 0) {
        free(src_server);
        src_server = NULL;
      }

      if(gs_smart_send_output_args_remotely(s->sock ,src_server, s->problem, s->my_dsig)<0){
        ERRPRINTF("SMART : Error sending smart sending arguments\n");
        return -1;
      }
      _exit(0);
    }
  }
  else{
    if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
      ERRPRINTF("Error sending output args.\n");
      return -1;
    }
  }
#else
  if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
    ERRPRINTF("Error sending output args.\n");
    return -1;
  }
#endif

  if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
       s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
       s->agent_taskid, service_et) < 0)
    ERRPRINTF("Warning: failed sending problem solve notification.\n");

  if(gs_create_timestamp_file(".", "retrieved", 0.0))
    ERRPRINTF("Warning: failed to create 'retrieved' file.\n");

  return 0;
}


| int gs_service_nonblocking_request | ( | gs_service_info_t * | s | ) |
services non-blocking requests.
| s | - pointer to service info struct |
Definition at line 618 of file service_template.c.
{
  /* Services a non-blocking request.
   *
   * A child process runs the (timed) service and leaves its results in
   * the request directory: the encoded problem in "problem.xml", the
   * output arguments in "data" and a "done" timestamp file carrying
   * the elapsed time.  The child reports failure through its exit
   * status (a GS_SVC_ERR_* code).
   *
   * The parent waits for the child, converts a non-zero exit status
   * into s->err, and on success decrements the job count and notifies
   * the agent.
   *
   * Returns 0 on success, -1 on failure (with s->err set).
   */
  char *problemstr = NULL;
  FILE *xmlfile;
  double service_et;
  pid_t pid, reaped;
  int fd, wstatus, exit_code;

  gs_service_pid = 0;

  /* SIGCHLD must be caught (not ignored) so that it is not delivered
   * to the mfork library; ignoring it with SIG_IGN made waitpid()
   * fail on some systems.
   */
  if((gs_signal(SIGCHLD, gs_dummy_signal_handler) == SIG_ERR) ||
     (gs_signal(SIGTERM, gs_service_sigterm_handler) == SIG_ERR)) {
    ERRPRINTF("Error: could not set signal handlers\n");
    s->err = GS_SVC_ERR_SIGNALS;
    return -1;
  }

  /* the service itself runs in a forked child */
  pid = fork();

  if(pid == -1) {
    ERRPRINTF("Failed to fork\n");
    s->err = GS_SVC_ERR_FORK;
    return -1;
  }

  if(pid == 0) {
    /* ---- child: run the service and store the results; every
     *      path through this block ends in _exit() ---- */
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    service_et = gs_pm_problem_service(s);

    /* the service may have moved us; go back to the request dir */
    if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
      ERRPRINTF("Could not cd back to request directory '%s/%s'.\n",
        s->cwd, s->request_id);
      _exit(GS_SVC_ERR_CHDIR);
    }

    xmlfile = fopen("problem.xml", "w");
    if(!xmlfile) {
      ERRPRINTF("Could not create xml file.\n");
      _exit(GS_SVC_ERR_CREATE_XML);
    }

    if(gs_encode_problem(&problemstr, s->problem) < 0) {
      ERRPRINTF("Could not encode problem.\n");
      _exit(GS_SVC_ERR_PROBLEM_ENC);
    }

    fprintf(xmlfile, "%s\n", problemstr);
    fclose(xmlfile);

    fd = open("data", O_WRONLY | O_CREAT, 0600);
    if(fd < 0) {
      ERRPRINTF("Could not create data file.\n");
      _exit(GS_SVC_ERR_CREAT_DATA_FILE);
    }

#ifdef GS_SMART_GRIDSOLVE
    if(s->problem->has_smart_arg_comm==1){
      gs_server_t * src_server = (gs_server_t *)calloc(1,sizeof(gs_server_t));
      if(gs_service_read_coeff(s, src_server) < 0) {
        free(src_server);
        src_server = NULL;
      }
      if(gs_smart_save_output_args_to_file(s->sock, src_server, fd, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("Error sending output args.\n");
        _exit(GS_SVC_ERR_IO);
      }
    }
    else{
      if(gs_save_output_args_to_file(fd, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("Error sending output args.\n");
        _exit(GS_SVC_ERR_IO);
      }
    }
#else
    if(gs_save_output_args_to_file(fd, s->problem, s->my_dsig) < 0) {
      ERRPRINTF("Error sending output args.\n");
      _exit(GS_SVC_ERR_IO);
    }
#endif

    close(fd);

    if(gs_create_timestamp_file(".", "done", service_et) < 0) {
      ERRPRINTF("Could not create completion file.\n");
      _exit(GS_SVC_ERR_COMPLETION_FILE);
    }

    _exit(0);
  }

  /* ---- parent: the child never gets here ---- */

  /* recorded so the SIGTERM handler knows which process to signal */
  gs_service_pid = pid;

  /* wait for the service to finish and inspect how it ended */
  reaped = waitpid(pid, &wstatus, 0);

  if(reaped < 0) {
    ERRPRINTF("Error waiting for service process %d.\n", (int)pid);
    s->err = GS_SVC_ERR_WAITPID;
    return -1;
  }

  if(!WIFEXITED(wstatus)) {
    ERRPRINTF("service process %d did not terminate.\n", (int)pid);
    s->err = GS_SVC_ERR_ABNORMAL_EXIT;
    return -1;
  }

  exit_code = WEXITSTATUS(wstatus);

  if(exit_code != 0) {
    ERRPRINTF("service process %d terminated abnormally (status %d).\n",
       (int)pid, (char)exit_code);
    /* the child's exit status carries its GS_SVC_ERR_* code */
    s->err = (char)exit_code > 0 ? (char)exit_code : GS_SVC_ERR_UNSPECIFIED;
    return -1;
  }

  /* the child stored its elapsed time in the "done" file */
  service_et = gs_read_service_et("done");

  if(gs_decrement_job_count(s->srv_job_count) < 0)
    ERRPRINTF("Warning: failed to decrement job count.\n");

  if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
       s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
       s->agent_taskid, service_et) < 0)
    ERRPRINTF("Warning: failed sending problem solve notification.\n");

  return 0;
}


| int gs_service_read_coeff | ( | gs_service_info_t * | s, | |
| gs_server_t * | server | |||
| ) |
Initializes server struct and reads coefficient string (if present) for the service being invoked.
| s | -- service info struct | |
| server | -- server struct (filled out upon return) |
Definition at line 438 of file service_template.c.
{
  /* Fills in the server struct from the server XML file in
   * s->infodir, records the current workload, and loads the service's
   * coefficient string into server->perf_expr.
   *
   * When the model file cannot be opened or locked, or the coefficient
   * file cannot be read, perf_expr is set to a copy of
   * GS_NO_MODEL_UPDATE and the call still succeeds.
   *
   * Returns 0 on success.  Returns -1 with s->err set when a path
   * string cannot be built or the server XML cannot be read.
   */
  char *server_xml = NULL, *service_coeff = NULL, *service_model = NULL;
  int mfd = -1, rc = -1;

  server_xml = dstring_sprintf(GS_SERVER_XML_TEMPLATE, s->infodir);
  if(!server_xml) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  if(gs_read_server_from_file(server_xml, server) < 0) {
    s->err = GS_SVC_ERR_MISSING_SV_XML;
    goto cleanup;
  }

  server->workload = gs_get_workload();

  service_model = dstring_sprintf("%s/%s.mdl", s->infodir, s->problem_name);
  if(!service_model) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  service_coeff = dstring_sprintf("%s/%s.coe", s->infodir, s->problem_name);
  if(!service_coeff) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  /* from here on every outcome counts as success; a missing or
   * unreadable model only changes what perf_expr is set to */
  rc = 0;

  mfd = open(service_model, O_RDONLY, 0600);
  if(mfd < 0) {
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
    goto cleanup;
  }

  /* note we're obtaining a lock on the model file, not the
   * coefficient file.  the model file is the one that will
   * be locked by the service processes when both files are
   * updated.
   */
  if(gs_lock_fd(mfd, F_RDLCK) < 0) {
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
    goto cleanup;
  }

  if(gs_get_contents_of_file(service_coeff, &(server->perf_expr)) < 0) {
    ERRPRINTF("Warning: failed to read coefficient file '%s'\n", service_coeff);
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
  }
  else if(server->perf_expr) {
    /* strip one trailing newline; an empty file has nothing to strip
     * and must not be indexed at length-1 */
    size_t len = strlen(server->perf_expr);

    if((len > 0) && (server->perf_expr[len-1] == '\n'))
      server->perf_expr[len-1] = 0;
  }

  gs_unlock_fd(mfd);

cleanup:
  if(mfd >= 0)
    close(mfd);

  free(server_xml);
  free(service_model);
  free(service_coeff);

  return rc;
}


| void gs_service_sigterm_handler | ( | int | sig | ) |
Signal handler for the SIGTERM delivered when the service is cancelled. Here we want to kill the real service process.
| sig | -- the signal caught |
Definition at line 397 of file service_template.c.
{
  /* No child pid has been recorded yet, so there is nothing to signal. */
  if(gs_service_pid <= 0)
    return;

  /* Relay the signal we caught to the recorded service process. */
  if(kill(gs_service_pid, sig) < 0) {
    ERRPRINTF("Failed to kill service process [pid = %d]\n", gs_service_pid);
  }
}

| int gs_update_perf_model | ( | gs_service_info_t * | s, | |
| char * | model_fname, | |||
| char * | coef_fname, | |||
| double | elapsed_time | |||
| ) |
Updates the performance model with the execution time from the completed run.
| s | - pointer to service info struct | |
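| model_fname | - filename of the performance model for this service | |
| coef_fname | - filename of the coefficient file that receives the regenerated performance expression | |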
| elapsed_time | - the elapsed time for the just completed run |
Definition at line 1202 of file service_template.c.
{
  /* Records one timed run in the service's performance model.
   *
   * The model file is opened with a write lock.  If it did not exist
   * a fresh model is sized from the COMPLEXITY_MODEL string (one
   * parameter per ';'-separated expression) and the problem's enum
   * arguments (one category each); otherwise the model is loaded from
   * the file.  The expressions are evaluated into model->params, the
   * enum arguments are mapped to category indices, the timing is
   * stored, the model is written back, and the coefficient file is
   * regenerated.
   *
   * Returns 0 on success, or immediately if the problem has no
   * COMPLEXITY_MODEL; -1 on failure.
   */
  int i, new_model, num_expr, fd, rc = -1;
  char *comp_model, *cm_copy = NULL, *tok;
  gs_arg_enum_t *arg_enum = NULL;
  gs_argument_t *argptr;
  gs_pm_model_t *model = NULL;
  struct stat stbuf;
  icl_hash_t *symtab = NULL;
  FILE *coef_file;
  double j;

  comp_model = gs_problem_getinfo(s->problem, "COMPLEXITY_MODEL", NULL);

  if(!comp_model)
    return 0;

  /* must be checked before the open below, which creates the file */
  new_model = stat(model_fname, &stbuf) < 0;

  if((fd = gs_open_locked_file(model_fname, F_WRLCK, O_RDWR | O_CREAT)) < 0) {
    ERRPRINTF("Warning: failed to open perf model file '%s'.\n", model_fname);
    return -1;
  }

  if(new_model) {
    int num_categories = 0;
    char *p;

    /* model does not exist yet, so create one now */

    num_expr = 1;
    for(p = comp_model; *p != '\0'; p++)
      if(*p == ';')
        num_expr++;

    for(argptr=s->problem->arglist; argptr != NULL; argptr=argptr->next)
      if(argptr->arg_enum)
        num_categories++;

    model = gs_pm_init_model(num_categories, num_expr, GS_PM_MAX_RUNS);
  }
  else {
    /* model already exists, so load from disk */
    model = gs_pm_load(fd);
  }

  if(!model) {
    ERRPRINTF("Failed to intialize model\n");
    goto cleanup;
  }

  if(gs_construct_scalar_hashtable(&symtab, s->problem, GS_IN) < 0) {
    ERRPRINTF("Failed to construct hash table for scalars\n");
    symtab = NULL;   /* nothing usable to destroy */
    goto cleanup;
  }

  /* dup since strtok will clobber original */
  cm_copy = strdup(comp_model);
  if(!cm_copy) {
    ERRPRINTF("strdup failed\n");
    goto cleanup;
  }

  for(i = 0, tok = strtok(cm_copy, ";"); tok != NULL; tok = strtok(NULL, ";"), i++) {
    /* a model loaded from disk may have been built for a different
     * expression list; never write past its parameter array */
    if(i >= model->nb_params) {
      ERRPRINTF("complexity model has more expressions than model parameters\n");
      goto cleanup;
    }

    if(gs_expr_d(tok, &(model->params[i]), symtab) < 0)
      ERRPRINTF("Warning: failed to evaluate model expression '%s'\n", tok);
  }

  i = 0;
  for(argptr=s->problem->arglist; argptr != NULL; argptr=argptr->next) {
    int found_enum_match = 0;

    if(!argptr->arg_enum)
      continue;

    /* same protection for the category array */
    if(i >= model->nb_categories) {
      ERRPRINTF("problem has more enum arguments than model categories\n");
      goto cleanup;
    }

    /* the category value is the position of the first matching enum
     * entry; an entry named "other" matches anything */
    j = 0.0;
    for(arg_enum=argptr->arg_enum; arg_enum != NULL; arg_enum=arg_enum->next) {
      if((strcmp(arg_enum->val, "other") == 0) ||
         ((argptr->datatype == GS_CHAR) && !strncmp(argptr->data, arg_enum->val, 1)) ||
         ((argptr->datatype != GS_CHAR) && (argptr->expr_val == atof(arg_enum->val))))
      {
        found_enum_match = 1;
        model->categories[i] = j;
        break;
      }

      j += 1.0;
    }

    if(!found_enum_match) {
      ERRPRINTF("No match in model for arg %s\n", argptr->name);
      goto cleanup;
    }

    i++;
  }

  gs_pm_store_timing(elapsed_time, model);

  /* rewind so the save overwrites the model from the start of the file */
  if(lseek(fd, 0, SEEK_SET) < 0) {
    ERRPRINTF("Failed to save model to disk\n");
    goto cleanup;
  }

  /* write model to disk.. */
  if(gs_pm_save(model, fd) < 0) {
    ERRPRINTF("Failed to save model to disk\n");
    goto cleanup;
  }

  /* regenerating the coefficient file is best effort */
  coef_file = fopen(coef_fname, "w");
  if(coef_file) {
    gs_generate_pm_expr(model, comp_model, s->problem, coef_file);
    fclose(coef_file);
  }

  rc = 0;

cleanup:
  /* single exit so the model, the string copy, the hash table and the
   * file lock are released on every path */
  free(cm_copy);

  if(model)
    gs_pm_free_model(model);

  if(symtab)
    icl_hash_destroy(symtab, NULL, NULL);

  gs_unlock_fd(fd);
  close(fd);

  return rc;
}


| int gs_wait_for_batch_job_completion | ( | gs_service_info_t * | s | ) |
Waits for a previous batch job to complete.
Definition at line 854 of file service_template.c.
{
  /* Blocks until the submitted batch job is no longer reported by the
   * problem's gs_probe script.
   *
   * The job id is the first line of the file "gs_batch_id" in the
   * request directory.  gs_probe is run with that id every 5 seconds
   * for as long as it exits with status 0.
   *
   * Returns 0 once gs_probe exits with a non-zero status, -1 if the
   * request directory, the id file or the probe command is unusable.
   */
  char buf[256], *cmd;
  int status, rc = 0;
  FILE *f;

  /* just in case the submit script changed the current directory,
   * change it back to the request dir.
   */
  if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
    ERRPRINTF("can't cd back to %s/%s\n", s->cwd, s->request_id);
    return -1;
  }

  if((f = fopen("gs_batch_id", "r")) == NULL) {
    ERRPRINTF("failed to open file gs_batch_id\n");
    return -1;
  }

  if(!fgets(buf, sizeof(buf), f)) {
    ERRPRINTF("failed to read ID from file gs_batch_id\n");
    fclose(f);
    return -1;
  }

  fclose(f);

  /* cut at the newline if there is one; an id without a trailing
   * newline must keep its last character */
  buf[strcspn(buf, "\n")] = '\0';

  cmd = dstring_sprintf("%s/service/%s/gs_probe %s", s->gridsolve_root, s->problem_name, buf);

  if(!cmd) {
    ERRPRINTF("malloc");
    return -1;
  }

  for(;;) {
    status = system(cmd);

    if(status < 0) {
      ERRPRINTF("command failed: '%s'\n", cmd);
      rc = -1;
      break;
    }

    /* a non-zero exit status from the probe ends the wait */
    if(WEXITSTATUS(status) != 0)
      break;

    sleep(5);
  }

  free(cmd);

  return rc;
}


| int service_template | ( | int | argc, | |
| char * | argv[] | |||
| ) |
This is a generic "service template" which forms the basis for the executable service that is forked/execed by the server.
When processing/compiling an IDL file, the compiler will link to this service_template. A main routine will then call this service_template, passing all of its arguments.
For a specific problem P, the server process will call the appropriate service program P_service with the appropriate command line arguments (as described below). The P_service program will call service_template, passing all its arguments. This service_template routine will talk to the client to get the input arguments, call the actual service wrapper, and send the output arguments back to the client.
This routine expects the following arguments to be sent from the server.
argv[0] -- name of service executable.
argv[1] -- name of the problem to be solved.
argv[2] -- tag: either GS_PROT_PROBLEM_SOLVE_ASSIGNED or GS_PROT_PROBLEM_SOLVE, depending on whether this is an assigned server request or not. This is currently just the same tag as the client sends to the server.
argv[3] -- client data signature (as sent by client to server).
argv[4] -- sock: the socket descriptor already opened between the client and server.
argv[5] -- gridsolve_root: path to root of GridSolve.
argv[6] -- gridsolve_arch: architecture string.
argv[7] -- blocking: either "0" (non-blocking) or "1" (blocking).
argv[8] -- hostname of agent.
argv[9] -- agent port.
argv[10] -- server component ID (in printable string form).
argv[11] -- client user name.
argv[12] -- client host name.
argv[13] -- client component ID (in printable string form).
argv[14] -- server info dir (where timing info will be stored).
argv[15] -- task id assigned by the agent (-1 for assigned server requests).
argv[16] -- agent's estimated execution time for this job.
Definition at line 137 of file service_template.c.
{
  /* Entry point shared by every generated service executable.
   *
   * Parses the 17 arguments passed by the server, loads the problem
   * description, creates and enters a per-request directory, performs
   * the handshake with the client, receives the input arguments,
   * notifies the agent, and then dispatches to the batch, blocking or
   * non-blocking request handler.
   *
   * This function does not return: it calls exit(0) on success and
   * exit(-1) on failure.  Failures after the request directory has
   * been entered go through service_abnormal_exit, which leaves
   * 'error' and 'cancelled' marker files behind.
   */
  gs_service_info_t sinfo;
  gs_server_t *server;
  char *service_xml;
  double est_time;
  char *cwd;

  sinfo.err = GS_SVC_ERR_UNSPECIFIED;

  /* bmode is read at service_abnormal_exit, which can be reached
   * before the BATCH_SUBMIT lookup has assigned it */
  sinfo.bmode = NULL;

  if(argc != 17) {
    fprintf(stderr, "Bad usage. Anyway, don't use this\n");
    fprintf(stderr, "from the command line.\n");
    exit(-1);
  }

  sinfo.problem_name = strdup(argv[1]);
  sinfo.tag = atoi(argv[2]);
  sinfo.client_dsig = atoi(argv[3]);
  sinfo.sock = atoi(argv[4]);
  sinfo.gridsolve_root = argv[5];
  sinfo.gridsolve_arch = argv[6];
  sinfo.blocking = atoi(argv[7]);
  sinfo.agent = strdup(argv[8]);
  sinfo.agentport = atoi(argv[9]);
  sinfo.srv_cid = strdup(argv[10]);
  sinfo.cli_username = strdup(argv[11]);
  sinfo.cli_hostname = strdup(argv[12]);
  sinfo.cli_cid = strdup(argv[13]);
  sinfo.infodir = strdup(argv[14]);
  sinfo.agent_taskid = atoi(argv[15]);
  sinfo.agent_est_time = atof(argv[16]);

  /* all of these strings are dereferenced later on */
  if(!sinfo.problem_name || !sinfo.agent || !sinfo.srv_cid ||
     !sinfo.cli_username || !sinfo.cli_hostname || !sinfo.cli_cid ||
     !sinfo.infodir) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  server = (gs_server_t *) malloc(sizeof(gs_server_t));
  if(!server) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  /* without server info we carry on and use a fixed estimate below */
  if(gs_service_read_coeff(&sinfo, server) < 0) {
    free(server);
    server = NULL;
  }

  service_xml = dstring_sprintf("%s/service/%s/%s.xml", sinfo.gridsolve_root,
    sinfo.problem_name, sinfo.problem_name);
  if(!service_xml) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  sinfo.problem = (gs_problem_t *) malloc(sizeof(gs_problem_t));
  if(!sinfo.problem) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  /* the job count file name is built from the parent's pid */
  snprintf(sinfo.srv_job_count, FN_LEN, "%s/%s.%d", sinfo.infodir,
    GS_SERVER_JOB_COUNT_FILE_PREFIX, getppid());

  /* Look for the service corresponding to the requested problem */
  if(gs_read_problem_from_file(service_xml, sinfo.problem) < 0) {
    ERRPRINTF("Error loading service: '%s'.\n", service_xml);
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MISSING_XML);
    exit(-1);
  }
  else {
    sinfo.my_dsig = pvmgetdsig();

    cwd = CALLOC(MAXPATHLEN, sizeof(char));
    if (cwd == NULL) exit(-1);
    if (getcwd(cwd, MAXPATHLEN) == NULL) exit(-1);
    sinfo.cwd = strdup(cwd);
    FREE(cwd);

    if (!sinfo.cwd) {
      ERRPRINTF("Can't get current working directory.\n");
      gs_send_tag(sinfo.sock, GS_SVC_ERR_GETCWD);
      exit(-1);
    }

    sprintf(sinfo.request_id, REQUEST_ID_TEMPLATE, sinfo.srv_cid,
       (int) getpid());

    if(gs_create_request_id(sinfo.request_id) < 0) {
      ERRPRINTF("Error creating request id.\n");
      gs_send_tag(sinfo.sock, GS_SVC_ERR_REQID);
      exit(-1);
    }

    if(mkdir(sinfo.request_id, 0700) < 0) {
      ERRPRINTF("Could not create directory '%s' ", sinfo.request_id);
      ERRPRINTF("to store output (cwd = '%s')\n", sinfo.cwd);
      gs_send_tag(sinfo.sock, GS_SVC_ERR_MKDIR);
      exit(-1);
    }

    if(chdir(sinfo.request_id) < 0) {
      ERRPRINTF("Could not cd to request directory '%s'.\n", sinfo.request_id);
      gs_send_tag(sinfo.sock, GS_SVC_ERR_CHDIR);
      exit(-1);
    }

    /* from here on we are inside the request directory, so failures
     * go through service_abnormal_exit to leave marker files behind */

    if(gs_increment_job_count(sinfo.srv_job_count) < 0)
      ERRPRINTF("Warning: failed to increment job count.\n");

    if(gs_send_tag(sinfo.sock, GS_PROT_OK) < 0) {
      ERRPRINTF("Error sending GS_PROT_OK.\n");
      goto service_abnormal_exit;
    }

    if(gs_send_string(sinfo.sock, sinfo.request_id) < 0) {
      ERRPRINTF("Error sending request id.\n");
      goto service_abnormal_exit;
    }

    /* now, if this is an assigned server request, send the problem
       description back to the client. */
    if(sinfo.tag == GS_PROT_PROBLEM_SOLVE_ASSIGNED) {
      char *problemstring = NULL;
      char dsig_string[256];

      sprintf(dsig_string, "%d", sinfo.my_dsig);

      if(gs_send_string(sinfo.sock, dsig_string) < 0) {
        ERRPRINTF("Error sending server data signature.\n");
        goto service_abnormal_exit;
      }

      if(gs_encode_problem(&problemstring, sinfo.problem) < 0) {
        ERRPRINTF("Error encoding problem description.\n");
        goto service_abnormal_exit;
      }

      if(gs_send_string(sinfo.sock, problemstring) < 0) {
        ERRPRINTF("Error sending problem description.\n");
        goto service_abnormal_exit;
      }
    }

#ifdef GS_SMART_GRIDSOLVE
    if(gs_recv_int(sinfo.sock, &sinfo.problem->has_smart_arg_comm) < 0) {
      ERRPRINTF("Error sending problem description.\n");
      goto service_abnormal_exit;
    }

    if(sinfo.problem->has_smart_arg_comm==1){
      if(gs_smart_recv_map_info(sinfo.sock, sinfo.problem)<0){
        ERRPRINTF("Error receiving remote comm info.\n");
        goto service_abnormal_exit;
      }
    }

    if(sinfo.problem->has_smart_arg_comm==1){
      if(gs_smart_recv_input_args(sinfo.sock, server, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig)<0){
        ERRPRINTF("SMART: Error receiving smart input args.\n");
        goto service_abnormal_exit;
      }
    }
    else{
      if(gs_recv_input_args(sinfo.sock, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig) < 0) {
        ERRPRINTF("Error receiving input args.\n");
        goto service_abnormal_exit;
      }
    }
#else
    if(gs_recv_input_args(sinfo.sock, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig) < 0) {
      ERRPRINTF("Error receiving input args.\n");
      goto service_abnormal_exit;
    }
#endif

    /* fixed fallback estimate when no server info could be loaded */
    if(server)
      est_time = gs_agent_get_server_score(sinfo.problem, server);
    else
      est_time = 2000.0;

    if(gs_notify_agent_problem_solve(sinfo.agent, sinfo.agentport,
         sinfo.problem_name, est_time, sinfo.srv_cid, sinfo.cli_username,
         sinfo.cli_hostname, sinfo.cli_cid, sinfo.request_id,
         sinfo.agent_taskid, sinfo.agent_est_time) < 0)
      ERRPRINTF("Warning: failed sending problem solve notification.\n");

    /* a BATCH_SUBMIT entry in the problem info selects the batch path */
    sinfo.bmode = gs_problem_getinfo(sinfo.problem, "BATCH_SUBMIT", NULL);

    if(sinfo.bmode) {
      if(gs_service_batch_request(&sinfo) < 0) {
        gs_send_tag(sinfo.sock, sinfo.err);
        goto service_abnormal_exit;
      }
    }
    else if(sinfo.blocking) {
      if(gs_service_blocking_request(&sinfo) < 0)
        goto service_abnormal_exit;
    }
    else {
      if(gs_service_nonblocking_request(&sinfo) < 0)
        goto service_abnormal_exit;
    }
  }

  gs_close_socket(sinfo.sock);

  exit(0);

service_abnormal_exit:

  /* for non-blocking requests, create a file whose name contains
   * the error code so we'll remember why the service failed when
   * the client connects back to wait for it to complete.
   */
  if(!sinfo.blocking || sinfo.bmode) {
    if(gs_decrement_job_count(sinfo.srv_job_count) < 0)
      ERRPRINTF("Warning: failed to decrement job count.\n");

    if(gs_create_error_file(".", sinfo.err) < 0)
      ERRPRINTF("Could not create 'error' file.\n");
  }

  /* if something goes wrong, write a "cancelled" file to the
   * request subdirectory so that it'll get cleaned up.
   */
  if(gs_create_timestamp_file(".", "cancelled", 0.0) < 0)
    ERRPRINTF("Could not create 'cancelled' file.\n");

  ERRPRINTF("Service terminating abnormally\n");

  exit(-1);
}

| pid_t gs_service_pid = 0 |
Definition at line 90 of file service_template.c.
1.6.3-20100507