diff --git a/src/srun/debugger.c b/src/srun/debugger.c index 5fd351bb9b7ceb4400130077cfc8a8b175ac1fc0..173334496919de46a2e459ced8ad6ba5a56113af 100644 --- a/src/srun/debugger.c +++ b/src/srun/debugger.c @@ -45,6 +45,7 @@ #include "src/common/log.h" #include "src/srun/debugger.h" +#include "src/srun/srun_job.h" /* * Instantiate extern variables from debugger.h @@ -52,20 +53,20 @@ MPIR_PROCDESC *MPIR_proctable; int MPIR_proctable_size; VOLATILE int MPIR_debug_state; -VOLATILE int MPIR_debug_gate; int MPIR_being_debugged; int MPIR_i_am_starter; int MPIR_acquired_pre_main; int MPIR_partial_attach_ok; char *totalview_jobid; -void MPIR_Breakpoint(void) +void MPIR_Breakpoint(srun_job_t *job) { /* * This just notifies parallel debugger that some event of * interest occurred. */ debug("In MPIR_Breakpoint"); + slurm_step_launch_fwd_signal(job->step_ctx, SIG_DEBUG_WAKE); } diff --git a/src/srun/debugger.h b/src/srun/debugger.h index 24942dec4871975e1eb2af5a6cd29cf902ecb830..e2c6f7ae985b138a5f6b9ccdf10f2474f94c3f8d 100644 --- a/src/srun/debugger.h +++ b/src/srun/debugger.h @@ -73,7 +73,6 @@ extern int MPIR_proctable_size; * now free to run. */ extern VOLATILE int MPIR_debug_state; -extern VOLATILE int MPIR_debug_gate; extern int MPIR_partial_attach_ok; extern int MPIR_being_debugged; /* Cause extra info on internal state * to be maintained @@ -89,7 +88,7 @@ extern int MPIR_being_debugged; /* Cause extra info on internal state extern int MPIR_i_am_starter; extern int MPIR_acquired_pre_main; -extern void MPIR_Breakpoint(void); +extern void MPIR_Breakpoint(srun_job_t *job); /* Value for totalview %J expansion in bulk launch string */ diff --git a/src/srun/srun.c b/src/srun/srun.c index 2b5aff6bc933e8ffd448d9ac5f54603e36570424..732972304a90dd1d289936877bb2fd09fa219271 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -146,7 +146,6 @@ static int _run_srun_script (srun_job_t *job, char *script); static void _set_cpu_env_var(resource_allocation_response_msg_t *resp); static void _set_exit_code(void); static int _setup_signals(void); -static void _spawn_mpir_thread(void); static void _step_opt_exclusive(void); static void _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds); static void _set_submit_dir_env(void); @@ -157,7 +156,6 @@ static int _slurm_debug_env_val (void); static void _task_start(launch_tasks_response_msg_t *msg); static void _task_finish(task_exit_msg_t *msg); static char *_uint16_array_to_str(int count, const uint16_t *array); -static void *_wait_mpir_gate(void *arg); /* * from libvirt-0.6.2 GPL2 @@ -475,9 +473,8 @@ int srun(int ac, char **av) launch_params.argv[0]); else mpir_set_executable_names(launch_params.argv[0]); - _spawn_mpir_thread(); MPIR_debug_state = MPIR_DEBUG_SPAWNED; - MPIR_Breakpoint(); + MPIR_Breakpoint(job); if (opt.debugger_test) mpir_dump_proctable(); } else { @@ -523,45 +520,6 @@ cleanup: return (int)global_rc; } -static void _spawn_mpir_thread(void) -{ - int i; - pthread_t thread_id; - pthread_attr_t attr; - bool launched = false; - - if (!MPIR_being_debugged) - return; - - slurm_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - for (i=0; i<3; i++) { - if (i) - usleep(100000); - if (pthread_create(&thread_id, &attr, &_wait_mpir_gate, - NULL) == 0) { - launched = true; - break; - } - } - slurm_attr_destroy(&attr); - if (!launched) - fatal("pthread_create: %m"); -} - -static void *_wait_mpir_gate(void *arg) -{ - while (job->state == SRUN_JOB_RUNNING) { - if (MPIR_debug_gate) { - slurm_step_launch_fwd_signal(job->step_ctx, - SIG_DEBUG_WAKE); - break; - } - usleep(100000); - } - pthread_exit(NULL); -} - static slurm_step_layout_t * _get_slurm_step_layout(srun_job_t *job) {