Skip to content
Snippets Groups Projects
Commit 43d1fa2b authored by Moe Jette
Browse files

remove MPIR_debug_gate and release tasks upon exit of MPIR_Breakpoint

parent 87d581f4
No related branches found
No related tags found
No related merge requests found
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "src/common/log.h" #include "src/common/log.h"
#include "src/srun/debugger.h" #include "src/srun/debugger.h"
#include "src/srun/srun_job.h"
/* /*
* Instantiate extern variables from debugger.h * Instantiate extern variables from debugger.h
...@@ -52,20 +53,20 @@ ...@@ -52,20 +53,20 @@
MPIR_PROCDESC *MPIR_proctable; MPIR_PROCDESC *MPIR_proctable;
int MPIR_proctable_size; int MPIR_proctable_size;
VOLATILE int MPIR_debug_state; VOLATILE int MPIR_debug_state;
VOLATILE int MPIR_debug_gate;
int MPIR_being_debugged; int MPIR_being_debugged;
int MPIR_i_am_starter; int MPIR_i_am_starter;
int MPIR_acquired_pre_main; int MPIR_acquired_pre_main;
int MPIR_partial_attach_ok; int MPIR_partial_attach_ok;
char *totalview_jobid; char *totalview_jobid;
void MPIR_Breakpoint(void) void MPIR_Breakpoint(srun_job_t *job)
{ {
/* /*
* This just notifies parallel debugger that some event of * This just notifies parallel debugger that some event of
* interest occurred. * interest occurred.
*/ */
debug("In MPIR_Breakpoint"); debug("In MPIR_Breakpoint");
slurm_step_launch_fwd_signal(job->step_ctx, SIG_DEBUG_WAKE);
} }
...@@ -73,7 +73,6 @@ extern int MPIR_proctable_size; ...@@ -73,7 +73,6 @@ extern int MPIR_proctable_size;
* now free to run. * now free to run.
*/ */
extern VOLATILE int MPIR_debug_state; extern VOLATILE int MPIR_debug_state;
extern VOLATILE int MPIR_debug_gate;
extern int MPIR_partial_attach_ok; extern int MPIR_partial_attach_ok;
extern int MPIR_being_debugged; /* Cause extra info on internal state extern int MPIR_being_debugged; /* Cause extra info on internal state
* to be maintained * to be maintained
...@@ -89,7 +88,7 @@ extern int MPIR_being_debugged; /* Cause extra info on internal state ...@@ -89,7 +88,7 @@ extern int MPIR_being_debugged; /* Cause extra info on internal state
extern int MPIR_i_am_starter; extern int MPIR_i_am_starter;
extern int MPIR_acquired_pre_main; extern int MPIR_acquired_pre_main;
extern void MPIR_Breakpoint(void); extern void MPIR_Breakpoint(srun_job_t *job);
/* Value for totalview %J expansion in bulk launch string /* Value for totalview %J expansion in bulk launch string
*/ */
......
...@@ -146,7 +146,6 @@ static int _run_srun_script (srun_job_t *job, char *script); ...@@ -146,7 +146,6 @@ static int _run_srun_script (srun_job_t *job, char *script);
static void _set_cpu_env_var(resource_allocation_response_msg_t *resp); static void _set_cpu_env_var(resource_allocation_response_msg_t *resp);
static void _set_exit_code(void); static void _set_exit_code(void);
static int _setup_signals(void); static int _setup_signals(void);
static void _spawn_mpir_thread(void);
static void _step_opt_exclusive(void); static void _step_opt_exclusive(void);
static void _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds); static void _set_stdio_fds(srun_job_t *job, slurm_step_io_fds_t *cio_fds);
static void _set_submit_dir_env(void); static void _set_submit_dir_env(void);
...@@ -157,7 +156,6 @@ static int _slurm_debug_env_val (void); ...@@ -157,7 +156,6 @@ static int _slurm_debug_env_val (void);
static void _task_start(launch_tasks_response_msg_t *msg); static void _task_start(launch_tasks_response_msg_t *msg);
static void _task_finish(task_exit_msg_t *msg); static void _task_finish(task_exit_msg_t *msg);
static char *_uint16_array_to_str(int count, const uint16_t *array); static char *_uint16_array_to_str(int count, const uint16_t *array);
static void *_wait_mpir_gate(void *arg);
/* /*
* from libvirt-0.6.2 GPL2 * from libvirt-0.6.2 GPL2
...@@ -475,9 +473,8 @@ int srun(int ac, char **av) ...@@ -475,9 +473,8 @@ int srun(int ac, char **av)
launch_params.argv[0]); launch_params.argv[0]);
else else
mpir_set_executable_names(launch_params.argv[0]); mpir_set_executable_names(launch_params.argv[0]);
_spawn_mpir_thread();
MPIR_debug_state = MPIR_DEBUG_SPAWNED; MPIR_debug_state = MPIR_DEBUG_SPAWNED;
MPIR_Breakpoint(); MPIR_Breakpoint(job);
if (opt.debugger_test) if (opt.debugger_test)
mpir_dump_proctable(); mpir_dump_proctable();
} else { } else {
...@@ -523,45 +520,6 @@ cleanup: ...@@ -523,45 +520,6 @@ cleanup:
return (int)global_rc; return (int)global_rc;
} }
/*
 * Launch a detached thread running _wait_mpir_gate().
 *
 * Does nothing unless MPIR_being_debugged is non-zero. Thread creation is
 * tried up to three times, sleeping 100000 microseconds before each retry;
 * if every attempt fails the process terminates through fatal().
 *
 * NOTE(review): pthread_create() reports failure through its return value
 * rather than errno, so the "%m" in the fatal() message may not describe
 * the actual failure -- confirm before relying on that text.
 */
static void _spawn_mpir_thread(void)
{
	pthread_attr_t thread_attr;
	pthread_t tid;
	int attempt = 0;
	bool started = false;

	if (!MPIR_being_debugged)
		return;

	slurm_attr_init(&thread_attr);
	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED);

	while (!started && (attempt < 3)) {
		/* Back off briefly before every attempt except the first */
		if (attempt > 0)
			usleep(100000);
		started = (pthread_create(&tid, &thread_attr,
					  &_wait_mpir_gate, NULL) == 0);
		attempt++;
	}

	slurm_attr_destroy(&thread_attr);

	if (!started)
		fatal("pthread_create: %m");
}
/*
 * Thread body: poll MPIR_debug_gate while the job is in SRUN_JOB_RUNNING.
 *
 * Every 100000 microseconds the job state and then the gate are checked.
 * Once the gate is non-zero, SIG_DEBUG_WAKE is forwarded to the step via
 * slurm_step_launch_fwd_signal() and polling stops. Polling also stops,
 * without sending anything, as soon as the job leaves SRUN_JOB_RUNNING.
 *
 * arg is unused. "job" is not a parameter; it is resolved from an
 * enclosing scope not shown here (presumably a file-level srun_job_t
 * pointer -- verify).
 *
 * The thread always ends through pthread_exit(NULL).
 */
static void *_wait_mpir_gate(void *arg)
{
	for (;;) {
		/* Job is no longer running: nothing left to release */
		if (job->state != SRUN_JOB_RUNNING)
			break;

		if (MPIR_debug_gate) {
			slurm_step_launch_fwd_signal(job->step_ctx,
						     SIG_DEBUG_WAKE);
			break;
		}

		usleep(100000);
	}

	pthread_exit(NULL);
}
static slurm_step_layout_t * static slurm_step_layout_t *
_get_slurm_step_layout(srun_job_t *job) _get_slurm_step_layout(srun_job_t *job)
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment