#!/bin/bash -l
#
# Jupyter notebook server launcher for the ODU Wahab cluster.
#
# This is the LOCAL script to be run on the user's machine.
# It sets up the necessary port forwarding after the Jupyter
# process has been launched successfully.
# This script depends on the REMOTE counterpart script
# (DS_jupyter).
#
# Requirements:
# * bash (to run this script)
# * basic file utils (mkdir, cp, mv, rm...)
# * ssh
# * mktemp
# * awk
# * tee
# * grep
#
# TODO
# - cleanup of all server processes (live or dead)
# - cleanup of old Jupyter log files
#   (~/.jupyter/jupyter_batch.jNNNN on the cluster home dir)
# - reconnect to the most recent server process
# - automatically determine a free local TCP port to use
# - check validity of LOCAL_PORT, LOAD_MODULES;
#   avoid any possible exploits.

OPT_HEADLESS=
OPT_ML_PACKAGES=
OPT_DL_PACKAGES=
OPT_DEVEL_LAUNCHER=
# FIXME: For the time being the launch mode is hardwired.
OPT_LAUNCH_MODE=batch
HAS_TIME=
MYSELF=$0

REMOTE_HOST=wahab.hpc.odu.edu
USER_ID=
LOCAL_PORT=8888
LOAD_MODULES=

process_args() {
    # Parses command-line arguments; anything not handled below is
    # collected into SLURM_ARGS and forwarded to the remote launcher.
    local arg args i
    args=( "$@" )
    for ((i = 0; i < ${#args[@]}; ++i)); do
        arg=${args[i]}
        if [ -n "$DEBUG" ]; then
            echo "Processing: $arg"
        fi
        case "${arg}" in
        (-t|--time|--time=*)
            HAS_TIME=yes
            # Note: no "continue" here; the time spec is still appended
            # to SLURM_ARGS below.
            ;;
        (-h|--headless)
            OPT_HEADLESS=yes
            continue
            ;;
        (--ml)
            OPT_ML_PACKAGES=yes
            continue
            ;;
        (--dl)
            OPT_ML_PACKAGES=yes
            OPT_DL_PACKAGES=yes
            #SLURM_ARGS=( "${SLURM_ARGS[@]}" -C AVX2 )
            continue
            ;;
        (-u|--user)
            # Sets up the user ID for Wahab
            USER_ID=${args[i+1]}
            (( ++i ))
            continue
            ;;
        (--local-port)
            # Sets up the local TCP port to pass thru to the
            # Jupyter process on a Wahab compute node
            LOCAL_PORT=${args[i+1]}
            (( ++i ))
            continue
            ;;
        (--modules)
            # Loads extra modules just before Jupyter is launched
            LOAD_MODULES=${args[i+1]}
            (( ++i ))
            continue
            ;;

        # UNDOCUMENTED OPTIONS
        (--devel--)
            # Use self-provided "devel" version of the DS_jupyter script
            OPT_DEVEL_LAUNCHER=yes
            continue
            ;;

        # For later: different launch modes (e.g. reconnect, kill, ...)
        # (--batch)
        #     OPT_LAUNCH_MODE=batch
        #     continue
        #     ;;
        esac
        SLURM_ARGS=( "${SLURM_ARGS[@]}" "$arg" )
    done
    # FIXME: Can do more thorough arg checking later.
}

_get_jupyter_token() {
    # Usage:
    #     _get_jupyter_token JUPYTER_OUTPUT_FILE
    # Output vars:
    # * jupyter_suffix
    # * jupyter_token
    # * jupyter_port
    # * jupyter_host
    jupyter_host=$(awk '/^\* Jupyter-host: / { print $3; exit }' "$1")
    jupyter_port=$(awk '/^\* Jupyter-port: / { print $3; exit }' "$1")
    jupyter_token=$(awk '/^\* Jupyter-token: / { print $3; exit }' "$1")
    jupyter_suffix="${jupyter_port}/?token=${jupyter_token}"
    if [ -z "$jupyter_token" ]; then
        echo "Error: failed to retrieve the Jupyter token from the server output." >&2
        exit 2
    fi
}
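# NOTE: _get_jupyter_token above assumes that the remote DS_jupyter script
# prints marker lines of the following form (the values shown here are only
# illustrative placeholders; the real ones are produced by the batch job):
#
#     * Jupyter-host: COMPUTE_NODE_HOSTNAME
#     * Jupyter-port: 8888
#     * Jupyter-token: LONG_RANDOM_TOKEN
#
# Each value is taken from the third whitespace-separated field of its line.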
clean_up() {
    # Removes the temporary file that captured the remote launcher output.
    #job_id=$(awk '/srun: job/ {print $3;exit}' $jupyter_output)
    #scancel $job_id
    if [ -n "$DEBUG" ]; then
        rm -i -v "$jupyter_output"
    else
        rm -f "$jupyter_output"
    fi
    #echo "Terminated SLURM job $job_id"
}

launch_remote_jupyter() {
    set -e
    set -m
    jupyter_output=$(mktemp ~/.jupyter_remote.XXXXXX)
    trap clean_up EXIT
    if [ -z "$USER_ID" ]; then
        echo "Error: USER_ID is not set. Use the '--user YOUR_MIDAS_ID' option." >&2
        exit 1
    fi
    _submit_remote_batch_jupyter
    _establish_pipeline
}

_submit_remote_batch_jupyter() {
    # Connects to Wahab to submit the Jupyter server through sbatch,
    # waits until the server is ready, then extracts the key information
    # needed to set up the pipeline.
    #
    # Usage:
    #     _submit_remote_batch_jupyter
    # Passes on options to DS_jupyter.
    local script_flags= slurm_flags= flag ssh_host ssh_cmd
    local DS_jupyter=DS_jupyter_wahab
    for flag in "${SLURM_ARGS[@]}"; do
        slurm_flags="$slurm_flags \"$flag\""
    done
    script_flags="--batch --ml --local-port \"$LOCAL_PORT\""
    if [ "x$OPT_DL_PACKAGES" = xyes ]; then
        script_flags="$script_flags --dl"
    fi
    if [ -n "$LOAD_MODULES" ]; then
        script_flags="$script_flags --modules \"$LOAD_MODULES\""
    fi

    # Undocumented features
    if [ x"$OPT_DEVEL_LAUNCHER" = xyes ]; then
        DS_jupyter=DeapSECURE/devel/deapsecure-mod02-bd-devel/bin/DS_jupyter_wahab
    fi
    # End undocumented features

    ssh_host=${USER_ID}@${REMOTE_HOST}
    ssh_cmd="enable_lmod; module load DeapSECURE; $DS_jupyter $script_flags $slurm_flags"
    if [ -n "$DEBUG" ]; then
        echo "SSH host: $ssh_host"
        echo "SSH command: $ssh_cmd"
    fi
    ssh "$ssh_host" "$ssh_cmd" \
        | tee -a "$jupyter_output"
    if ! grep -q "OK: Jupyter server launched successfully" "$jupyter_output"; then
        echo "Error: failed to submit the Jupyter batch job on the remote host." >&2
        exit 2
    fi
}

_establish_pipeline() {
    # Establishes the pipeline (port forwarding) to talk to the real Jupyter server.
    _get_jupyter_token "$jupyter_output"
    echo "Setting up pipeline to connect to the Jupyter server..."
    ssh -f -N -t -L "${LOCAL_PORT}:${jupyter_host}:${jupyter_port}" \
        "${USER_ID}@${REMOTE_HOST}"
    echo "SSH tunnel established in the background."
    #echo "Now please launch your browser to connect to Jupyter:"
    #echo
    #echo "    http://localhost:${LOCAL_PORT}/?token=${jupyter_token}"
    #echo
    echo "Do not kill the background SSH process; doing so will break the pipeline."
    echo
    echo "In case the pipeline dies, you can reconnect by issuing on this terminal:"
    echo
    echo "    ssh -L ${LOCAL_PORT}:${jupyter_host}:${jupyter_port} ${USER_ID}@${REMOTE_HOST}"
    echo
}

# FIXME:
SLURM_ARGS=( )
process_args "$@"
#echo "Requesting ML packages: ${OPT_ML_PACKAGES:-no}"

# ----------- Begin main actions

if [ "$OPT_LAUNCH_MODE" = batch ]; then
    launch_remote_jupyter
else
    echo "Bug: unrecognized OPT_LAUNCH_MODE = $OPT_LAUNCH_MODE" >&2
    exit 2
fi
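# Example invocation (a sketch only; THIS_SCRIPT.sh, MIDAS_ID, the local
# port, and the SLURM time limit are placeholders to be replaced with your
# own script name and values):
#
#     ./THIS_SCRIPT.sh --user MIDAS_ID --dl --local-port 8890 -t 2:00:00
#
# SLURM options such as -t/--time, plus anything else not handled in
# process_args, are appended to SLURM_ARGS and forwarded to the remote
# DS_jupyter launcher.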