Nohup Sequential Commands

Created: `=dateformat(this.file.ctime, "dd MMM yyyy, hh:mm a")` | Modified: `=dateformat(this.file.mtime, "dd MMM yyyy, hh:mm a")` | Tags: knowledge

#!/bin/bash
 
# Call this shell script using the following:
#  `nohup ./nohup_sequential.sh > nohup_sequential.out 2>&1 &`
 
# Commands to run, in order. Each array element is one complete shell command
# line (environment-variable prefix, program, arguments and output
# redirection) that the loop below evaluates as a single unit.
declare -a cmds=()
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg1 value1 > test_script_1.out 2>&1")
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg2 value2 > test_script_2.out 2>&1")
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg3 value3 > test_script_3.out 2>&1")
 
# Loop through the commands
#######################################
# Run shell command lines strictly one after another, logging the start time,
# PID, end time and exit status of each to stdout.
# Arguments: each argument is one complete shell command line. It is run with
#            `eval`, so it may contain env-var prefixes and redirections.
# Outputs:   progress log on stdout.
# Returns:   0 when every command succeeded. On the first failing command the
#            whole script exits with that command's status.
#######################################
run_sequential() {
    local cmd pid status start_time end_time

    echo "=============================================================================="
    for cmd in "$@"; do
        # Capture the start time
        start_time=$(date "+%Y-%m-%d %H:%M:%S")
        echo "======================================="
        # printf '%s' logs the command verbatim; `echo -e` would interpret any
        # backslash sequences it happens to contain.
        printf 'Starting process at %s\n%s\n' "$start_time" "$cmd"
        echo "======================================="

        # Execute the command in the background so its PID can be reported.
        # The quotes matter: unquoted, the string is word-split and globbed
        # before eval re-parses it, which collapses whitespace inside quoted
        # arguments and can expand stray wildcards.
        eval "$cmd" &
        pid=$!
        echo "Started process with PID: $pid"

        # Wait for the process to complete and capture the exit status
        status=0
        wait "$pid" || status=$?

        # Capture the end time
        end_time=$(date "+%Y-%m-%d %H:%M:%S")
        printf 'Process with PID %s ended at %s with status %s.\n\n' \
            "$pid" "$end_time" "$status"

        # Check if the process failed
        if [[ "$status" -ne 0 ]]; then
            echo "Process with PID $pid failed with status $status. Exiting."
            exit "$status"
            # Or can just use `continue` instead of `exit "$status"` to continue the runs
        fi
    done

    echo "All processes completed."
    echo "=============================================================================="
}

run_sequential "${cmds[@]}"

#!/bin/bash
 
# Call this shell script using the following:
#  `nohup ./nohup_check_existing_then_sequential.sh [PID] > nohup_check_existing_then_sequential.out 2>&1 &`
#
# PID (optional first argument) is the process to wait for before the queued
# commands start. When it is omitted, the default below is used.
 
CHECK_PID=${1:-282144} #TODO: give the PID here (or pass it as the first argument)

# Reject anything that is not a plain number up front: a malformed PID would
# make the `ps -p` lookup fail, which looks the same as "process finished" and
# would start the queued commands immediately.
if [[ ! "$CHECK_PID" =~ ^[0-9]+$ ]]; then
    printf 'Invalid PID: %s\n' "$CHECK_PID" >&2
    exit 2
fi
 
#######################################
# Function to check if a process is running.
# Arguments: $1 - PID to look up.
# Outputs:   nothing on stdout (the `ps` listing is discarded); any `ps`
#            error message still reaches stderr.
# Returns:   0 if `ps -p` reports success for the PID, 1 otherwise.
#######################################
is_process_running() {
    # Quote the argument so it always reaches `ps` as exactly one word, and
    # normalise every `ps` failure code to 1.
    ps -p "$1" > /dev/null || return 1
}
 
# Wait for the specific PID to complete if it is still running.
# The delay between checks defaults to one hour and can be overridden with the
# POLL_INTERVAL environment variable (e.g. POLL_INTERVAL=5m). The value is
# handed straight to `sleep`; the default `1h` needs a `sleep` that accepts
# unit suffixes, such as the GNU coreutils one.
poll_interval=${POLL_INTERVAL:-1h}

echo "Checking if process with PID $CHECK_PID is still running..."
while is_process_running "$CHECK_PID"; do
    echo "Process with PID $CHECK_PID is still running. Waiting..."
    sleep "$poll_interval"
done
# Also reached when no process with that PID existed in the first place.
echo "Process with PID $CHECK_PID has completed. Starting the commands."
 
# Commands to run once the awaited process is gone, in order. Each array
# element is one complete shell command line (environment-variable prefix,
# program, arguments and output redirection) that the loop below evaluates as
# a single unit.
declare -a cmds=()
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg1 value1 > test_script_1.out 2>&1")
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg2 value2 > test_script_2.out 2>&1")
cmds+=("CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg3 value3 > test_script_3.out 2>&1")
 
# Loop through the commands
#######################################
# Run shell command lines strictly one after another, logging the start time,
# PID, end time and exit status of each to stdout.
# Arguments: each argument is one complete shell command line. It is run with
#            `eval`, so it may contain env-var prefixes and redirections.
# Outputs:   progress log on stdout.
# Returns:   0 when every command succeeded. On the first failing command the
#            whole script exits with that command's status.
#######################################
run_sequential() {
    local cmd pid status start_time end_time

    echo "=============================================================================="
    for cmd in "$@"; do
        # Capture the start time
        start_time=$(date "+%Y-%m-%d %H:%M:%S")
        echo "======================================="
        # printf '%s' logs the command verbatim; `echo -e` would interpret any
        # backslash sequences it happens to contain.
        printf 'Starting process at %s\n%s\n' "$start_time" "$cmd"
        echo "======================================="

        # Execute the command in the background so its PID can be reported.
        # The quotes matter: unquoted, the string is word-split and globbed
        # before eval re-parses it, which collapses whitespace inside quoted
        # arguments and can expand stray wildcards.
        eval "$cmd" &
        pid=$!
        echo "Started process with PID: $pid"

        # Wait for the process to complete and capture the exit status
        status=0
        wait "$pid" || status=$?

        # Capture the end time
        end_time=$(date "+%Y-%m-%d %H:%M:%S")
        printf 'Process with PID %s ended at %s with status %s.\n\n' \
            "$pid" "$end_time" "$status"

        # Check if the process failed
        if [[ "$status" -ne 0 ]]; then
            echo "Process with PID $pid failed with status $status. Exiting."
            exit "$status"
        fi
    done

    echo "All processes completed."
    echo "=============================================================================="
}

run_sequential "${cmds[@]}"

Example test script (`test_script.py`, the file the commands above run)

import time
import sys
import torch
 
# Everything after the script name is treated as this run's arguments.
cli_args = sys.argv[1:]
print(f"Arguments received: {cli_args}")

# Passing --fail makes the script abort with an exception, which lets a caller
# exercise its failure handling.
if "--fail" in cli_args:
    raise ValueError("Simulated failure.")

# Select the compute device: CUDA when torch reports it available, else CPU.
has_gpu = torch.cuda.is_available()
device = torch.device("cuda" if has_gpu else "cpu")
if has_gpu:
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU not available, using CPU")

# Stand-in workload: multiply a random 10000x10000 matrix by itself on the
# selected device.
print("Performing computation...")
x = torch.rand(10000, 10000, device=device)
y = torch.mm(x, x)

# Drop the tensors, then ask torch to release its cached GPU memory.
del x, y
torch.cuda.empty_cache()

# Pause briefly to imitate a long-running job.
print("Sleeping for 5 seconds...")
time.sleep(5)

print("Script completed.")

Darius Knowledge Hub

Explorer

Nohup Sequential Commands

Nohup Sequential Commands

Example test script

Graph View

Table of Contents