Nohup Sequential Commands
Created: =dateformat(this.file.ctime,"dd MMM yyyy, hh:mm a") | Modified: =dateformat(this.file.mtime,"dd MMM yyyy, hh:mm a")
Tags: knowledge
#!/bin/bash
# Run a fixed list of commands one after another, logging the start time, end
# time and exit status of each, and stop at the first command that fails.
#
# Call this shell script using the following:
# `nohup ./nohup_sequential.sh > nohup_sequential.out 2>&1 &`

# Define the commands. Each entry is a complete shell command line (environment
# assignment and redirections included), which is why it is run through `eval`.
cmds=(
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg1 value1 > test_script_1.out 2>&1"
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg2 value2 > test_script_2.out 2>&1"
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg3 value3 > test_script_3.out 2>&1"
)

# Loop through the commands
echo "=============================================================================="
for cmd in "${cmds[@]}"; do
  # Capture the start time
  start_time=$(date "+%Y-%m-%d %H:%M:%S")
  echo "======================================="
  # printf '%s' logs the command verbatim; `echo -e` would also interpret any
  # backslash sequences that happen to be part of the command string.
  printf 'Starting process at %s\n%s\n' "$start_time" "$cmd"
  echo "======================================="

  # Execute the command in the background and get the PID of that job.
  # The quotes hand the string to eval untouched (no word splitting or
  # globbing before eval parses it).
  eval "$cmd" &
  pid=$!
  echo "Started process with PID: $pid"

  # Wait for the process to complete and capture the exit status
  wait "$pid"
  status=$?

  # Capture the end time
  end_time=$(date "+%Y-%m-%d %H:%M:%S")
  printf 'Process with PID %s ended at %s with status %s.\n\n' \
    "$pid" "$end_time" "$status"

  # Check if the process failed
  if (( status != 0 )); then
    echo "Process with PID $pid failed with status $status. Exiting."
    exit "$status"
    # Or can just use `continue` instead of `exit $status` to continue the runs
  fi
done
echo "All processes completed."
echo "=============================================================================="
#!/bin/bash
# Call this shell script using the following:
# `nohup ./nohup_check_existing_then_sequential.sh [PID] > nohup_check_existing_then_sequential.out 2>&1 &`
#
# PID of the process to wait for. Taken from the first script argument when one
# is given; otherwise the default below is used.
# TODO: give the PID here (or pass it as the first argument)
CHECK_PID="${1:-282144}"
#######################################
# Check whether a process with the given PID exists.
# Arguments: $1 - PID to look up
# Outputs:   nothing on stdout (ps output is discarded)
# Returns:   0 if `ps -p` finds the process, 1 otherwise
#######################################
is_process_running() {
  local pid="${1:-}"
  # An empty PID cannot match anything; do not hand ps a missing argument.
  [[ -n "$pid" ]] || return 1
  if ps -p "$pid" > /dev/null; then
    return 0
  fi
  return 1
}
# Wait for the specific PID to complete if it is still running
echo "Checking if process with PID $CHECK_PID is still running..."
while is_process_running "$CHECK_PID"; do
  echo "Process with PID $CHECK_PID is still running. Waiting..."
  # Poll once per hour (the `h` suffix is a GNU sleep extension)
  sleep 1h
done
echo "Process with PID $CHECK_PID has completed. Starting the commands."

# Define the commands. Each entry is a complete shell command line (environment
# assignment and redirections included), which is why it is run through `eval`.
cmds=(
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg1 value1 > test_script_1.out 2>&1"
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg2 value2 > test_script_2.out 2>&1"
  "CUDA_VISIBLE_DEVICES=0 nohup python test_script.py --arg3 value3 > test_script_3.out 2>&1"
)

# Loop through the commands
echo "=============================================================================="
for cmd in "${cmds[@]}"; do
  # Capture the start time
  start_time=$(date "+%Y-%m-%d %H:%M:%S")
  echo "======================================="
  # printf '%s' logs the command verbatim; `echo -e` would also interpret any
  # backslash sequences that happen to be part of the command string.
  printf 'Starting process at %s\n%s\n' "$start_time" "$cmd"
  echo "======================================="

  # Execute the command in the background and get the PID of that job.
  # The quotes hand the string to eval untouched (no word splitting or
  # globbing before eval parses it).
  eval "$cmd" &
  pid=$!
  echo "Started process with PID: $pid"

  # Wait for the process to complete and capture the exit status
  wait "$pid"
  status=$?

  # Capture the end time
  end_time=$(date "+%Y-%m-%d %H:%M:%S")
  printf 'Process with PID %s ended at %s with status %s.\n\n' \
    "$pid" "$end_time" "$status"

  # Check if the process failed; stop the whole run on the first failure
  if (( status != 0 )); then
    echo "Process with PID $pid failed with status $status. Exiting."
    exit "$status"
  fi
done
echo "All processes completed."
echo "=============================================================================="
Example test script
import time
import sys
import torch


def main(argv=None):
    """Run a small example workload for exercising the sequential runner scripts.

    Prints the received arguments, raises when ``--fail`` is among them,
    performs one matrix multiplication on the GPU if available (CPU otherwise),
    then sleeps for 5 seconds and prints a completion message.

    Args:
        argv: Argument list to process. Defaults to ``sys.argv[1:]``.

    Raises:
        ValueError: If ``--fail`` is present in the arguments.
    """
    # Get the arguments passed to the script
    args = sys.argv[1:] if argv is None else list(argv)

    # Print the arguments
    print(f"Arguments received: {args}")

    # Check for the fail argument
    if "--fail" in args:
        raise ValueError("Simulated failure.")

    # Check for GPU availability
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using GPU:", torch.cuda.get_device_name(0))
    else:
        device = torch.device("cpu")
        print("GPU not available, using CPU")

    # Simulate some computation on the selected device
    print("Performing computation...")
    x = torch.rand(10000, 10000, device=device)
    y = torch.mm(x, x)

    # Free up memory
    del x, y  # Explicitly delete tensors to free up GPU memory
    if device.type == "cuda":
        # Clear the GPU cache (optional, but useful for ensuring memory is
        # actually freed)
        torch.cuda.empty_cache()

    # Sleep for a few seconds to simulate a long-running process
    print("Sleeping for 5 seconds...")
    time.sleep(5)

    # Print a completion message
    print("Script completed.")


if __name__ == "__main__":
    main()