From 85308196d3d85439af008f093eba1875eee4155e Mon Sep 17 00:00:00 2001 From: AntreasAntoniou Date: Fri, 9 Feb 2018 22:01:22 +0000 Subject: [PATCH] Updated tqdm counting issue, added script for setting up environment on head-node --- cifar100_network_trainer.py | 2 +- cifar10_network_trainer.py | 2 +- emnist_network_trainer.py | 2 +- gpu_cluster_environment_script.sh | 20 ++++++++++++++++++++ msd10_network_trainer.py | 2 +- msd25_network_trainer.py | 2 +- 6 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 gpu_cluster_environment_script.sh diff --git a/cifar100_network_trainer.py b/cifar100_network_trainer.py index e5cc4de..54de53c 100644 --- a/cifar100_network_trainer.py +++ b/cifar100_network_trainer.py @@ -83,7 +83,7 @@ with tf.Session() as sess: continue_from_epoch)) # restore previous graph to continue operations best_val_accuracy = 0. - with tqdm.tqdm(total=epochs) as epoch_pbar: + with tqdm.tqdm(total=epochs - start_epoch) as epoch_pbar: for e in range(start_epoch, epochs): total_c_loss = 0. total_accuracy = 0. diff --git a/cifar10_network_trainer.py b/cifar10_network_trainer.py index d456c9c..6cf61f3 100644 --- a/cifar10_network_trainer.py +++ b/cifar10_network_trainer.py @@ -83,7 +83,7 @@ with tf.Session() as sess: continue_from_epoch)) # restore previous graph to continue operations best_val_accuracy = 0. - with tqdm.tqdm(total=epochs) as epoch_pbar: + with tqdm.tqdm(total=epochs - start_epoch) as epoch_pbar: for e in range(start_epoch, epochs): total_c_loss = 0. total_accuracy = 0. diff --git a/emnist_network_trainer.py b/emnist_network_trainer.py index f33a436..1af1956 100644 --- a/emnist_network_trainer.py +++ b/emnist_network_trainer.py @@ -83,7 +83,7 @@ with tf.Session() as sess: continue_from_epoch)) # restore previous graph to continue operations best_val_accuracy = 0. - with tqdm.tqdm(total=epochs) as epoch_pbar: + with tqdm.tqdm(total=epochs - start_epoch) as epoch_pbar: for e in range(start_epoch, epochs): total_c_loss = 0.
total_accuracy = 0. diff --git a/gpu_cluster_environment_script.sh b/gpu_cluster_environment_script.sh new file mode 100644 index 0000000..5d9cd91 --- /dev/null +++ b/gpu_cluster_environment_script.sh @@ -0,0 +1,20 @@ +#!/bin/bash +export CUDA_HOME=/opt/cuda-8.0.44 + +export CUDNN_HOME=/opt/cuDNN-6.0_8.0 + +export STUDENT_ID=sxxxxxx + +export LD_LIBRARY_PATH=${CUDNN_HOME}/lib64:${CUDA_HOME}/lib64:$LD_LIBRARY_PATH + +export LIBRARY_PATH=${CUDNN_HOME}/lib64:$LIBRARY_PATH + +export CPATH=${CUDNN_HOME}/include:$CPATH + +export PATH=${CUDA_HOME}/bin:${PATH} + +export PYTHON_PATH=$PATH + +# Activate the relevant virtual environment: + +source /home/${STUDENT_ID}/miniconda3/bin/activate mlp diff --git a/msd10_network_trainer.py b/msd10_network_trainer.py index 584347b..e379b4c 100644 --- a/msd10_network_trainer.py +++ b/msd10_network_trainer.py @@ -84,7 +84,7 @@ with tf.Session() as sess: continue_from_epoch)) # restore previous graph to continue operations best_val_accuracy = 0. - with tqdm.tqdm(total=epochs) as epoch_pbar: + with tqdm.tqdm(total=epochs - start_epoch) as epoch_pbar: for e in range(start_epoch, epochs): total_c_loss = 0. total_accuracy = 0. diff --git a/msd25_network_trainer.py b/msd25_network_trainer.py index bc94997..ea86b8c 100644 --- a/msd25_network_trainer.py +++ b/msd25_network_trainer.py @@ -83,7 +83,7 @@ with tf.Session() as sess: continue_from_epoch)) # restore previous graph to continue operations best_val_accuracy = 0. - with tqdm.tqdm(total=epochs) as epoch_pbar: + with tqdm.tqdm(total=epochs - start_epoch) as epoch_pbar: for e in range(start_epoch, epochs): total_c_loss = 0. total_accuracy = 0.