#include "dnn.hpp" #include void run_omp(MNIST& D, unsigned num_threads) { // Create a task flow graph const auto iter_num = D.images.rows()/D.batch_size; const auto num_storage = num_threads; // number of concurrent shuffle tasks const auto num_par_shf = std::min(num_storage, D.epoch); std::vector mats(num_par_shf, D.images); std::vector vecs(num_par_shf, D.labels); const int num_layers = D.acts.size(); // Propagation per epoch const auto prop_per_e = num_layers*iter_num; auto dep_s = new int [D.epoch]; auto dep_f = new int [D.epoch * iter_num]; auto dep_b = new int [D.epoch * prop_per_e]; auto dep_u = new int [D.epoch * prop_per_e]; omp_set_num_threads(num_threads); #pragma omp parallel { #pragma omp single { for(size_t e=0; e=0; j--) { if(j == num_layers-1) { #pragma omp task depend (in: dep_f[e*iter_num + i]) depend (out: dep_b[e*prop_per_e + i*num_layers + j]) firstprivate(j, e, num_par_shf) shared(D, mats) { backward_task(D, j, e%num_par_shf, mats); } } else { #pragma omp task depend (in: dep_b[e*prop_per_e + i*num_layers + j + 1]) depend (out: dep_b[e*prop_per_e + i*num_layers + j]) firstprivate(j, e, num_par_shf) shared(D, mats) { backward_task(D, j, e%num_par_shf, mats); } } } // Update tasks for(int j=num_layers-1; j>=0; j--) { #pragma omp task depend (in: dep_b[e*prop_per_e + i*num_layers + j]) depend (out: dep_u[e*prop_per_e + i*num_layers + j]) firstprivate(j) shared(D) { D.update(j); } } } // End of one iteration } // End of one epoch #pragma omp taskwait } // End of omp single } // End of omp parallel delete [] dep_s; delete [] dep_f; delete [] dep_b; delete [] dep_u; }