#include #include int main(int argc, char* argv[]) { if(argc != 2) { std::cerr << "usage: ./cuda_merge N\n"; std::exit(EXIT_FAILURE); } unsigned N = std::atoi(argv[1]); // gpu data auto da = tf::cuda_malloc_shared(N); auto db = tf::cuda_malloc_shared(N); auto dc = tf::cuda_malloc_shared(N + N); // host data std::vector ha(N), hb(N), hc(N + N); for(unsigned i=0; i{}, buf ); stream.synchronize(); auto end = std::chrono::steady_clock::now(); std::cout << "GPU merge: " << std::chrono::duration_cast(end-beg).count() << " us\n"; // -------------------------------------------------------------------------- // CPU merge // -------------------------------------------------------------------------- beg = std::chrono::steady_clock::now(); std::merge(ha.begin(), ha.end(), hb.begin(), hb.end(), hc.begin()); end = std::chrono::steady_clock::now(); std::cout << "CPU merge: " << std::chrono::duration_cast(end-beg).count() << " us\n"; // -------------------------------------------------------------------------- // verify the result // -------------------------------------------------------------------------- for(size_t i=0; i