multi.cpp 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /* This sample demonstrates the way you can perform independent tasks
  2. on the different GPUs */
  3. // Disable some warnings which are caused by CUDA headers
  4. #if defined(_MSC_VER)
  5. #pragma warning(disable: 4201 4408 4100)
  6. #endif
  7. #include <iostream>
  8. #include "opencv2/core.hpp"
  9. #include "opencv2/cudaarithm.hpp"
  10. #if !defined(HAVE_CUDA)
  // Entry point used when HAVE_CUDA is not defined: writes a notice about the
  // missing CUDA support to standard output and returns 0.
  int main()
  {
      const char* const notice =
          "CUDA support is required (OpenCV CMake parameter 'WITH_CUDA' must be true).";
      std::cout << notice << std::endl;
      return 0;
  }
  16. #else
  17. using namespace std;
  18. using namespace cv;
  19. using namespace cv::cuda;
  20. struct Worker : public cv::ParallelLoopBody
  21. {
  22. void operator()(const Range& r) const CV_OVERRIDE
  23. {
  24. for (int i = r.start; i < r.end; ++i) { this->operator()(i); }
  25. }
  26. void operator()(int device_id) const;
  27. };
  28. int main()
  29. {
  30. int num_devices = getCudaEnabledDeviceCount();
  31. if (num_devices < 2)
  32. {
  33. std::cout << "Two or more GPUs are required\n";
  34. return -1;
  35. }
  36. for (int i = 0; i < num_devices; ++i)
  37. {
  38. cv::cuda::printShortCudaDeviceInfo(i);
  39. DeviceInfo dev_info(i);
  40. if (!dev_info.isCompatible())
  41. {
  42. std::cout << "CUDA module isn't built for GPU #" << i << " ("
  43. << dev_info.name() << ", CC " << dev_info.majorVersion()
  44. << dev_info.minorVersion() << "\n";
  45. return -1;
  46. }
  47. }
  48. // Execute calculation in two threads using two GPUs
  49. cv::Range devices(0, 2);
  50. cv::parallel_for_(devices, Worker(), devices.size());
  51. return 0;
  52. }
  53. void Worker::operator()(int device_id) const
  54. {
  55. setDevice(device_id);
  56. Mat src(1000, 1000, CV_32F);
  57. Mat dst;
  58. RNG rng(0);
  59. rng.fill(src, RNG::UNIFORM, 0, 1);
  60. // CPU works
  61. cv::transpose(src, dst);
  62. // GPU works
  63. GpuMat d_src(src);
  64. GpuMat d_dst;
  65. cuda::transpose(d_src, d_dst);
  66. // Check results
  67. bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
  68. std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
  69. << (passed ? "passed" : "FAILED") << endl;
  70. // Deallocate data here, otherwise deallocation will be performed
  71. // after context is extracted from the stack
  72. d_src.release();
  73. d_dst.release();
  74. }
  75. #endif