// sphereview_data.cpp
/*
 * Software License Agreement (BSD License)
 *
 * Copyright (c) 2009, Willow Garage, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 * * Neither the name of Willow Garage, Inc. nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
/**
 * @file demo_sphereview_data.cpp
 * @brief Generating training data for CNN with triplet loss.
 * @author Yida Wang
 */
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/viz/vizcore.hpp>

#include <dirent.h>

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <stdlib.h>
#include <time.h>

using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
  48. /**
  49. * @function listDir
  50. * @brief Making all files names under a directory into a list
  51. */
  52. static void listDir(const char *path, std::vector<String>& files, bool r)
  53. {
  54. DIR *pDir;
  55. struct dirent *ent;
  56. char childpath[512];
  57. pDir = opendir(path);
  58. memset(childpath, 0, sizeof(childpath));
  59. while ((ent = readdir(pDir)) != NULL)
  60. {
  61. if (ent->d_type & DT_DIR)
  62. {
  63. if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0 || strcmp(ent->d_name, ".DS_Store") == 0)
  64. {
  65. continue;
  66. }
  67. if (r)
  68. {
  69. sprintf(childpath, "%s/%s", path, ent->d_name);
  70. listDir(childpath,files,false);
  71. }
  72. }
  73. else
  74. {
  75. if (strcmp(ent->d_name, ".DS_Store") != 0)
  76. files.push_back(ent->d_name);
  77. }
  78. }
  79. sort(files.begin(),files.end());
  80. };
  81. int main(int argc, char *argv[])
  82. {
  83. const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=6 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}"
  84. "{ite_depth | 3 | Iteration of sphere generation.}"
  85. "{plymodel | ../data/3Dmodel/ape.ply | Path of the '.ply' file for image rendering. }"
  86. "{imagedir | ../data/images_all/ | Path of the generated images for one particular .ply model. }"
  87. "{labeldir | ../data/label_all.txt | Path of the generated images for one particular .ply model. }"
  88. "{bakgrdir | | Path of the backgroud images sets. }"
  89. "{cam_head_x | 0 | Head of the camera. }"
  90. "{cam_head_y | 0 | Head of the camera. }"
  91. "{cam_head_z | -1 | Head of the camera. }"
  92. "{semisphere | 1 | Camera only has positions on half of the whole sphere. }"
  93. "{z_range | 0.6 | Maximum camera position on z axis. }"
  94. "{center_gen | 0 | Find center from all points. }"
  95. "{image_size | 128 | Size of captured images. }"
  96. "{label_class | | Class label of current .ply model. }"
  97. "{label_item | | Item label of current .ply model. }"
  98. "{rgb_use | 0 | Use RGB image or grayscale. }"
  99. "{num_class | 6 | Total number of classes of models. }"
  100. "{binary_out | 0 | Produce binaryfiles for images and label. }"
  101. "{view_region | 0 | Take a special view of front or back angle}";
  102. /* Get parameters from comand line. */
  103. cv::CommandLineParser parser(argc, argv, keys);
  104. parser.about("Generating training data for CNN with triplet loss");
  105. if (parser.has("help"))
  106. {
  107. parser.printMessage();
  108. return 0;
  109. }
  110. int ite_depth = parser.get<int>("ite_depth");
  111. String plymodel = parser.get<String>("plymodel");
  112. String imagedir = parser.get<String>("imagedir");
  113. String labeldir = parser.get<String>("labeldir");
  114. String bakgrdir = parser.get<String>("bakgrdir");
  115. int label_class = parser.get<int>("label_class");
  116. int label_item = parser.get<int>("label_item");
  117. float cam_head_x = parser.get<float>("cam_head_x");
  118. float cam_head_y = parser.get<float>("cam_head_y");
  119. float cam_head_z = parser.get<float>("cam_head_z");
  120. int semisphere = parser.get<int>("semisphere");
  121. float z_range = parser.get<float>("z_range");
  122. int center_gen = parser.get<int>("center_gen");
  123. int image_size = parser.get<int>("image_size");
  124. int rgb_use = parser.get<int>("rgb_use");
  125. int num_class = parser.get<int>("num_class");
  126. int binary_out = parser.get<int>("binary_out");
  127. int view_region = parser.get<int>("view_region");
  128. double obj_dist, bg_dist, y_range;
  129. if (view_region == 1 || view_region == 2)
  130. {
  131. /* Set for TV */
  132. if (label_class == 12)
  133. obj_dist = 340;
  134. else
  135. obj_dist = 250;
  136. ite_depth = ite_depth + 1;
  137. bg_dist = 700;
  138. y_range = 0.85;
  139. }
  140. else if (view_region == 0)
  141. {
  142. obj_dist = 370;
  143. bg_dist = 400;
  144. }
  145. if (label_class == 5 || label_class == 10 || label_class == 11 || label_class == 12)
  146. ite_depth = ite_depth + 1;
  147. cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth);
  148. std::vector<cv::Point3d> campos;
  149. std::vector<cv::Point3d> campos_temp = ViewSphere.CameraPos;
  150. /* Regular objects on the ground using a semisphere view system */
  151. if (semisphere == 1)
  152. {
  153. if (view_region == 1)
  154. {
  155. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  156. {
  157. if (campos_temp.at(pose).z >= 0 && campos_temp.at(pose).z < z_range && campos_temp.at(pose).y < -y_range)
  158. campos.push_back(campos_temp.at(pose));
  159. }
  160. }
  161. else if (view_region == 2)
  162. {
  163. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  164. {
  165. if (campos_temp.at(pose).z >= 0 && campos_temp.at(pose).z < z_range && campos_temp.at(pose).y > y_range)
  166. campos.push_back(campos_temp.at(pose));
  167. }
  168. }
  169. else
  170. {
  171. /* Set for sofa */
  172. if (label_class == 10)
  173. {
  174. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  175. {
  176. if (campos_temp.at(pose).z >= 0 && campos_temp.at(pose).z < z_range && campos_temp.at(pose).y < -0.4)
  177. campos.push_back(campos_temp.at(pose));
  178. }
  179. }
  180. else
  181. {
  182. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  183. {
  184. if (campos_temp.at(pose).z >= 0 && campos_temp.at(pose).z < z_range)
  185. campos.push_back(campos_temp.at(pose));
  186. }
  187. }
  188. }
  189. }
  190. /* Special object such as plane using a full space of view sphere */
  191. else
  192. {
  193. if (view_region == 1)
  194. {
  195. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  196. {
  197. if (campos_temp.at(pose).z < 0.2 && campos_temp.at(pose).z > -0.2 && campos_temp.at(pose).y < -y_range)
  198. campos.push_back(campos_temp.at(pose));
  199. }
  200. }
  201. else if (view_region == 2)
  202. {
  203. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  204. {
  205. if (campos_temp.at(pose).z < 0.2 && campos_temp.at(pose).z > -0.2 && campos_temp.at(pose).y > y_range)
  206. campos.push_back(campos_temp.at(pose));
  207. }
  208. }
  209. else
  210. {
  211. for (int pose = 0; pose < static_cast<int>(campos_temp.size()); pose++)
  212. {
  213. if (campos_temp.at(pose).z < 0.2 && campos_temp.at(pose).z > -0.6)
  214. campos.push_back(campos_temp.at(pose));
  215. }
  216. }
  217. }
  218. std::fstream imglabel;
  219. imglabel.open(labeldir.c_str(), fstream::app|fstream::out);
  220. bool camera_pov = true;
  221. /* Create a window using viz. */
  222. viz::Viz3d myWindow("Coordinate Frame");
  223. /* Set window size. */
  224. myWindow.setWindowSize(Size(image_size,image_size));
  225. /* Set background color. */
  226. myWindow.setBackgroundColor(viz::Color::gray());
  227. myWindow.spinOnce();
  228. /* Create a Mesh widget, loading .ply models. */
  229. viz::Mesh objmesh = viz::Mesh::load(plymodel);
  230. /* Get the center of the generated mesh widget, cause some .ply files, this could be ignored if you are using PASCAL database*/
  231. Point3d cam_focal_point;
  232. if (center_gen)
  233. cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
  234. else
  235. cam_focal_point = Point3d(0,0,0);
  236. const char* headerPath = "../data/header_for_";
  237. const char* binaryPath = "../data/binary_";
  238. if (binary_out)
  239. {
  240. ViewSphere.createHeader(static_cast<int>(campos.size()), image_size, image_size, headerPath);
  241. }
  242. float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
  243. objmesh.cloud = objmesh.cloud/radius*100;
  244. cam_focal_point = cam_focal_point/radius*100;
  245. Point3d cam_y_dir;
  246. cam_y_dir.x = cam_head_x;
  247. cam_y_dir.y = cam_head_y;
  248. cam_y_dir.z = cam_head_z;
  249. char temp[1024];
  250. std::vector<String> name_bkg;
  251. if (bakgrdir.size() != 0)
  252. {
  253. /* List the file names under a given path */
  254. listDir(bakgrdir.c_str(), name_bkg, false);
  255. for (unsigned int i = 0; i < name_bkg.size(); i++)
  256. {
  257. name_bkg.at(i) = bakgrdir + name_bkg.at(i);
  258. }
  259. }
  260. /* Images will be saved as .png files. */
  261. size_t cnt_img;
  262. srand((int)time(0));
  263. do
  264. {
  265. cnt_img = 0;
  266. for(int pose = 0; pose < static_cast<int>(campos.size()); pose++){
  267. /* Add light. */
  268. // double alpha1 = rand()%(314/2)/100;
  269. // double alpha2 = rand()%(314*2)/100;
  270. // printf("%f %f %f/n", ceil(10000*sqrt(1 - sin(alpha1)*sin(alpha1))*sin(alpha2)), 10000*sqrt(1 - sin(alpha1)*sin(alpha1))*cos(alpha2), sin(alpha1)*10000);
  271. // myWindow.addLight(Vec3d(10000*sqrt(1 - sin(alpha1)*sin(alpha1))*sin(alpha2),10000*sqrt(1 - sin(alpha1)*sin(alpha1))*cos(alpha2),sin(alpha1)*10000), Vec3d(0,0,0), viz::Color::white(), viz::Color::white(), viz::Color::black(), viz::Color::white());
  272. int label_x, label_y, label_z;
  273. label_x = static_cast<int>(campos.at(pose).x*100);
  274. label_y = static_cast<int>(campos.at(pose).y*100);
  275. label_z = static_cast<int>(campos.at(pose).z*100);
  276. sprintf (temp,"%02i_%02i_%04i_%04i_%04i_%02i", label_class, label_item, label_x, label_y, label_z, static_cast<int>(obj_dist/100));
  277. String filename = temp;
  278. filename += ".png";
  279. imglabel << filename << ' ' << label_class << endl;
  280. filename = imagedir + filename;
  281. /* Get the pose of the camera using makeCameraPoses. */
  282. if (view_region != 0)
  283. {
  284. cam_focal_point.x = cam_focal_point.y - label_x/5;
  285. }
  286. Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*obj_dist+cam_focal_point, cam_focal_point, cam_y_dir*obj_dist+cam_focal_point);
  287. /* Get the transformation matrix from camera coordinate system to global. */
  288. Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
  289. viz::WMesh mesh_widget(objmesh);
  290. /* Pose of the widget in camera frame. */
  291. Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
  292. /* Pose of the widget in global frame. */
  293. Affine3f cloud_pose_global = transform * cloud_pose;
  294. /* Visualize camera frame. */
  295. if (!camera_pov)
  296. {
  297. viz::WCameraPosition cpw(1); // Coordinate axes
  298. viz::WCameraPosition cpw_frustum(Vec2f(0.5, 0.5)); // Camera frustum
  299. myWindow.showWidget("CPW", cpw, cam_pose);
  300. myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
  301. }
  302. /* Visualize widget. */
  303. if (bakgrdir.size() != 0)
  304. {
  305. cv::Mat img_bg = cv::imread(name_bkg.at(rand()%name_bkg.size()));
  306. /* Back ground images has a distance of 2 times of radius of camera view distance */
  307. cv::viz::WImage3D background_widget(img_bg, Size2d(image_size*4.2, image_size*4.2), Vec3d(-campos.at(pose)*bg_dist+cam_focal_point), Vec3d(campos.at(pose)*bg_dist-cam_focal_point), Vec3d(0,0,-1)*bg_dist+Vec3d(0,2*cam_focal_point.y,0));
  308. myWindow.showWidget("bgwidget", background_widget, cloud_pose_global);
  309. }
  310. // mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
  311. myWindow.showWidget("targetwidget", mesh_widget, cloud_pose_global);
  312. /* Set the viewer pose to that of camera. */
  313. if (camera_pov)
  314. myWindow.setViewerPose(cam_pose);
  315. /* Save screen shot as images. */
  316. myWindow.saveScreenshot(filename);
  317. if (binary_out)
  318. {
  319. /* Write images into binary files for further using in CNN training. */
  320. ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,static_cast<int>(campos.size())*num_class, label_class, static_cast<int>(campos.at(pose).x*100), static_cast<int>(campos.at(pose).y*100), static_cast<int>(campos.at(pose).z*100), rgb_use);
  321. }
  322. cnt_img++;
  323. }
  324. } while (cnt_img != campos.size());
  325. imglabel.close();
  326. return 1;
  327. };