#!/usr/bin/env python
'''
You can download the converted pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
or convert the model yourself.

Follow these steps if you want to convert the original model yourself:
    To get the original .meta pre-trained model, download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
    To convert the .meta model to .pb correctly, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
    and change the script evaluate_parsing_JPPNet-s2.py for human parsing as follows:
    1. Remove the preprocessing that creates image_batch_origin:
        with tf.name_scope("create_inputs"):
        ...
       and add instead
        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')

    2. Create the input:
        image = cv2.imread(path/to/image)
        image_rev = np.flip(image, axis=1)
        input = np.stack([image, image_rev], axis=0)

    3. Hardcode the image_h and image_w shapes to determine the output shapes.
       We use the default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py:
        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
       Do the same with parsing_out2 and parsing_out3.

    4. Remove the postprocessing. The last net operation is:
        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
       Change the evaluation call to:
        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})

    5. To save the model, add after sess.run(...):
        input_graph_def = tf.get_default_graph().as_graph_def()
        output_node = "Mean_3"
        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
        output_graph = "LIP_JPPNet.pb"
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())

A short sanity check of the exported graph is sketched in the comment right after this docstring.
'''
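
# A minimal sanity check for the converted model (a sketch, not part of the sample's pipeline).
# It assumes the steps above produced LIP_JPPNet.pb in the working directory, and it only
# verifies that OpenCV's dnn module can read the frozen graph and run it on a dummy
# 2x3x384x384 blob (the same layout this sample feeds the network):
#
#   import numpy as np
#   import cv2 as cv
#   net = cv.dnn.readNet('LIP_JPPNet.pb')
#   net.setInput(np.zeros((2, 3, 384, 384), np.float32))
#   print(net.forward().shape)  # a 4-D score blob with 20 LIP class channels
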
import argparse
import os.path

import numpy as np
import cv2 as cv

backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV,
            cv.dnn.DNN_BACKEND_VKCOM, cv.dnn.DNN_BACKEND_CUDA)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD,
           cv.dnn.DNN_TARGET_HDDL, cv.dnn.DNN_TARGET_VULKAN, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16)


def preprocess(image):
    """
    Create a 4-dimensional blob from the image and its horizontally flipped copy
    :param image: input image
    """
    image_rev = np.flip(image, axis=1)
    # blobFromImages keeps the original spatial size, so the blob shape is (2, 3, height, width)
    input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
    return input


def run_net(input, model_path, backend, target):
    """
    Read the network and run inference
    :param input: preprocessed input blob
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation device
    """
    net = cv.dnn.readNet(model_path)
    net.setPreferableBackend(backend)
    net.setPreferableTarget(target)
    net.setInput(input)
    out = net.forward()
    return out


def postprocess(out, input_shape):
    """
    Create a grayscale human segmentation map
    :param out: network output
    :param input_shape: input image width and height
    """
    # LIP classes:
    # 0 Background
    # 1 Hat
    # 2 Hair
    # 3 Glove
    # 4 Sunglasses
    # 5 UpperClothes
    # 6 Dress
    # 7 Coat
    # 8 Socks
    # 9 Pants
    # 10 Jumpsuits
    # 11 Scarf
    # 12 Skirt
    # 13 Face
    # 14 LeftArm
    # 15 RightArm
    # 16 LeftLeg
    # 17 RightLeg
    # 18 LeftShoe
    # 19 RightShoe

    # The batch holds scores for the original image (head) and its flipped copy (tail)
    head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
    head_output = head_output.squeeze(0)
    tail_output = tail_output.squeeze(0)

    head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]])
    tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]])

    # For the flipped image, swap the left/right class channels and flip the maps back
    # before averaging them with the scores of the original image
    tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
    tail_list = [arr.squeeze(0) for arr in tail_list]
    tail_list_rev = [tail_list[i] for i in range(14)]
    tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
    tail_output_rev = np.stack(tail_list_rev, axis=0)
    tail_output_rev = np.flip(tail_output_rev, axis=2)

    raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
    raw_output_all = np.argmax(raw_output_all, axis=1)
    raw_output_all = raw_output_all.transpose(1, 2, 0)
    return raw_output_all
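
# Note (sketch): the map returned by postprocess holds LIP class ids per pixel, so a single
# class can be extracted as a binary mask, e.g. for Face (id 13):
#   face_mask = (grayscale_out == 13).astype(np.uint8) * 255
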


def decode_labels(gray_image):
    """
    Colorize image according to labels
    :param gray_image: grayscale human segmentation result
    """
    height, width, _ = gray_image.shape
    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255), (85, 255, 170),
              (170, 255, 85), (255, 255, 0), (255, 170, 0)]

    segm = np.stack([colors[idx] for idx in gray_image.flatten()])
    segm = segm.reshape(height, width, 3).astype(np.uint8)
    segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
    return segm


def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
    """
    Prepare the input, run the net and postprocess the output to parse the human
    (a usage sketch follows this function).
    :param image: input image
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation target
    """
    input = preprocess(image)
    input_h, input_w = input.shape[2:]
    output = run_net(input, model_path, backend, target)
    grayscale_out = postprocess(output, (input_w, input_h))
    segmentation = decode_labels(grayscale_out)
    return segmentation
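
# Usage sketch for calling parse_human from another script; 'person.jpg' and 'parsed.png'
# are placeholder file names, and the model path assumes the converted lip_jppnet_384.pb:
#
#   import cv2 as cv
#   from human_parsing import parse_human
#   img = cv.imread('person.jpg')
#   colored = parse_human(img, 'lip_jppnet_384.pb')
#   cv.imwrite('parsed.png', colored)
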


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
    parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help="Choose one of computation backends: "
                             "%d: automatically (by default), "
                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                             "%d: OpenCV implementation, "
                             "%d: VKCOM, "
                             "%d: CUDA" % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: NCS2 VPU, '
                             '%d: HDDL VPU, '
                             '%d: Vulkan, '
                             '%d: CUDA, '
                             '%d: CUDA fp16 (half-float precision)' % targets)
    args, _ = parser.parse_known_args()

    if not os.path.isfile(args.model):
        raise OSError("Model file does not exist: " + args.model)

    image = cv.imread(args.input)
    output = parse_human(image, args.model, args.backend, args.target)
    winName = 'Deep learning human parsing in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
    cv.imshow(winName, output)
    cv.waitKey()