7.8 KB

  1. #!/usr/bin/env python
  2. '''
You can download the converted pb model (the download link is missing from this copy of the file)
or convert the model yourself.
  5. Follow these steps if you want to convert the original model yourself:
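To get the original .meta pre-trained model, download it from the original project (the download link is missing from this copy of the file).
To correctly convert the .meta model to a .pb model, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
Change the script evaluate_parsing_JPPNet-s2.py for human parsing: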
  9. 1. Remove preprocessing to create image_batch_origin:
  10. with tf.name_scope("create_inputs"):
  11. ...
  12. Add
  13. image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
  14. 2. Create input
  15. image = cv2.imread(path/to/image)
  16. image_rev = np.flip(image, axis=1)
  17. input = np.stack([image, image_rev], axis=0)
  18. 3. Hardcode image_h and image_w shapes to determine output shapes.
We use the default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py:
  20. parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
  21. tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
  22. tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
  23. Do similarly with parsing_out2, parsing_out3
  24. 4. Remove postprocessing. Last net operation:
  25. raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
  26. Change:
parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
5. To save the model, add after sess.run:
  29. input_graph_def = tf.get_default_graph().as_graph_def()
  30. output_node = "Mean_3"
  31. output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
  32. output_graph = "LIP_JPPNet.pb"
  33. with tf.gfile.GFile(output_graph, "wb") as f:
f.write(output_graph_def.SerializeToString())
  35. '''
  36. import argparse
  37. import os.path
  38. import numpy as np
  39. import cv2 as cv
  42. targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD,
  44. def preprocess(image):
  45. """
  46. Create 4-dimensional blob from image and flip image
  47. :param image: input image
  48. """
  49. image_rev = np.flip(image, axis=1)
  50. input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
  51. return input
  52. def run_net(input, model_path, backend, target):
  53. """
  54. Read network and infer model
  55. :param model_path: path to JPPNet model
  56. :param backend: computation backend
  57. :param target: computation device
  58. """
  59. net = cv.dnn.readNet(model_path)
  60. net.setPreferableBackend(backend)
  61. net.setPreferableTarget(target)
  62. net.setInput(input)
  63. out = net.forward()
  64. return out
  65. def postprocess(out, input_shape):
  66. """
  67. Create a grayscale human segmentation
  68. :param out: network output
  69. :param input_shape: input image width and height
  70. """
  71. # LIP classes
  72. # 0 Background
  73. # 1 Hat
  74. # 2 Hair
  75. # 3 Glove
  76. # 4 Sunglasses
  77. # 5 UpperClothes
  78. # 6 Dress
  79. # 7 Coat
  80. # 8 Socks
  81. # 9 Pants
  82. # 10 Jumpsuits
  83. # 11 Scarf
  84. # 12 Skirt
  85. # 13 Face
  86. # 14 LeftArm
  87. # 15 RightArm
  88. # 16 LeftLeg
  89. # 17 RightLeg
  90. # 18 LeftShoe
  91. # 19 RightShoe
  92. head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
  93. head_output = head_output.squeeze(0)
  94. tail_output = tail_output.squeeze(0)
  95. head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]])
  96. tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]])
  97. tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
  98. tail_list = [arr.squeeze(0) for arr in tail_list]
  99. tail_list_rev = [tail_list[i] for i in range(14)]
  100. tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
  101. tail_output_rev = np.stack(tail_list_rev, axis=0)
  102. tail_output_rev = np.flip(tail_output_rev, axis=2)
  103. raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
  104. raw_output_all = np.argmax(raw_output_all, axis=1)
  105. raw_output_all = raw_output_all.transpose(1, 2, 0)
  106. return raw_output_all
  107. def decode_labels(gray_image):
  108. """
  109. Colorize image according to labels
  110. :param gray_image: grayscale human segmentation result
  111. """
  112. height, width, _ = gray_image.shape
  113. colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
  114. (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
  115. (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255),(85, 255, 170),
  116. (170, 255, 85), (255, 255, 0), (255, 170, 0)]
  117. segm = np.stack([colors[idx] for idx in gray_image.flatten()])
  118. segm = segm.reshape(height, width, 3).astype(np.uint8)
  119. segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
  120. return segm
  121. def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
  122. """
  123. Prepare input for execution, run net and postprocess output to parse human.
  124. :param image: input image
  125. :param model_path: path to JPPNet model
  126. :param backend: name of computation backend
  127. :param target: name of computation target
  128. """
  129. input = preprocess(image)
  130. input_h, input_w = input.shape[2:]
  131. output = run_net(input, model_path, backend, target)
  132. grayscale_out = postprocess(output, (input_w, input_h))
  133. segmentation = decode_labels(grayscale_out)
  134. return segmentation
  135. if __name__ == '__main__':
  136. parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
  137. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  138. parser.add_argument('--input', '-i', required=True, help='Path to input image.')
  139. parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.')
  140. parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
  141. help="Choose one of computation backends: "
  142. "%d: automatically (by default), "
  143. "%d: Intel's Deep Learning Inference Engine (, "
  144. "%d: OpenCV implementation, "
  145. "%d: VKCOM, "
  146. "%d: CUDA"% backends)
  147. parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
  148. help='Choose one of target computation devices: '
  149. '%d: CPU target (by default), '
  150. '%d: OpenCL, '
  151. '%d: OpenCL fp16 (half-float precision), '
  152. '%d: NCS2 VPU, '
  153. '%d: HDDL VPU, '
  154. '%d: Vulkan, '
  155. '%d: CUDA, '
  156. '%d: CUDA fp16 (half-float preprocess)' % targets)
  157. args, _ = parser.parse_known_args()
  158. if not os.path.isfile(args.model):
  159. raise OSError("Model not exist")
  160. image = cv.imread(args.input)
  161. output = parse_human(image, args.model, args.backend,
  162. winName = 'Deep learning human parsing in OpenCV'
  163. cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
  164. cv.imshow(winName, output)
  165. cv.waitKey()