mediapipe: Building the Python Package for CUDA: Unable to find "segmentation_mask"

System information

  • OS Platform: Linux - Kernel: 5.13.19.2 - Manjaro KDE
  • Programming Language: Python 3.9.7
  • Compiler version: 10.3 and 11.1
  • Installed using virtualenv
  • Upstream MediaPipe
  • Bazel version: 3.7.2
  • OpenCV version: 4.5.4

I’m trying to build a Python package from the upstream version of MediaPipe. I’ve installed all the dependencies, made changes based on other issues, and built a wheel file. However, when I run the holistic example code, I get the following error:

Traceback (most recent call last):
  File ".../main.py", line 11, in <module>
    with mp_holistic.Holistic(
  File ".../venv/lib/python3.9/site-packages/mediapipe/python/solutions/holistic.py", line 109, in __init__
    super().__init__(
  File ".../venv/lib/python3.9/site-packages/mediapipe/python/solution_base.py", line 242, in __init__
    canonical_graph_config_proto = self._initialize_graph_interface(
  File ".../venv/lib/python3.9/site-packages/mediapipe/python/solution_base.py", line 395, in _initialize_graph_interface
    self._output_stream_type_info = {
  File ".../venv/lib/python3.9/site-packages/mediapipe/python/solution_base.py", line 396, in <dictcomp>
    get_name(tag_index_name): get_stream_packet_type(tag_index_name)
  File ".../venv/lib/python3.9/site-packages/mediapipe/python/solution_base.py", line 383, in get_stream_packet_type
    validated_graph.registered_stream_type_name(
RuntimeError: ; Unable to find the type for stream "segmentation_mask".  It may be set to AnyType or something else that isn't determinable, or the type may be defined but not registered.

Process finished with exit code 1
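
For context, the script I’m running is essentially the standard Holistic example from the MediaPipe solution docs (a minimal sketch below; only the Holistic(...) constructor call from the traceback is certain, the rest is assumed):

import cv2
import mediapipe as mp

mp_holistic = mp.solutions.holistic

cap = cv2.VideoCapture(0)
# Per the traceback, the Holistic(...) constructor itself raises the
# RuntimeError before any frame is processed: SolutionBase cannot resolve
# the type of the "segmentation_mask" output stream in the rebuilt graph.
with mp_holistic.Holistic(
    enable_segmentation=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as holistic:
  while cap.isOpened():
    success, frame = cap.read()
    if not success:
      break
    # MediaPipe expects RGB input; OpenCV delivers BGR.
    results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # results.pose_landmarks, results.face_landmarks, results.segmentation_mask, ...
cap.release()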

I have made the following changes in the upstream repo:

diff --git a/.bazelrc b/.bazelrc
index 37a0bc1..5af865c 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -87,6 +87,11 @@ build:darwin_x86_64 --apple_platform_type=macos
 build:darwin_x86_64 --macos_minimum_os=10.12
 build:darwin_x86_64 --cpu=darwin_x86_64
 
+# This config refers to building CUDA op kernels with nvcc.
+build:cuda --repo_env TF_NEED_CUDA=1
+build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+build:cuda --@local_config_cuda//:enable_cuda
+
 # This bazelrc file is meant to be written by a setup script.
 try-import %workspace%/.configure.bazelrc
 
diff --git a/.gitignore b/.gitignore
index b3a8817..4d68342 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ mediapipe/MediaPipe.tulsiproj/*.tulsiconf-user
 mediapipe/provisioning_profile.mobileprovision
 .configure.bazelrc
 .user.bazelrc
+.idea
diff --git a/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt b/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt
index dc2a7b9..09a10f6 100644
--- a/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt
+++ b/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt
@@ -50,7 +50,7 @@
 
 type: "HolisticLandmarkGpu"
 
-# GPU image. (GpuBuffer)
+# CPU image.
 input_stream: "IMAGE:image"
 
 # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
@@ -93,10 +93,22 @@ output_stream: "SEGMENTATION_MASK:segmentation_mask"
 output_stream: "POSE_ROI:pose_landmarks_roi"
 output_stream: "POSE_DETECTION:pose_detection"
 
+node: {
+  calculator: "ColorConvertCalculator"
+  input_stream: "RGB_IN:image"
+  output_stream: "RGBA_OUT:image_rgba"
+}
+
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "image_rgba"
+  output_stream: "image_gpu"
+}
+
 # Predicts pose landmarks.
 node {
   calculator: "PoseLandmarkGpu"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
   input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
@@ -112,7 +124,7 @@ node {
 # Predicts left and right hand landmarks based on the initial pose landmarks.
 node {
   calculator: "HandLandmarksLeftAndRightGpu"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_stream: "POSE_LANDMARKS:pose_landmarks"
   output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
   output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
@@ -133,7 +145,7 @@ node {
 # Predicts face landmarks based on the initial pose landmarks.
 node {
   calculator: "FaceLandmarksFromPoseGpu"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
   output_stream: "FACE_LANDMARKS:face_landmarks"
 }
diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
index 42ce07f..50ca538 100644
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@@ -63,16 +63,18 @@ cc_library(
         "//mediapipe/calculators/core:string_to_int_calculator",
         "//mediapipe/calculators/image:image_transformation_calculator",
         "//mediapipe/calculators/util:detection_unique_id_calculator",
-        "//mediapipe/modules/face_detection:face_detection_full_range_cpu",
-        "//mediapipe/modules/face_detection:face_detection_short_range_cpu",
-        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
-        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu",
-        "//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
-        "//mediapipe/modules/objectron:objectron_cpu",
-        "//mediapipe/modules/palm_detection:palm_detection_cpu",
-        "//mediapipe/modules/pose_detection:pose_detection_cpu",
-        "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
-        "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
-        "//mediapipe/modules/selfie_segmentation:selfie_segmentation_cpu",
+        "//mediapipe/modules/face_detection:face_detection_full_range_gpu",
+        "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
+        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
+        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
+        "//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
+        "//mediapipe/modules/objectron:objectron_gpu",
+        "//mediapipe/modules/palm_detection:palm_detection_gpu",
+        "//mediapipe/modules/pose_detection:pose_detection_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
+        "//mediapipe/modules/selfie_segmentation:selfie_segmentation_gpu",
+        "//mediapipe/calculators/image:color_convert_calculator",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
     ],
 )
diff --git a/mediapipe/python/solutions/face_detection.py b/mediapipe/python/solutions/face_detection.py
index 7d4da8f..4d9b3ed 100644
--- a/mediapipe/python/solutions/face_detection.py
+++ b/mediapipe/python/solutions/face_detection.py
@@ -28,8 +28,8 @@ from mediapipe.calculators.util import non_max_suppression_calculator_pb2
 # pylint: enable=unused-import
 from mediapipe.python.solution_base import SolutionBase
 
-_SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb'
-_FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb'
+_SHORT_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_short_range_gpu.binarypb'
+_FULL_RANGE_GRAPH_FILE_PATH = 'mediapipe/modules/face_detection/face_detection_full_range_gpu.binarypb'
 
 
 def get_key_point(
diff --git a/mediapipe/python/solutions/face_mesh.py b/mediapipe/python/solutions/face_mesh.py
index 1fe9d91..88cfb2a 100644
--- a/mediapipe/python/solutions/face_mesh.py
+++ b/mediapipe/python/solutions/face_mesh.py
@@ -53,7 +53,7 @@ from mediapipe.python.solutions.face_mesh_connections import FACEMESH_TESSELATIO
 
 FACEMESH_NUM_LANDMARKS = 468
 FACEMESH_NUM_LANDMARKS_WITH_IRISES = 478
-_BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/face_landmark/face_landmark_front_gpu.binarypb'
 
 
 class FaceMesh(SolutionBase):
@@ -99,9 +99,9 @@ class FaceMesh(SolutionBase):
             'use_prev_landmarks': not static_image_mode,
         },
         calculator_params={
-            'facedetectionshortrangecpu__facedetectionshortrangecommon__TensorsToDetectionsCalculator.min_score_thresh':
+            'facedetectionshortrangegpu__facedetectionshortrangecommon__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'facelandmarkcpu__ThresholdingCalculator.threshold':
+            'facelandmarkgpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['multi_face_landmarks'])
diff --git a/mediapipe/python/solutions/hands.py b/mediapipe/python/solutions/hands.py
index 08f2d73..4696d09 100644
--- a/mediapipe/python/solutions/hands.py
+++ b/mediapipe/python/solutions/hands.py
@@ -67,7 +67,7 @@ class HandLandmark(enum.IntEnum):
   PINKY_TIP = 20
 
 
-_BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.binarypb'
 
 
 class Hands(SolutionBase):
@@ -113,9 +113,9 @@ class Hands(SolutionBase):
             'use_prev_landmarks': not static_image_mode,
         },
         calculator_params={
-            'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'palmdetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'handlandmarkcpu__ThresholdingCalculator.threshold':
+            'handlandmarkgpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['multi_hand_landmarks', 'multi_handedness'])
diff --git a/mediapipe/python/solutions/holistic.py b/mediapipe/python/solutions/holistic.py
index 70ce491..495944c 100644
--- a/mediapipe/python/solutions/holistic.py
+++ b/mediapipe/python/solutions/holistic.py
@@ -49,7 +49,7 @@ from mediapipe.python.solutions.pose import PoseLandmark
 from mediapipe.python.solutions.pose_connections import POSE_CONNECTIONS
 # pylint: enable=unused-import
 
-_BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_gpu.binarypb'
 
 
 def _download_oss_pose_landmark_model(model_complexity):
@@ -117,9 +117,9 @@ class Holistic(SolutionBase):
             'use_prev_landmarks': not static_image_mode,
         },
         calculator_params={
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=[
diff --git a/mediapipe/python/solutions/objectron.py b/mediapipe/python/solutions/objectron.py
index 28cc026..00373a7 100644
--- a/mediapipe/python/solutions/objectron.py
+++ b/mediapipe/python/solutions/objectron.py
@@ -75,7 +75,7 @@ class BoxLandmark(enum.IntEnum):
   BACK_TOP_RIGHT = 7
   FRONT_TOP_RIGHT = 8
 
-_BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_gpu.binarypb'
 BOX_CONNECTIONS = frozenset([
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
index d4b499f..1942da0 100644
--- a/mediapipe/python/solutions/pose.py
+++ b/mediapipe/python/solutions/pose.py
@@ -87,7 +87,7 @@ class PoseLandmark(enum.IntEnum):
   RIGHT_FOOT_INDEX = 32
 
 
-_BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
 
 
 def _download_oss_pose_landmark_model(model_complexity):
@@ -154,9 +154,9 @@ class Pose(SolutionBase):
             'use_prev_landmarks': not static_image_mode,
         },
         calculator_params={
-            'posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkbyroicpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
+            'poselandmarkbyroigpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['pose_landmarks', 'pose_world_landmarks', 'segmentation_mask'])
diff --git a/mediapipe/python/solutions/selfie_segmentation.py b/mediapipe/python/solutions/selfie_segmentation.py
index 1334e9f..b795b67 100644
--- a/mediapipe/python/solutions/selfie_segmentation.py
+++ b/mediapipe/python/solutions/selfie_segmentation.py
@@ -29,7 +29,7 @@ from mediapipe.framework.tool import switch_container_pb2
 
 from mediapipe.python.solution_base import SolutionBase
 
-_BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb'
+_BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu.binarypb'
 
 
 class SelfieSegmentation(SolutionBase):
diff --git a/mediapipe/util/__init__.py b/mediapipe/util/__init__.py
deleted file mode 100644
index 6db73bc..0000000
--- a/mediapipe/util/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Copyright 2019 The MediaPipe Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
diff --git a/setup.py b/setup.py
index 24fb2e3..0cef902 100644
--- a/setup.py
+++ b/setup.py
@@ -222,13 +222,13 @@ class BuildBinaryGraphs(build.build):
   def run(self):
     _check_bazel()
     binary_graphs = [
-        'face_detection/face_detection_short_range_cpu',
-        'face_detection/face_detection_full_range_cpu',
-        'face_landmark/face_landmark_front_cpu',
-        'hand_landmark/hand_landmark_tracking_cpu',
-        'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
-        'pose_landmark/pose_landmark_cpu',
-        'selfie_segmentation/selfie_segmentation_cpu'
+        'face_detection/face_detection_short_range_gpu',
+        'face_detection/face_detection_full_range_gpu',
+        'face_landmark/face_landmark_front_gpu',
+        'hand_landmark/hand_landmark_tracking_gpu',
+        'holistic_landmark/holistic_landmark_gpu', 'objectron/objectron_gpu',
+        'pose_landmark/pose_landmark_gpu',
+        'selfie_segmentation/selfie_segmentation_gpu'
     ]
     for binary_graph in binary_graphs:
       sys.stderr.write('generating binarypb: %s\n' %
@@ -243,7 +243,8 @@ class BuildBinaryGraphs(build.build):
         'build',
         '--compilation_mode=opt',
         '--copt=-DNDEBUG',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         os.path.join('mediapipe/modules/', graph_path),
     ]
@@ -300,7 +301,8 @@ class BuildBazelExtension(build_ext.build_ext):
         'build',
         '--compilation_mode=opt',
         '--copt=-DNDEBUG',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         str(ext.bazel_target + '.so'),
     ]
diff --git a/third_party/BUILD b/third_party/BUILD
index e2044cf..35e99fd 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -90,7 +90,7 @@ OPENCV_MODULES = [
 # still only builds the shared libraries, so we have to choose one or the
 # other. We build shared libraries by default, but this variable can be used
 # to switch to static libraries.
-OPENCV_SHARED_LIBS = True
+OPENCV_SHARED_LIBS = False
 
 OPENCV_SO_VERSION = "3.4"

About this issue

  • State: closed
  • Created 3 years ago
  • Comments: 17

Most upvoted comments

The MediaPipe Python package only contains CPU graphs. If you need to run any GPU graphs, see #1651 (comment).
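
As a side note, one way to confirm which graphs a given mediapipe wheel actually bundles is to list its .binarypb files (a sketch assuming the standard package layout used by the paths above; the _cpu/_gpu suffix shows which variant was packaged):

import glob
import os

import mediapipe as mp

# The solution classes load their graphs from
# mediapipe/modules/**/<name>_{cpu,gpu}.binarypb inside the installed package,
# so the file names show whether the wheel shipped CPU or GPU graphs.
pkg_dir = os.path.dirname(mp.__file__)
pattern = os.path.join(pkg_dir, "modules", "**", "*.binarypb")
for pb in sorted(glob.glob(pattern, recursive=True)):
    print(os.path.relpath(pb, pkg_dir))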