coremltools: Error when converting tensorflow model to CoreML

Issue

I have a model that was trained in TensorFlow 2.x. The model works perfectly in TensorFlow, OpenVINO and ONNX Runtime formats, but it doesn't convert to Core ML. Inference is correct in TensorFlow, but when I try to convert the model to Core ML format I get the following error.

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/tensorflow/python/framework/importer.py:496, in _import_graph_def_internal(graph_def, input_map, return_elements, validate_colocation_constraints, name, producer_op_list)
    495 try:
--> 496   results = c_api.TF_GraphImportGraphDefWithResults(
    497       graph._c_graph, serialized, options)  # pylint: disable=protected-access
    498   results = c_api_util.ScopedTFImportGraphDefResults(results)

InvalidArgumentError: Input 0 of node Model1/FPN/FPN1/bn/AssignNewValue was passed float from Model1/FPN/FPN1/bn/FusedBatchNormV3/ReadVariableOp/resource:0 incompatible with expected resource.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[15], line 13
      6 width = 256
      8 input_shape = ct.Shape(shape=(ct.RangeDim(lower_bound=1, upper_bound=-1),
      9                     ct.RangeDim(lower_bound=height, upper_bound=1024),
     10                     ct.RangeDim(lower_bound=width, upper_bound=1024),
     11                     3))
---> 13 c_model = ct.convert(model, inputs=[ct.TensorType(shape=input_shape, name=input_name)], source='tensorflow')

Source Code

Here is the source code for loading the model and converting it to Core ML format:

import coremltools as ct
import tensorflow as tf

# Load the trained Keras model from its HDF5 file.
model_pth = "./temp_with_weights_model.h5"
model = tf.keras.models.load_model(model_pth)


print(ct.__version__)

# The converter input is matched to the Keras model's first input by name.
input_name = model.inputs[0].name

height = 256
width = 256

# Flexible input shape: unbounded first dimension, spatial dimensions ranging
# from (height, width) up to 1024, and a fixed last dimension of 3.
input_shape = ct.Shape(shape=(ct.RangeDim(lower_bound=1, upper_bound=-1),
                    ct.RangeDim(lower_bound=height, upper_bound=1024),
                    ct.RangeDim(lower_bound=width, upper_bound=1024),
                    3))

c_model = ct.convert(model, inputs=[ct.TensorType(shape=input_shape, name=input_name)], source='tensorflow')

About this issue

  • Original URL
  • State: closed
  • Created a year ago
  • Comments: 17

Most upvoted comments

@YifanShenSZ I replaced my custom batchnorm with the official TensorFlow BatchNormalization layer, but now the model does not convert with dynamic shapes. If I make the shapes static, the model converts to Core ML and works properly. I can share the error as well as the layer that is causing the issue for dynamic shapes.

Code

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.applications import MobileNetV2, ResNet50
from tensorflow.keras.layers import Input, Conv2D, ReLU, LeakyReLU
import coremltools as ct



def _regularizer(weights_decay):
    """Build an L2 regularizer that uses ``weights_decay`` as its factor."""
    l2_penalty = tf.keras.regularizers.l2(weights_decay)
    return l2_penalty


def _kernel_init(scale=1.0, seed=None):
    """Return a He-normal kernel initializer.

    Args:
        scale: currently unused; kept so existing callers that pass it
            keep working.
        seed: optional seed forwarded to the initializer so weight
            initialization can be made reproducible. ``None`` (the default)
            leaves the initializer unseeded, as before.

    Returns:
        The initializer produced by ``tf.keras.initializers.he_normal``.
    """
    return tf.keras.initializers.he_normal(seed=seed)


# Channel count handed to the FPN layer built further down.
out_ch = 64
# Weight-decay factor handed to the FPN layer built further down.
wd = 5e-4


class ConvUnit(tf.keras.layers.Layer):
    """Composite layer: Conv2D, then BatchNormalization, then an activation.

    Args:
        f: number of filters for the convolution.
        k: kernel size for the convolution.
        s: strides for the convolution.
        wd: weight-decay factor passed to ``_regularizer`` for the kernel.
        act: ``None`` (identity), ``'relu'`` or ``'lrelu'`` (LeakyReLU(0.1)).
        name: layer name forwarded to the Keras base class.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.

    Raises:
        NotImplementedError: if ``act`` is not one of the values above.
    """

    def __init__(self, f=None, k=None, s=None, wd=None, act=None, name='ConvBN', **kwargs):
        super().__init__(name=name, **kwargs)
        # Keep the raw constructor arguments so get_config() can report them.
        self.f, self.k, self.s, self.wd, self.act = f, k, s, wd, act

        # Bias is disabled on the convolution; batch normalization follows it.
        conv_options = dict(
            filters=f,
            kernel_size=k,
            strides=s,
            padding='same',
            kernel_initializer=_kernel_init(),
            kernel_regularizer=_regularizer(wd),
            use_bias=False,
            name='conv',
        )
        self.conv = Conv2D(**conv_options)
        self.bn = tf.keras.layers.BatchNormalization(name='bn')
        self.act_fn = self._build_activation(act)

    @staticmethod
    def _build_activation(act):
        """Map the ``act`` selector to the callable applied after batch norm."""
        if act is None:
            return tf.identity
        if act == 'relu':
            return ReLU()
        if act == 'lrelu':
            return LeakyReLU(0.1)
        raise NotImplementedError(
            'Activation function type {} is not recognized.'.format(act))

    def call(self, x):
        """Apply convolution, batch normalization and activation to ``x``."""
        convolved = self.conv(x)
        normalized = self.bn(convolved)
        return self.act_fn(normalized)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(f=self.f, k=self.k, s=self.s, wd=self.wd, act=self.act)
        return config


class FPN(tf.keras.layers.Layer):
    """Feature Pyramid Network over three feature maps.

    Each of the three inputs goes through its own 1x1 ``ConvUnit``. The
    third map is then resized to the second map's spatial size, added to it
    and passed through a 3x3 ``ConvUnit``; that result is in turn resized to
    the first map's spatial size, added to it and passed through another 3x3
    ``ConvUnit``.

    Args:
        out_ch: number of filters used by every ``ConvUnit`` in the layer.
            When ``out_ch <= 64`` the units use ``'lrelu'``, otherwise
            ``'relu'``.
        wd: weight-decay factor forwarded to every ``ConvUnit``.
        name: layer name; also the prefix of the sub-layer names
            (``name + '1'`` ... ``name + '5'``).
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, out_ch, wd, name='FPN', **kwargs):
        super().__init__(name=name, **kwargs)
        self.act = 'lrelu' if out_ch <= 64 else 'relu'
        self.out_ch = out_ch
        self.wd = wd

        self.output1 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=self.act, name=name+str(1))
        self.output2 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=self.act, name=name+str(2))
        self.output3 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=self.act, name=name+str(3))
        self.merge1 = ConvUnit(f=out_ch, k=3, s=1, wd=wd, act=self.act, name=name+str(4))
        self.merge2 = ConvUnit(f=out_ch, k=3, s=1, wd=wd, act=self.act, name=name+str(5))

    @staticmethod
    def _resize_like(source, reference):
        """Nearest-neighbour resize ``source`` to ``reference``'s dims 1 and 2."""
        # The target size is read at run time with tf.shape so that inputs
        # with unknown (None) spatial dimensions are supported.
        # NOTE(review): the traceback reported with this code shows
        # coremltools' ResizeNearestNeighbor converter failing on a size
        # computed this way -- confirm before relying on Core ML export.
        reference_shape = tf.shape(reference)
        target_size = [reference_shape[1], reference_shape[2]]
        return tf.image.resize(source, target_size, method='nearest')

    def call(self, x):
        """Run the pyramid on ``x`` (a sequence of three feature maps).

        Returns:
            Tuple ``(output1, output2, output3)``: the merged first map, the
            merged second map, and the third map after its 1x1 ``ConvUnit``.
        """
        # Example sizes from the original comments: 80x80, 40x40 and 20x20.
        output1 = self.output1(x[0])
        output2 = self.output2(x[1])
        output3 = self.output3(x[2])

        # Merge the third map into the second one.
        up3 = self._resize_like(output3, output2)
        output2 = self.merge2(tf.add(output2, up3))

        # Merge the (already merged) second map into the first one.
        up2 = self._resize_like(output2, output1)
        output1 = self.merge1(tf.add(output1, up2))

        return output1, output2, output3

    def get_config(self):
        """Return the base layer config extended with ``out_ch`` and ``wd``."""
        config = super().get_config()
        config.update({
            'out_ch': self.out_ch,
            'wd': self.wd,
        })
        return config



# Three symbolic inputs with unknown spatial size and 192 / 576 / 960 channels.
# Static-shape alternative from the original snippet:
#   (100, 100, 192), (50, 50, 576), (25, 25, 960)
x = tuple(Input(shape=(None, None, channels)) for channels in (192, 576, 960))

# Wrap the FPN layer in a functional Keras model and print its summary.
fpn = FPN(out_ch=out_ch, wd=wd)
fpn_outputs = fpn(x)
fpn_mdl = Model(inputs=x, outputs=fpn_outputs)
fpn_mdl.summary()




# Converter inputs are matched to the Keras model inputs by name.
input_name = fpn_mdl.inputs[0].name
input_name1 = fpn_mdl.inputs[1].name
input_name2 = fpn_mdl.inputs[2].name

# Lower bounds for the two flexible spatial dimensions.
height = 200
width = 200

# One flexible shape per model input: unbounded first dimension, spatial
# dimensions bounded below by (height, width), fixed channel count.
input_shape1, input_shape2, input_shape3 = (
    ct.Shape(shape=(
        ct.RangeDim(lower_bound=1, upper_bound=-1),
        ct.RangeDim(lower_bound=height, upper_bound=-1),
        ct.RangeDim(lower_bound=width, upper_bound=-1),
        channels,
    ))
    for channels in (192, 576, 960)
)

converter_inputs = [
    ct.TensorType(shape=flex_shape, name=tensor_name)
    for flex_shape, tensor_name in zip(
        (input_shape1, input_shape2, input_shape3),
        (input_name, input_name1, input_name2),
    )
]

fpn_model = ct.convert(fpn_mdl, inputs=converter_inputs, source='tensorflow')

Issue

2023-06-07 07:12:53.914456: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-07 07:12:53.914593: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2023-06-07 07:12:53.917406: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.007ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2023-06-07 07:12:53.994451: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-07 07:12:53.994538: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2023-06-07 07:12:54.005123: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize
  constant_folding: Graph size after: 72 nodes (-25), 150 edges (-25), time = 3.705ms.
  dependency_optimizer: Graph size after: 72 nodes (0), 75 edges (-75), time = 0.72ms.
  debug_stripper: debug_stripper did nothing. time = 0.07ms.
  constant_folding: Graph size after: 72 nodes (0), 75 edges (0), time = 1.29ms.
  dependency_optimizer: Graph size after: 72 nodes (0), 75 edges (0), time = 0.576ms.
  debug_stripper: debug_stripper did nothing. time = 0.058ms.

Running TensorFlow Graph Passes: 100%|██████████| 6/6 [00:00<00:00, 38.40 passes/s]
Converting TF Frontend ==> MIL Ops:  83%|████████▎ | 60/72 [00:00<00:00, 2437.44 ops/s]
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[35], line 36
     19 input_shape3 = ct.Shape(shape=(ct.RangeDim(lower_bound=1, upper_bound=-1),
     20                     ct.RangeDim(lower_bound=height, upper_bound=-1),
     21                     ct.RangeDim(lower_bound=width, upper_bound=-1),
     22                     960))
     27 # c_model = ct.convert(model, inputs=[ct.TensorType(shape=input_shape, name=input_name)], pass_pipeline=pipeline, source='tensorflow')
     28 
     29 
   (...)
     34 
     35 # c_model = ct.convert(model, inputs=[ct.TensorType(shape=input_shape, name=input_name)], pass_pipeline=pipeline, source='tensorflow')
---> 36 fpn_model = ct.convert(fpn_mdl, inputs=[ct.TensorType(shape=input_shape1, name=input_name), ct.TensorType(shape=input_shape2, name=input_name1), ct.TensorType(shape=input_shape3, name=input_name2)], source='tensorflow')
     39 # # out = fpn
     40 
     41 # features = [SSH(out_ch=out_ch, wd=wd, name=f'SSH_{i}')(f)
   (...)
     70 
     71 # model_after_extractor = Model(fpn, out)

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/_converters_entry.py:492, in convert(model, source, inputs, outputs, classifier_config, minimum_deployment_target, convert_to, compute_precision, skip_model_load, compute_units, package_dir, debug, pass_pipeline)
    489 if specification_version is None:
    490     specification_version = _set_default_specification_version(exact_target)
--> 492 mlmodel = mil_convert(
    493     model,
    494     convert_from=exact_source,
    495     convert_to=exact_target,
    496     inputs=inputs,
    497     outputs=outputs_as_tensor_or_image_types,  # None or list[ct.ImageType/ct.TensorType]
    498     classifier_config=classifier_config,
    499     skip_model_load=skip_model_load,
    500     compute_units=compute_units,
    501     package_dir=package_dir,
    502     debug=debug,
    503     specification_version=specification_version,
    504     main_pipeline=pass_pipeline,
    505 )
    507 if exact_target == 'milinternal':
    508     return mlmodel  # Returns the MIL program

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/converter.py:188, in mil_convert(model, convert_from, convert_to, compute_units, **kwargs)
    149 @_profile
    150 def mil_convert(
    151     model,
   (...)
    155     **kwargs
    156 ):
    157     """
    158     Convert model from a specified frontend `convert_from` to a specified
    159     converter backend `convert_to`.
   (...)
    186         See `coremltools.converters.convert`
    187     """
--> 188     return _mil_convert(model, convert_from, convert_to, ConverterRegistry, MLModel, compute_units, **kwargs)

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/converter.py:212, in _mil_convert(model, convert_from, convert_to, registry, modelClass, compute_units, **kwargs)
    209     weights_dir = _tempfile.TemporaryDirectory()
    210     kwargs["weights_dir"] = weights_dir.name
--> 212 proto, mil_program = mil_convert_to_proto(
    213                         model,
    214                         convert_from,
    215                         convert_to,
    216                         registry,
    217                         **kwargs
    218                      )
    220 _reset_conversion_state()
    222 if convert_to == 'milinternal':

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/converter.py:285, in mil_convert_to_proto(model, convert_from, convert_to, converter_registry, main_pipeline, **kwargs)
    280 frontend_pipeline, backend_pipeline = _construct_other_pipelines(
    281     main_pipeline, convert_from, convert_to
    282 )
    284 frontend_converter = frontend_converter_type()
--> 285 prog = frontend_converter(model, **kwargs)
    286 PipelineManager.apply_pipeline(prog, frontend_pipeline)
    288 PipelineManager.apply_pipeline(prog, main_pipeline)

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/converter.py:98, in TensorFlow2Frontend.__call__(self, *args, **kwargs)
     95 from .frontend.tensorflow2.load import TF2Loader
     97 tf2_loader = TF2Loader(*args, **kwargs)
---> 98 return tf2_loader.load()

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow/load.py:82, in TFLoader.load(self)
     75     dot_string = self._tf_ssa.get_dot_string(
     76         annotation=True, name_and_op_style=True, highlight_debug_nodes=[]
     77     )
     78     graphviz.Source(dot_string).view(
     79         filename="/tmp/ssa_before_tf_passes", cleanup=True
     80     )
---> 82 program = self._program_from_tf_ssa()
     83 logger.debug("program:\n{}".format(program))
     84 return program

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow2/load.py:210, in TF2Loader._program_from_tf_ssa(self)
    203 self._run_tf_ssa_passes()
    204 converter = TF2Converter(
    205     tfssa=self._tf_ssa,
    206     inputs=self.kwargs["inputs"],
    207     outputs=self.kwargs["outputs"],
    208     opset_version=self.kwargs["specification_version"],
    209 )
--> 210 return converter.convert()

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow/converter.py:465, in TFConverter.convert(self)
    463 for g_name in self.graph_stack[1:]:
    464     self.context.add_graph(g_name, self.tfssa.functions[g_name].graph)
--> 465 self.convert_main_graph(prog, graph)
    466 return prog

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow/converter.py:389, in TFConverter.convert_main_graph(self, prog, graph)
    387         input_var = mb.cast(x=input_var, dtype="fp32", name=name)
    388     self.context.add(name, input_var)
--> 389 outputs = convert_graph(self.context, graph, self.output_names)
    390 ssa_func.set_outputs(outputs)
    391 prog.add_function("main", ssa_func)

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow/convert_utils.py:191, in convert_graph(context, graph, outputs)
    187     msg = "Conversion for TF op '{0}' not implemented.\n \n{1}".format(
    188         node.op, node.original_node
    189     )
    190     raise NotImplementedError(msg)
--> 191 add_op(context, node)
    193 if len(node.outputs) > 0:
    194     # set_global / get_global / NoOp has no direct consumer / outputs
    195     x = context[node.name]

File ~/SageMaker/envs/coreml_env/lib64/python3.8/site-packages/coremltools/converters/mil/frontend/tensorflow/ops.py:2584, in ResizeNearestNeighbor(context, node)
   2580 Hout, Wout = None, None
   2581 if context[node.inputs[1]].val is None:
   2582     # for the dynamic input shape case,
   2583     # context[node.inputs[1]] is a mul(x=input_shape, y=scaling_factor) op.
-> 2584     scaling_factor_h = context[node.inputs[1]].op.y.val[0]
   2585     scaling_factor_w = context[node.inputs[1]].op.y.val[1]
   2586 else:

AttributeError: 'concat' object has no attribute 'y'

I tried bisecting the model into parts. The BatchNorm layer after the Conv layer in the ConvUnit custom layer is causing the issue. If I remove the BatchNorm layer, the model converts to Core ML; however, the converted model is not as accurate as the original model.