tensorflow: TensorFlow 1.4 is 8 times slower than TensorFlow 1.3 when reading data

System information

  • Have I written custom code (as opposed to using a stock example script provided in TensorFlow): Yes
  • OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Windows
  • TensorFlow installed from (source or binary): python wheel
  • TensorFlow version (use command below): 1.4 and 1.3
  • Python version: 3.6.1
  • Bazel version (if compiling from source):
  • GCC/Compiler version (if compiling from source):
  • CUDA/cuDNN version: None
  • GPU model and memory: None
  • Exact command to reproduce:

When I run my script with the Estimator API under TensorFlow 1.4, it is 8 times slower than under TensorFlow 1.3.

Source code / logs

main script

#!/usr/bin/env python
__author__ = 'zj'

import argparse
import os
import sys
import numpy as np
import time
try:
    import better_exceptions
except ImportError:
    pass
import tensorflow as tf
from src.model_ori import crnn_fn
from src.data_handler import data_loader
from src.config import Params, Alphabet
from src.input_utils import input_fn


def main(unused_argv):
    models_path = FLAGS.input_model_dir
    if not os.path.exists(models_path):
        raise FileNotFoundError(models_path)

    models_list = [os.path.join(models_path, x[:-5]) for x in os.listdir(models_path) if x.endswith('.meta')]

    if not os.path.exists(FLAGS.output_model_dir):
        os.makedirs(FLAGS.output_model_dir)

    parameters = Params(eval_batch_size=128,
                        input_shape=(32, 304),
                        digits_only=False,
                        alphabet=Alphabet.CHINESECHAR_LETTERS_DIGITS_EXTENDED,
                        alphabet_decoding='same',
                        image_channels=1,
                        csv_delimiter=' ',
                        csv_files_eval=FLAGS.csv_files_eval,
                        output_model_dir=FLAGS.output_model_dir,
                        gpu=FLAGS.gpu
                        )

    model_params = {
        'Params': parameters,
    }

    os.environ['CUDA_VISIBLE_DEVICES'] = parameters.gpu
    config_sess = tf.ConfigProto()
    config_sess.gpu_options.per_process_gpu_memory_fraction = 0.6

    # Config estimator
    est_config = tf.estimator.RunConfig()
    est_config = est_config.replace(session_config=config_sess,
                                    save_summary_steps=100,
                                    model_dir=parameters.output_model_dir)

    estimator = tf.estimator.Estimator(model_fn=crnn_fn,
                                       params=model_params,
                                       config=est_config,
                                       model_dir=parameters.output_model_dir,
                                       )
    try:
        with open(FLAGS.output_file, encoding='utf-8', mode='w') as save_file:
            for model in models_list:
                start = time.time()
                
                eval_results = estimator.evaluate(input_fn=data_loader(csv_filename=parameters.csv_files_eval,
                                                                       params=parameters,
                                                                       batch_size=parameters.eval_batch_size,
                                                                       num_epochs=1),
                                                  steps=3,
                                                  checkpoint_path=model)
                print('time:', time.time() - start)
                print('model: %s Evaluation results: %s' % (model, str(eval_results)))
                save_file.write(model + ' ' + str(eval_results) + '\n')

    except KeyboardInterrupt:
        print('Interrupted')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-fe', '--csv_files_eval', required=False, type=str, help='CSV filename for evaluation',
                        nargs='*', default=['E:/val1.csv'])
    parser.add_argument('-o', '--output_model_dir', required=False, type=str,
                        help='Directory for output', default='models_vgg_100K_no_eval')
    parser.add_argument('-m', '--input_model_dir', required=False, type=str,
                        help='Directory containing the models to evaluate', default='model_test')
    parser.add_argument('-g', '--gpu', type=str, help="GPU 0,1 or '' ", default='0')
    parser.add_argument('-of', '--output_file', required=False, type=str, default='123.txt', help="the log output file")

    tf.logging.set_verbosity(tf.logging.DEBUG)
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
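
# Hypothetical example invocation (the "Exact command to reproduce" field above
# was left blank; this simply spells out the argparse defaults declared here):
#   python main.py -m model_test -fe E:/val1.csv -o models_vgg_100K_no_eval -g 0 -of 123.txt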

data_loader script

#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from .config import Params, CONST
from typing import Tuple


def data_loader(csv_filename: str, params: Params, batch_size: int = 128, data_augmentation: bool = False,
                num_epochs: int = None, image_summaries: bool = False):
    def input_fn():
        # Accept either a single CSV file or a list of CSV files
        filenames = csv_filename if isinstance(csv_filename, list) else [csv_filename]
        filename_queue = tf.train.string_input_producer(filenames, num_epochs=num_epochs,
                                                        name='filename_queue')

        # Read the CSV lines one at a time (no header lines to skip)
        reader = tf.TextLineReader(name='CSV_Reader', skip_header_lines=0)
        key, value = reader.read(filename_queue, name='file_reading_op')

        default_line = [['None'], ['None']]
        path, label = tf.decode_csv(value, record_defaults=default_line, field_delim=params.csv_delimiter,
                                    name='csv_reading_op')

        image, img_width = image_reading(path, resized_size=params.input_shape, params=params,
                                         data_augmentation=data_augmentation, padding=True)

        to_batch = {'images': image, 'images_widths': img_width, 'filenames': path, 'labels': label}
        prepared_batch = tf.train.shuffle_batch(to_batch,
                                                batch_size=batch_size,
                                                min_after_dequeue=500,
                                                num_threads=15, capacity=4000,
                                                allow_smaller_final_batch=False,
                                                name='prepared_batch_queue')

        if image_summaries:
            tf.summary.image('input/image', prepared_batch.get('images'), max_outputs=1)
        tf.summary.text('input/labels', prepared_batch.get('labels')[:10])
        tf.summary.text('input/widths', tf.as_string(prepared_batch.get('images_widths')))

        return prepared_batch, prepared_batch.get('labels')

    return input_fn
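

# Note (not part of the original report): the input_fn above uses the old
# queue-based pipeline (string_input_producer / TextLineReader / shuffle_batch).
# TensorFlow 1.4 promoted the tf.data API into core; a sketch of an equivalent
# tf.data-based loader follows. This is hypothetical code, assuming the same
# CSV layout (path<delimiter>label) and reusing the image_reading() helper
# defined below.
def data_loader_tfdata(csv_filename, params: Params, batch_size: int = 128,
                       num_epochs: int = None):
    def input_fn():
        filenames = csv_filename if isinstance(csv_filename, list) else [csv_filename]
        dataset = tf.data.TextLineDataset(filenames)

        def parse_line(line):
            path, label = tf.decode_csv(line, record_defaults=[['None'], ['None']],
                                        field_delim=params.csv_delimiter)
            image, img_width = image_reading(path, params=params,
                                             resized_size=params.input_shape,
                                             padding=True)
            features = {'images': image, 'images_widths': img_width,
                        'filenames': path, 'labels': label}
            return features, label

        dataset = (dataset.map(parse_line, num_parallel_calls=15)
                   .shuffle(buffer_size=4000)
                   .repeat(num_epochs)
                   .batch(batch_size))
        return dataset.make_one_shot_iterator().get_next()

    return input_fn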


def image_reading(path: str, params: Params, resized_size: Tuple[int, int] = None, data_augmentation: bool = False,
                  padding: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
    # Read image
    image_content = tf.read_file(path, name='image_reader')
    image = tf.cond(tf.equal(tf.string_split([path], '.').values[1], tf.constant('jpg', dtype=tf.string)),
                    true_fn=lambda: tf.image.decode_jpeg(image_content, channels=params.image_channels,
                                                         try_recover_truncated=True),  # TODO channels = 3 ?
                    false_fn=lambda: tf.image.decode_png(image_content, channels=params.image_channels),
                    name='image_decoding')

    # Data augmentation
    if data_augmentation:
        image = augment_data(image)

    # Padding
    if padding:
        with tf.name_scope('padding'):
            image, img_width = padding_inputs_width(image, resized_size, increment=CONST.DIMENSION_REDUCTION_W_POOLING)
    # Resize
    else:
        image = tf.image.resize_images(image, size=resized_size)
        img_width = tf.shape(image)[1]

    with tf.control_dependencies([tf.assert_equal(image.shape[:2], resized_size)]):
        return image, img_width


def random_rotation(img: tf.Tensor, max_rotation: float = 0.1, crop: bool = True) -> tf.Tensor:  # from SeguinBe
    with tf.name_scope('RandomRotation'):
        rotation = tf.random_uniform([], -max_rotation, max_rotation)
        rotated_image = tf.contrib.image.rotate(img, rotation, interpolation='BILINEAR')
        if crop:
            rotation = tf.abs(rotation)
            original_shape = tf.shape(rotated_image)[:2]
            h, w = original_shape[0], original_shape[1]
            # see https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders for formulae
            old_l, old_s = tf.cond(h > w, lambda: [h, w], lambda: [w, h])
            old_l, old_s = tf.cast(old_l, tf.float32), tf.cast(old_s, tf.float32)
            new_l = (old_l * tf.cos(rotation) - old_s * tf.sin(rotation)) / tf.cos(2 * rotation)
            new_s = (old_s - tf.sin(rotation) * new_l) / tf.cos(rotation)
            new_h, new_w = tf.cond(h > w, lambda: [new_l, new_s], lambda: [new_s, new_l])
            new_h, new_w = tf.cast(new_h, tf.int32), tf.cast(new_w, tf.int32)
            bb_begin = tf.cast(tf.ceil((h - new_h) / 2), tf.int32), tf.cast(tf.ceil((w - new_w) / 2), tf.int32)
            rotated_image_crop = rotated_image[bb_begin[0]:h - bb_begin[0], bb_begin[1]:w - bb_begin[1], :]

            # If crop removes the entire image, keep the original image
            rotated_image = tf.cond(tf.equal(tf.size(rotated_image_crop), 0),
                                    true_fn=lambda: img,
                                    false_fn=lambda: rotated_image_crop)

        return rotated_image


def random_padding(image: tf.Tensor, max_pad_w: int = 5, max_pad_h: int = 10) -> tf.Tensor:
    w_pad = list(np.random.randint(0, max_pad_w, size=[2]))
    h_pad = list(np.random.randint(0, max_pad_h, size=[2]))
    paddings = [h_pad, w_pad, [0, 0]]

    return tf.pad(image, paddings, mode='REFLECT', name='random_padding')
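

# Note (not part of the original report): np.random.randint above runs once, at
# graph-construction time, so every image in a given run receives the same
# padding amounts. A per-example variant would draw the amounts with TF ops
# inside the graph; a sketch:
def random_padding_tf(image: tf.Tensor, max_pad_w: int = 5, max_pad_h: int = 10) -> tf.Tensor:
    w_pad = tf.random_uniform([2], minval=0, maxval=max_pad_w, dtype=tf.int32)
    h_pad = tf.random_uniform([2], minval=0, maxval=max_pad_h, dtype=tf.int32)
    # paddings has shape [3, 2]: [[top, bottom], [left, right], [0, 0]]
    paddings = tf.stack([h_pad, w_pad, tf.zeros([2], dtype=tf.int32)])

    return tf.pad(image, paddings, mode='REFLECT', name='random_padding')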


def augment_data(image: tf.Tensor) -> tf.Tensor:
    with tf.name_scope('DataAugmentation'):
        # Random padding
        image = random_padding(image)

        image = tf.image.random_brightness(image, max_delta=0.1)
        image = tf.image.random_contrast(image, 0.5, 1.5)
        image = random_rotation(image, 0.05, crop=True)

        if image.shape[-1] >= 3:
            image = tf.image.random_hue(image, 0.2)
            image = tf.image.random_saturation(image, 0.5, 1.5)

        return image


def padding_inputs_width(image: tf.Tensor, target_shape: Tuple[int, int],
                         increment: int) -> Tuple[tf.Tensor, tf.Tensor]:
    target_ratio = target_shape[1] / target_shape[0]
    # Compute ratio to keep the same ratio in new image and get the size of padding
    # necessary to have the final desired shape
    shape = tf.shape(image)
    # Compute the aspect ratio (width / height)
    ratio = tf.divide(shape[1], shape[0], name='ratio')

    new_h = target_shape[0]
    new_w = tf.cast(tf.round((ratio * new_h) / increment) * increment, tf.int32)
    f1 = lambda: (new_w, ratio)
    f2 = lambda: (new_h, tf.constant(1.0, dtype=tf.float64))
    new_w, ratio = tf.case({tf.greater(new_w, 0): f1,
                            tf.less_equal(new_w, 0): f2},
                           default=f1, exclusive=True)
    target_w = target_shape[1]

    # Definitions for cases
    def pad_fn():
        with tf.name_scope('mirror_padding'):
            pad = tf.subtract(target_w, new_w)

            img_resized = tf.image.resize_images(image, [new_h, new_w])

            # Padding to have the desired width
            paddings = [[0, 0], [0, pad], [0, 0]]
            pad_image = tf.pad(img_resized, paddings, mode='SYMMETRIC', name=None)

            # Set manually the shape
            pad_image.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])

            return pad_image, (new_h, new_w)

    def replicate_fn():
        with tf.name_scope('replication_padding'):
            img_resized = tf.image.resize_images(image, [new_h, new_w])

            # If one symmetry is not enough to have a full width
            # Count number of replications needed
            n_replication = tf.cast(tf.ceil(target_shape[1] / new_w), tf.int32)
            img_replicated = tf.tile(img_resized, tf.stack([1, n_replication, 1]))
            pad_image = tf.image.crop_to_bounding_box(image=img_replicated, offset_height=0, offset_width=0,
                                                      target_height=target_shape[0], target_width=target_shape[1])

            # Set manually the shape
            pad_image.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])

            return pad_image, (new_h, new_w)

    def simple_resize():
        with tf.name_scope('simple_resize'):
            img_resized = tf.image.resize_images(image, target_shape)

            img_resized.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])

            return img_resized, target_shape

    # 3 cases
    pad_image, (new_h, new_w) = tf.case(
        {  # case 1 : new_w >= target_w
            tf.logical_and(tf.greater_equal(ratio, target_ratio),
                           tf.greater_equal(new_w, target_w)): simple_resize,
            # case 2 : new_w >= target_w/2 & new_w < target_w & ratio < target_ratio
            tf.logical_and(tf.less(ratio, target_ratio),
                           tf.logical_and(tf.greater_equal(new_w, tf.cast(tf.divide(target_w, 2), tf.int32)),
                                          tf.less(new_w, target_w))): pad_fn,
            # case 3 : new_w < target_w/2 & new_w < target_w & ratio < target_ratio
            tf.logical_and(tf.less(ratio, target_ratio),
                           tf.logical_and(tf.less(new_w, target_w),
                                          tf.less(new_w, tf.cast(tf.divide(target_w, 2), tf.int32)))): replicate_fn
        },
        default=simple_resize, exclusive=True)

    return pad_image, new_w  # new_w = image width used for computing sequence lengths
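
# Worked example for the three-way tf.case above (hypothetical numbers; this
# assumes CONST.DIMENSION_REDUCTION_W_POOLING == 4 -- the real value lives in
# src.config and is not shown in this issue). For a 64 x 128 input with the
# target_shape (32, 304) used by the main script:
#   ratio        = 128 / 64                  = 2.0
#   new_w        = round((2.0 * 32) / 4) * 4 = 64
#   target_ratio = 304 / 32                  = 9.5
# ratio < target_ratio and new_w (64) < target_w / 2 (152), so case 3 fires and
# replicate_fn tiles the resized image until it fills the 304-pixel width.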


def preprocess_image_for_prediction(fixed_height: int = 32, min_width: int = 8):
    """
    Input function to use when exporting the model for making predictions (see estimator.export_savedmodel)
    :param fixed_height: height of the input image after resizing
    :param min_width: minimum width of image after resizing
    :return:
    """

    def serving_input_fn():
        # define placeholder for input image
        image = tf.placeholder(dtype=tf.float32, shape=[None, None, 1])

        shape = tf.shape(image)
        # Assert shape is h x w x c with c = 1

        ratio = tf.divide(shape[1], shape[0])
        increment = CONST.DIMENSION_REDUCTION_W_POOLING
        new_width = tf.cast(tf.round((ratio * fixed_height) / increment) * increment, tf.int32)

        resized_image = tf.cond(new_width < tf.constant(min_width, dtype=tf.int32),
                                true_fn=lambda: tf.image.resize_images(image, size=(fixed_height, min_width)),
                                false_fn=lambda: tf.image.resize_images(image, size=(fixed_height, new_width))
                                )

        # Features to serve
        features = {'images': resized_image[None],  # add batch dimension -> 1 x h x w x c
                    'images_widths': new_width[None]  # add batch dimension -> shape [1]
                    }

        # Inputs received
        receiver_inputs = {'images': image}

        return tf.estimator.export.ServingInputReceiver(features, receiver_inputs)

    return serving_input_fn
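

# Hypothetical usage (not in the original report): exporting a trained
# estimator with this serving input function via the TF 1.x Estimator API:
#   estimator.export_savedmodel('exported_model',
#                               preprocess_image_for_prediction(fixed_height=32,
#                                                               min_width=8))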

log: TensorFlow 1.4

INFO:tensorflow:Using config: {'_model_dir': 'models_vgg_100K_no_eval', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.6
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002BAAA7A6780>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Starting evaluation at 2017-11-28-12:21:42
INFO:tensorflow:Restoring parameters from model_test\model.ckpt-54692
2017-11-28 20:22:04.720980: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.236689657]
INFO:tensorflow:Evaluation [1/3]
2017-11-28 20:28:32.360331: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.238805175]
INFO:tensorflow:Evaluation [2/3]
2017-11-28 20:35:41.020994: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.237995088]
INFO:tensorflow:Evaluation [3/3]
INFO:tensorflow:Finished evaluation at 2017-11-28-12:43:21
INFO:tensorflow:Saving dict for global step 54692: eval/CER = 0.0108218, eval/accuracy = 0.929688, global_step = 54692, loss = 0.23783
time:1306.1133954524994
model: model_test\model.ckpt-54692 Evaluation results: {'eval/CER': 0.01082176, 'eval/accuracy': 0.9296875, 'loss': 0.23782997, 'global_step': 54692}

log: TensorFlow 1.3

INFO:tensorflow:Using config: {'_model_dir': 'models_vgg_100K_no_eval', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.6
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Starting evaluation at 2017-11-28-12:49:50
INFO:tensorflow:Restoring parameters from model_test\model.ckpt-54692
2017-11-28 20:50:12.841210: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.17519826]
INFO:tensorflow:Evaluation [1/3]
2017-11-28 20:51:03.366275: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.2987892]
INFO:tensorflow:Evaluation [2/3]
2017-11-28 20:51:49.843030: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.20660429]
INFO:tensorflow:Evaluation [3/3]
INFO:tensorflow:Finished evaluation at 2017-11-28-12:52:19
INFO:tensorflow:Saving dict for global step 54692: eval/CER = 0.01188, eval/accuracy = 0.924479, global_step = 54692, loss = 0.226864
time:157.26274514198303
model: model_test\model.ckpt-54692 Evaluation results: {'eval/CER': 0.011879961, 'eval/accuracy': 0.92447919, 'loss': 0.22686392, 'global_step': 54692}

About this issue

  • State: closed
  • Created 7 years ago
  • Comments: 36 (18 by maintainers)


Most upvoted comments

I tried to reproduce your issue with the code you provided, but I cannot see any slowdown when switching from 1.3 to 1.4. The scripts you provided are very complex, and many different issues inside and outside TensorFlow could contribute to a slowdown. Can you provide a small repro script that isolates the issue and that we can use for debugging? I will close this issue as not reproducible, but please reopen it with more specific information.
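
A minimal repro in that spirit might look like the sketch below (hypothetical, not from the original thread): it times only the queue-based CSV/image reading path, with the model removed, so any 1.3-vs-1.4 difference can be attributed to input reading alone. It assumes space-delimited path/label CSVs as in the report, and PNG images (swap in tf.image.decode_jpeg as needed).

#!/usr/bin/env python
# Hypothetical minimal repro: time the queue-based input pipeline alone.
import time
import tensorflow as tf

filename_queue = tf.train.string_input_producer(['E:/val1.csv'], num_epochs=1)
reader = tf.TextLineReader()
_, value = reader.read(filename_queue)
path, label = tf.decode_csv(value, record_defaults=[['None'], ['None']], field_delim=' ')
image = tf.image.decode_png(tf.read_file(path), channels=1)  # assumes PNG inputs
image = tf.image.resize_images(image, size=(32, 304))
batch = tf.train.shuffle_batch({'images': image, 'labels': label},
                               batch_size=128, min_after_dequeue=500,
                               num_threads=15, capacity=4000)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    start = time.time()
    for _ in range(3):
        sess.run(batch)
    print('3 batches took %.2f s' % (time.time() - start))
    coord.request_stop()
    coord.join(threads)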