tensorflow: TensorFlow 1.4 is 8 times slower than TensorFlow 1.3 when reading data
System information
- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- TensorFlow installed from (source or binary): python wheel
- TensorFlow version (use command below): 1.4 and 1.3
- Python version: 3.6.1
- Bazel version (if compiling from source):
- GCC/Compiler version (if compiling from source):
- CUDA/cuDNN version: None
- GPU model and memory: None
- Exact command to reproduce:
When I run the script below (which uses an Estimator) on TensorFlow 1.4, it is about 8 times slower than the same script on TensorFlow 1.3.
Source code / logs
main script
#!/usr/bin/env python
__author__ = 'zj'
import argparse
import os
import sys
import numpy as np
import time
try:
import better_exceptions
except ImportError:
pass
import tensorflow as tf
from src.model_ori import crnn_fn
from src.data_handler import data_loader
from src.config import Params, Alphabet
from src.input_utils import input_fn
def main(unused_argv):
    """Evaluate every checkpoint found in ``FLAGS.input_model_dir`` and log the results.

    Each ``*.meta`` file in the input directory identifies one checkpoint. Every
    checkpoint is evaluated for 3 steps with the CRNN estimator; the elapsed time
    and the metrics are printed, and one line per checkpoint is written to
    ``FLAGS.output_file``.

    :param unused_argv: leftover command-line arguments forwarded by ``tf.app.run``; ignored.
    :raises FileNotFoundError: if ``FLAGS.input_model_dir`` does not exist.
    """
    models_path = FLAGS.input_model_dir
    if not os.path.exists(models_path):
        # The previous `assert FileNotFoundError` only asserted that the exception
        # class is truthy, so it could never fire; raise explicitly instead.
        raise FileNotFoundError('Input model directory does not exist: %s' % models_path)

    # A checkpoint prefix is the name of its '.meta' file without the extension.
    meta_suffix = '.meta'
    models_list = [os.path.join(models_path, x[:-len(meta_suffix)])
                   for x in os.listdir(models_path) if x.endswith(meta_suffix)]

    os.makedirs(FLAGS.output_model_dir, exist_ok=True)

    parameters = Params(eval_batch_size=128,
                        input_shape=(32, 304),
                        digits_only=False,
                        alphabet=Alphabet.CHINESECHAR_LETTERS_DIGITS_EXTENDED,
                        alphabet_decoding='same',
                        image_channels=1,
                        csv_delimiter=' ',
                        csv_files_eval=FLAGS.csv_files_eval,
                        output_model_dir=FLAGS.output_model_dir,
                        gpu=FLAGS.gpu
                        )
    model_params = {
        'Params': parameters,
    }

    os.environ['CUDA_VISIBLE_DEVICES'] = parameters.gpu
    config_sess = tf.ConfigProto()
    config_sess.gpu_options.per_process_gpu_memory_fraction = 0.6

    # Config estimator
    est_config = tf.estimator.RunConfig()
    est_config = est_config.replace(session_config=config_sess,
                                    save_summary_steps=100,
                                    model_dir=parameters.output_model_dir)
    estimator = tf.estimator.Estimator(model_fn=crnn_fn,
                                       params=model_params,
                                       config=est_config,
                                       model_dir=parameters.output_model_dir,
                                       )

    # data_loader only returns a closure, so the same input_fn can be reused for
    # every checkpoint; the graph itself is rebuilt by each evaluate() call.
    eval_input_fn = data_loader(csv_filename=parameters.csv_files_eval,
                                params=parameters,
                                batch_size=parameters.eval_batch_size,
                                num_epochs=1)
    try:
        with open(FLAGS.output_file, encoding='utf-8', mode='w') as save_file:
            for model in models_list:
                start = time.time()
                eval_results = estimator.evaluate(input_fn=eval_input_fn,
                                                  steps=3,
                                                  checkpoint_path=model)
                print('time:', time.time() - start)
                print('model: %s Evaluation results: %s' % (model, str(eval_results)))
                save_file.write(model + ' ' + str(eval_results) + '\n')
    except KeyboardInterrupt:
        print('Interrupted')
if __name__ == '__main__':
    # Script entry point: parse the command-line flags into the module-level FLAGS
    # namespace read by main(), then hand control (and any unparsed arguments) to
    # tf.app.run.
    parser = argparse.ArgumentParser()
    parser.add_argument('-fe', '--csv_files_eval', required=False, type=str, help='CSV filename for evaluation',
                        nargs='*', default=['E:/val1.csv'])
    parser.add_argument('-o', '--output_model_dir', required=False, type=str,
                        help='Directory for output', default='models_vgg_100K_no_eval')
    # The help text used to be a copy of the output-directory one; this flag is the
    # directory that is scanned for '.meta' checkpoint files.
    parser.add_argument('-m', '--input_model_dir', required=False, type=str,
                        help='Directory containing the checkpoints to evaluate', default='model_test')
    parser.add_argument('-g', '--gpu', type=str, help="GPU 0,1 or '' ", default='0')
    parser.add_argument('-of', '--output_file', required=False, type=str, default='123.txt', help="the log output file")

    tf.logging.set_verbosity(tf.logging.DEBUG)
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
data_loader script
#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from .config import Params, CONST
from typing import Tuple
def data_loader(csv_filename: str, params: Params, batch_size: int = 128, data_augmentation: bool = False,
                num_epochs: int = None, image_summaries: bool = False):
    """
    Build an ``input_fn`` that reads (image path, label) pairs from CSV files through TF input queues.

    :param csv_filename: one CSV file name, or a list of CSV file names
    :param params: parameters object; ``csv_delimiter`` and ``input_shape`` are read here
    :param batch_size: number of examples per dequeued batch
    :param data_augmentation: forwarded to ``image_reading``
    :param num_epochs: forwarded to ``tf.train.string_input_producer``
    :param image_summaries: when true, add image / label / width summaries for the batch
    :return: a zero-argument function returning ``(features_dict, labels)``
    """

    def input_fn():
        # A single file name is wrapped in a list so that both cases share one producer
        csv_files = csv_filename if isinstance(csv_filename, list) else [csv_filename]
        filename_queue = tf.train.string_input_producer(csv_files, num_epochs=num_epochs, name='filename_queue')

        # One CSV line per read; no header lines are skipped
        line_reader = tf.TextLineReader(name='CSV_Reader', skip_header_lines=0)
        _, csv_line = line_reader.read(filename_queue, name='file_reading_op')

        # Every line holds two string fields: the image path and its label
        record_defaults = [['None'], ['None']]
        path, label = tf.decode_csv(csv_line, record_defaults=record_defaults, field_delim=params.csv_delimiter,
                                    name='csv_reading_op')

        image, img_width = image_reading(path, resized_size=params.input_shape, params=params,
                                         data_augmentation=data_augmentation, padding=True)

        example = {'images': image, 'images_widths': img_width, 'filenames': path, 'labels': label}
        prepared_batch = tf.train.shuffle_batch(example,
                                                batch_size=batch_size,
                                                min_after_dequeue=500,
                                                num_threads=15, capacity=4000,
                                                allow_smaller_final_batch=False,
                                                name='prepared_batch_queue')
        batch_labels = prepared_batch['labels']

        if image_summaries:
            tf.summary.image('input/image', prepared_batch['images'], max_outputs=1)
            tf.summary.text('input/labels', batch_labels[:10])
            tf.summary.text('input/widths', tf.as_string(prepared_batch['images_widths']))

        return prepared_batch, batch_labels

    return input_fn
def image_reading(path: str, params: Params, resized_size: Tuple[int, int] = None, data_augmentation: bool = False,
                  padding: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Read an image file, decode it, optionally augment it, and bring it to ``resized_size``.

    :param path: path of the image file (passed to ``tf.read_file``)
    :param params: parameters object; ``image_channels`` selects the number of decoded channels
    :param resized_size: (height, width) the returned image must have
    :param data_augmentation: when true, apply ``augment_data`` to the decoded image
    :param padding: when true, use ``padding_inputs_width`` (keeps aspect ratio, pads the width);
        otherwise resize directly to ``resized_size``
    :return: the processed image and the width value returned alongside it
    """
    # Read image
    image_content = tf.read_file(path, name='image_reader')

    # The extension is the LAST dot-separated component. Taking component 1 (as was done
    # before) picks the wrong piece for any path containing more than one dot,
    # e.g. 'data.v1/img.jpg'. Both 'jpg' and 'jpeg' go to the JPEG decoder.
    extension = tf.string_split([path], '.').values[-1]
    is_jpeg = tf.logical_or(tf.equal(extension, tf.constant('jpg', dtype=tf.string)),
                            tf.equal(extension, tf.constant('jpeg', dtype=tf.string)))
    image = tf.cond(is_jpeg,
                    true_fn=lambda: tf.image.decode_jpeg(image_content, channels=params.image_channels,
                                                         try_recover_truncated=True),  # TODO channels = 3 ?
                    false_fn=lambda: tf.image.decode_png(image_content, channels=params.image_channels),
                    name='image_decoding')

    # Data augmentation
    if data_augmentation:
        image = augment_data(image)

    # Padding
    if padding:
        with tf.name_scope('padding'):
            image, img_width = padding_inputs_width(image, resized_size, increment=CONST.DIMENSION_REDUCTION_W_POOLING)
    # Resize
    else:
        image = tf.image.resize_images(image, size=resized_size)
        img_width = tf.shape(image)[1]

    # Control dependencies only apply to ops CREATED inside the context; merely returning
    # an existing tensor from inside it adds no dependency. Creating an identity op here
    # makes the shape assertion actually guard the returned image.
    with tf.control_dependencies([tf.assert_equal(tf.shape(image)[:2], resized_size)]):
        image = tf.identity(image)
    return image, img_width
def random_rotation(img: tf.Tensor, max_rotation: float = 0.1, crop: bool = True) -> tf.Tensor:  # from SeguinBe
    """
    Rotate ``img`` by an angle drawn uniformly from [-max_rotation, max_rotation).

    :param img: image tensor to rotate
    :param max_rotation: bound of the sampled rotation angle (passed as-is to ``tf.contrib.image.rotate``)
    :param crop: when true, crop the rotated image to remove the borders introduced by the rotation;
        if that crop would be empty, the un-rotated input image is returned instead
    :return: the rotated (and possibly cropped) image
    """
    with tf.name_scope('RandomRotation'):
        angle = tf.random_uniform([], -max_rotation, max_rotation)
        rotated = tf.contrib.image.rotate(img, angle, interpolation='BILINEAR')

        if not crop:
            return rotated

        angle = tf.abs(angle)
        hw = tf.shape(rotated)[:2]
        h, w = hw[0], hw[1]

        # see https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders for formulae
        # Work on (long side, short side) so one formula covers both orientations
        long_side, short_side = tf.cond(h > w, lambda: [h, w], lambda: [w, h])
        long_side, short_side = tf.cast(long_side, tf.float32), tf.cast(short_side, tf.float32)
        cropped_long = (long_side * tf.cos(angle) - short_side * tf.sin(angle)) / tf.cos(2 * angle)
        cropped_short = (short_side - tf.sin(angle) * cropped_long) / tf.cos(angle)

        # Map (long, short) back to (height, width)
        crop_h, crop_w = tf.cond(h > w,
                                 lambda: [cropped_long, cropped_short],
                                 lambda: [cropped_short, cropped_long])
        crop_h, crop_w = tf.cast(crop_h, tf.int32), tf.cast(crop_w, tf.int32)

        # Symmetric margins removed on each side
        top = tf.cast(tf.ceil((h - crop_h) / 2), tf.int32)
        left = tf.cast(tf.ceil((w - crop_w) / 2), tf.int32)
        cropped = rotated[top:h - top, left:w - left, :]

        # If crop removes the entire image, keep the original image
        return tf.cond(tf.equal(tf.size(cropped), 0),
                       true_fn=lambda: img,
                       false_fn=lambda: cropped)
def random_padding(image: tf.Tensor, max_pad_w: int = 5, max_pad_h: int = 10) -> tf.Tensor:
    """
    Pad ``image`` by reflection with a random amount on each side.

    The padding amounts are sampled inside the graph, so a new value is drawn every
    time the op runs. Sampling them with NumPy (as was done before) happens once while
    the graph is built, which gives every image the exact same padding.

    :param image: image tensor of rank 3 (height, width, channels)
    :param max_pad_w: exclusive upper bound of the padding added on the left and on the right
    :param max_pad_h: exclusive upper bound of the padding added on the top and on the bottom
    :return: the padded image
    """
    h_pad = tf.random_uniform([2], minval=0, maxval=max_pad_h, dtype=tf.int32)
    w_pad = tf.random_uniform([2], minval=0, maxval=max_pad_w, dtype=tf.int32)
    # Rows are (before, after) amounts for height, width and channels; channels are never padded
    paddings = tf.stack([h_pad, w_pad, tf.zeros([2], dtype=tf.int32)])
    padded = tf.pad(image, paddings, mode='REFLECT', name='random_padding')
    # Dynamic paddings erase the static shape; restore the channel dimension, which is
    # never padded, so that callers inspecting `shape[-1]` keep working.
    if image.shape.ndims is not None:
        padded.set_shape([None, None, image.shape[-1]])
    return padded
def augment_data(image: tf.Tensor) -> tf.Tensor:
    """
    Apply the augmentation chain to one image: random padding, brightness, contrast and
    rotation, followed by hue and saturation when the static channel count is at least 3.

    :param image: image tensor to augment
    :return: the augmented image
    """
    with tf.name_scope('DataAugmentation'):
        # Geometry / intensity perturbations applied to every image
        padded = random_padding(image)
        brightened = tf.image.random_brightness(padded, max_delta=0.1)
        contrasted = tf.image.random_contrast(brightened, 0.5, 1.5)
        augmented = random_rotation(contrasted, 0.05, crop=True)

        # Colour perturbations only when the image has at least 3 channels
        if augmented.shape[-1] >= 3:
            augmented = tf.image.random_hue(augmented, 0.2)
            augmented = tf.image.random_saturation(augmented, 0.5, 1.5)

        return augmented
def padding_inputs_width(image: tf.Tensor, target_shape: Tuple[int, int], increment: int) -> Tuple[
    tf.Tensor, tf.Tensor]:
    """
    Bring ``image`` to ``target_shape`` while keeping its aspect ratio where possible.

    The image is resized to the target height with a width ``new_w`` that follows the
    original aspect ratio, rounded to a multiple of ``increment``. One of three strategies
    is then selected in-graph:

    * ``new_w >= target width`` (and ratio >= target ratio): plain resize to ``target_shape``;
    * ``target width / 2 <= new_w < target width``: resize, then mirror-pad on the right;
    * ``new_w < target width / 2``: resize, tile along the width, then crop to the target width.

    :param image: image tensor; dimension 0 is used as height and dimension 1 as width
    :param target_shape: (height, width) of the returned image
    :param increment: the computed width is rounded to a multiple of this value
    :return: the image with static shape ``target_shape`` (plus channels), and the width
        produced by the selected branch (``new_w``, or the target width for the plain resize)
    """
    target_ratio = target_shape[1] / target_shape[0]
    # Compute ratio to keep the same ratio in new image and get the size of padding
    # necessary to have the final desired shape
    shape = tf.shape(image)
    # Compute the width / height aspect ratio
    ratio = tf.divide(shape[1], shape[0], name='ratio')
    new_h = target_shape[0]
    new_w = tf.cast(tf.round((ratio * new_h) / increment) * increment, tf.int32)
    # If rounding produced a non-positive width, fall back to a square image
    # (width = new_h, ratio = 1.0); otherwise keep the computed values.
    # The lambdas are invoked while tf.case builds the graph, i.e. before
    # `new_w` and `ratio` are rebound by the assignment below.
    f1 = lambda: (new_w, ratio)
    f2 = lambda: (new_h, tf.constant(1.0, dtype=tf.float64))
    new_w, ratio = tf.case({tf.greater(new_w, 0): f1,
                            tf.less_equal(new_w, 0): f2},
                           default=f1, exclusive=True)
    target_w = target_shape[1]

    # Definitions for cases
    def pad_fn():
        with tf.name_scope('mirror_padding'):
            pad = tf.subtract(target_w, new_w)
            img_resized = tf.image.resize_images(image, [new_h, new_w])
            # Padding to have the desired width
            paddings = [[0, 0], [0, pad], [0, 0]]
            pad_image = tf.pad(img_resized, paddings, mode='SYMMETRIC', name=None)
            # Set manually the shape
            pad_image.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])
        return pad_image, (new_h, new_w)

    def replicate_fn():
        with tf.name_scope('replication_padding'):
            img_resized = tf.image.resize_images(image, [new_h, new_w])
            # If one symmetry is not enough to have a full width
            # Count number of replications needed
            n_replication = tf.cast(tf.ceil(target_shape[1] / new_w), tf.int32)
            img_replicated = tf.tile(img_resized, tf.stack([1, n_replication, 1]))
            pad_image = tf.image.crop_to_bounding_box(image=img_replicated, offset_height=0, offset_width=0,
                                                      target_height=target_shape[0], target_width=target_shape[1])
            # Set manually the shape
            pad_image.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])
        return pad_image, (new_h, new_w)

    def simple_resize():
        with tf.name_scope('simple_resize'):
            img_resized = tf.image.resize_images(image, target_shape)
            img_resized.set_shape([target_shape[0], target_shape[1], img_resized.get_shape()[2]])
        return img_resized, target_shape

    # 3 cases
    # Every branch returns (image, (height, width)) so that tf.case can merge them.
    pad_image, (new_h, new_w) = tf.case(
        {  # case 1 : new_w >= target_w
            tf.logical_and(tf.greater_equal(ratio, target_ratio),
                           tf.greater_equal(new_w, target_w)): simple_resize,
            # case 2 : new_w >= target_w/2 & new_w < target_w & ratio < target_ratio
            tf.logical_and(tf.less(ratio, target_ratio),
                           tf.logical_and(tf.greater_equal(new_w, tf.cast(tf.divide(target_w, 2), tf.int32)),
                                          tf.less(new_w, target_w))): pad_fn,
            # case 3 : new_w < target_w/2 & new_w < target_w & ratio < target_ratio
            tf.logical_and(tf.less(ratio, target_ratio),
                           tf.logical_and(tf.less(new_w, target_w),
                                          tf.less(new_w, tf.cast(tf.divide(target_w, 2), tf.int32)))): replicate_fn
        },
        default=simple_resize, exclusive=True)
    return pad_image, new_w  # new_w = image width used for computing sequence lengths
def preprocess_image_for_prediction(fixed_height: int = 32, min_width: int = 8):
    """
    Build the serving input function used when exporting the model for predictions
    (see estimator.export_savedmodel).

    :param fixed_height: height of the input image after resizing
    :param min_width: minimum width of image after resizing
    :return: a zero-argument function returning a ``tf.estimator.export.ServingInputReceiver``
    """

    def serving_input_fn():
        # Placeholder receiving one single-channel image (h x w x 1)
        image = tf.placeholder(dtype=tf.float32, shape=[None, None, 1])
        input_shape = tf.shape(image)

        # Width that keeps the aspect ratio at the fixed height, rounded to a multiple of the increment
        ratio = tf.divide(input_shape[1], input_shape[0])
        increment = CONST.DIMENSION_REDUCTION_W_POOLING
        new_width = tf.cast(tf.round((ratio * fixed_height) / increment) * increment, tf.int32)

        def _resize_to_min_width():
            return tf.image.resize_images(image, size=(fixed_height, min_width))

        def _resize_to_scaled_width():
            return tf.image.resize_images(image, size=(fixed_height, new_width))

        # Images that would become narrower than min_width are resized to min_width instead
        too_narrow = new_width < tf.constant(min_width, dtype=tf.int32)
        resized_image = tf.cond(too_narrow,
                                true_fn=_resize_to_min_width,
                                false_fn=_resize_to_scaled_width)

        # NOTE(review): 'images_widths' carries the un-clamped new_width even when the image
        # was resized to min_width - confirm this is the width the model expects.
        features = {
            'images': resized_image[None],  # add the batch dimension: 1 x h x w x c
            'images_widths': new_width[None],  # scalar -> tensor of shape [1]
        }

        # Tensors the exported model receives from the client
        receiver_inputs = {'images': image}
        return tf.estimator.export.ServingInputReceiver(features, receiver_inputs)

    return serving_input_fn
log tensorflow1.4
INFO:tensorflow:Using config: {'_model_dir': 'models_vgg_100K_no_eval', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
per_process_gpu_memory_fraction: 0.6
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002BAAA7A6780>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Starting evaluation at 2017-11-28-12:21:42
INFO:tensorflow:Restoring parameters from model_test\model.ckpt-54692
2017-11-28 20:22:04.720980: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.236689657]
INFO:tensorflow:Evaluation [1/3]
2017-11-28 20:28:32.360331: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.238805175]
INFO:tensorflow:Evaluation [2/3]
2017-11-28 20:35:41.020994: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.237995088]
INFO:tensorflow:Evaluation [3/3]
INFO:tensorflow:Finished evaluation at 2017-11-28-12:43:21
INFO:tensorflow:Saving dict for global step 54692: eval/CER = 0.0108218, eval/accuracy = 0.929688, global_step = 54692, loss = 0.23783
time:1306.1133954524994
model: model_test\model.ckpt-54692 Evaluation results: {'eval/CER': 0.01082176, 'eval/accuracy': 0.9296875, 'loss': 0.23782997, 'global_step': 54692}
tensorflow 1.3
INFO:tensorflow:Using config: {'_model_dir': 'models_vgg_100K_no_eval', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': gpu_options {
per_process_gpu_memory_fraction: 0.6
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Starting evaluation at 2017-11-28-12:49:50
INFO:tensorflow:Restoring parameters from model_test\model.ckpt-54692
2017-11-28 20:50:12.841210: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.17519826]
INFO:tensorflow:Evaluation [1/3]
2017-11-28 20:51:03.366275: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.2987892]
INFO:tensorflow:Evaluation [2/3]
2017-11-28 20:51:49.843030: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\kernels\logging_ops.cc:79] * Loss : [0.20660429]
INFO:tensorflow:Evaluation [3/3]
INFO:tensorflow:Finished evaluation at 2017-11-28-12:52:19
INFO:tensorflow:Saving dict for global step 54692: eval/CER = 0.01188, eval/accuracy = 0.924479, global_step = 54692, loss = 0.226864
time:157.26274514198303
model: model_test\model.ckpt-54692 Evaluation results: {'eval/CER': 0.011879961, 'eval/accuracy': 0.92447919, 'loss': 0.22686392, 'global_step': 54692}
About this issue
- Original URL
- State: closed
- Created 7 years ago
- Comments: 36 (18 by maintainers)
Commits related to this issue
- Added __str__ to ClusterSpec. This will improve the logs users are attaching to github issues. For example : #14942 PiperOrigin-RevId: 178296636 — committed to teamdandelion/tensorflow by ispirmustafa 7 years ago
I tried to reproduce your issue with the code you provided, but I cannot see any slowdown when switching from 1.3 to 1.4. The scripts you provided are very complex; many different issues inside and outside TensorFlow could contribute to a slowdown. Can you provide a small repro script that isolates the issue and that we can use for debugging? I will close this issue as not reproducible, but please reopen it with more specific information.