DALI: Transformations in a Video Pipeline for PyTorch (VideoPipeline hangs without throwing an error)

I’m trying to make a Video Pipeline with transformations for resize, grayscale, and normalize

Following Simple Video pipeline reading from multiple files and Using DALI in PyTorch from the docs, here’s what I have so far:

from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import nvidia.dali.ops as ops
import nvidia.dali.types as types


batch_size = 1
sequence_length = 8
initial_prefetch_size = 16
num_iterations = 5

class VideoPipeline(Pipeline):
    
    def __init__(self, batch_size, num_threads, device_id, data, shuffle):
        super(VideoPipeline, self).__init__(batch_size, num_threads, device_id, seed=0)
        
        self.input = ops.VideoReader(device='gpu', filenames=data, sequence_length=sequence_length, 
                                     shard_id=0, num_shards=1, 
                                     random_shuffle=shuffle, 
                                     initial_fill=initial_prefetch_size)
        
        self.decoder = ops.ImageDecoder(output_type = types.GRAY, device='mixed')
        self.resize = ops.Resize(size=(720,720), device='gpu'),
        self.grayscale = ops.Cast(dtype=types.GRAY, device='gpu'),
        self.normalize = ops.Normalize(mean=0.5, stddev=0.5, device='gpu')
    
    def define_graph(self):
        frames = self.input(name='Reader')
        frames = self.decoder(frames)
        frames = self.resize(frames)
        frames = self.grayscale(frames)
        frames = self.normalize(frames)
        return frames
    
video_pipe = VideoPipeline(batch_size=batch_size, num_threads=2, device_id=0, data=vid_path, shuffle=False)
pipes = [video_pipe]
pipes[0].build()
dali_iterator = DALIGenericIterator(pipes, ['data'], pipes[0].epoch_size('Reader'))

I’m getting error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-176-84c325543c44> in <module>
     35 video_pipe = VideoPipeline(batch_size=batch_size, num_threads=2, device_id=0, data=vid_path, shuffle=False)
     36 pipes = [video_pipe]
---> 37 pipes[0].build()
     38 dali_iterator = DALIGenericIterator(pipes, ['data'], pipes[0].epoch_size('Reader'))

~/anaconda3/envs/cv/lib/python3.6/site-packages/nvidia/dali/pipeline.py in build(self, define_graph)
    475 
    476         if not self._prepared:
--> 477             self._prepare_graph(define_graph)
    478 
    479         self._pipe.Build(self._names_and_devices)

~/anaconda3/envs/cv/lib/python3.6/site-packages/nvidia/dali/pipeline.py in _prepare_graph(self, define_graph)
    384         else:
    385             with self:
--> 386                 outputs = define_graph()
    387         if isinstance(outputs, tuple):
    388             outputs = list(outputs)

<ipython-input-176-84c325543c44> in define_graph(self)
     28         frames = self.input(name='Reader')
     29         frames = self.decoder(frames)
---> 30         frames = self.resize(frames)
     31         frames = self.grayscale(frames)
     32         frames = self.normalize(frames)

TypeError: 'tuple' object is not callable

How do I fix it? Please let me know if you need any other information.

About this issue

Original URL
State: closed
Created 4 years ago
Comments: 24 (12 by maintainers)

Most upvoted comments

We don’t see any clear cause for this issue. We need to dive deeper. It would take at least a couple of days. We will get back to you as soon as we know more.

JanuszL on Jan 7, 2021

@Wilann CoordTransform is a very versatile operator. It basically allows you to apply a linear transformation to the innermost dimension of your data. In this case, it’s channels. You can apply a grayscale matrix:

M = [0.35, 0.5, 0.15]

if you want to preserver 3 channels, but just make them uniform:

M = [0.35, 0.5, 0.15] * 3

A random saturation is a bit more involved, but quite possible (I use the new functional API here, which I also encourage you to try):

gray = np.float32([[0.35, 0.5, 0.15]] * 3)
id = np.identity(3, dtype=np.float32)

M = dali.fn.uniform(range=(0, 1)) * (gray - id) + id

frames = dali.fn.coord_transform(frames, M=M, dtype=types.UINT8, device='gpu')

mzient on Dec 18, 2020

You don’t need to use decoder operator, VideoReader a batch of raw sequences. Something like this should work in your case:

from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import os

vid_path = os.path.join(os.environ['DALI_EXTRA_PATH'], 'db/video/cfr_ntsc_29_97_test.mp4')

batch_size = 1
sequence_length = 8
initial_prefetch_size = 16
num_iterations = 5

class VideoPipeline(Pipeline):

    def __init__(self, batch_size, num_threads, device_id, data, shuffle):
        super(VideoPipeline, self).__init__(batch_size, num_threads, device_id, seed=0)

        self.input = ops.VideoReader(device='gpu', filenames=data, sequence_length=sequence_length,
                                     shard_id=0, num_shards=1,
                                     random_shuffle=shuffle,
                                     initial_fill=initial_prefetch_size)

        self.resize = ops.Resize(size=(720,720), device='gpu')
        self.grayscale = ops.CoordTransform(M=[0.35, 0.5, 0.15] * 3, dtype=types.UINT8, device='gpu')
        self.normalize = ops.Normalize(mean=0.5, stddev=0.5, device='gpu')

    def define_graph(self):
        frames = self.input(name='Reader')
        frames = self.resize(frames)
        frames = self.grayscale(frames)
        frames = self.normalize(frames)
        return frames

video_pipe = VideoPipeline(batch_size=batch_size, num_threads=2, device_id=0, data=vid_path, shuffle=False)
pipes = video_pipe
pipes.build()
dali_iterator = DALIGenericIterator(pipes, ['data'], pipes[0].epoch_size('Reader'))

JanuszL on Dec 18, 2020