icevision: IndexError: tensors used as indices must be long, byte or bool tensors (Retinanet only)

🐛 Bug

Describe the bug: On icevision 0.4.0, running RetinaNet on a custom dataset (fire), I get the following:

/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
  warnings.warn(warning.format(ret))
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-28-c232684d32d4> in <module>()
      1 learn.freeze()
----> 2 learn.lr_find()

16 frames
/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    222     n_epoch = num_it//len(self.dls.train) + 1
    223     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 224     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    225     if show_plot: self.recorder.plot_lr_find()
    226     if suggestions:

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    203             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    204             self.n_epoch = n_epoch
--> 205             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    206 
    207     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152 
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try:       self(f'before_{event_type}')       ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
    194         for epoch in range(self.n_epoch):
    195             self.epoch=epoch
--> 196             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    197 
    198     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152 
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try:       self(f'before_{event_type}')       ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
    188 
    189     def _do_epoch(self):
--> 190         self._do_epoch_train()
    191         self._do_epoch_validate()
    192 

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_train(self)
    180     def _do_epoch_train(self):
    181         self.dl = self.dls.train
--> 182         self._with_events(self.all_batches, 'train', CancelTrainException)
    183 
    184     def _do_epoch_validate(self, ds_idx=1, dl=None):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152 
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try:       self(f'before_{event_type}')       ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
    158     def all_batches(self):
    159         self.n_iter = len(self.dl)
--> 160         for o in enumerate(self.dl): self.one_batch(*o)
    161 
    162     def _do_one_batch(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in one_batch(self, i, b)
    176         self.iter = i
    177         self._split(b)
--> 178         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    179 
    180     def _do_epoch_train(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152 
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try:       self(f'before_{event_type}')       ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_one_batch(self)
    161 
    162     def _do_one_batch(self):
--> 163         self.pred = self.model(*self.xb)
    164         self('after_pred')
    165         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in forward(self, images, targets)
    556 
    557             # compute the losses
--> 558             losses = self.compute_loss(targets, head_outputs, anchors)
    559         else:
    560             # compute the detections

/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, anchors)
    406             matched_idxs.append(self.proposal_matcher(match_quality_matrix))
    407 
--> 408         return self.head.compute_loss(targets, head_outputs, anchors, matched_idxs)
    409 
    410     def postprocess_detections(self, head_outputs, anchors, image_shapes):

/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, anchors, matched_idxs)
     49         # type: (List[Dict[str, Tensor]], Dict[str, Tensor], List[Tensor], List[Tensor]) -> Dict[str, Tensor]
     50         return {
---> 51             'classification': self.classification_head.compute_loss(targets, head_outputs, matched_idxs),
     52             'bbox_regression': self.regression_head.compute_loss(targets, head_outputs, anchors, matched_idxs),
     53         }

/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, matched_idxs)
    118                     foreground_idxs_per_image,
    119                     targets_per_image['labels'][matched_idxs_per_image[foreground_idxs_per_image]]
--> 120                 ] = 1.0
    121 
    122                 # find indices for which anchors should be ignored

IndexError: tensors used as indices must be long, byte or bool tensors

To Reproduce: Steps to reproduce the behavior: The error happened during learn.lr_find(), but on another occasion I got past this point and got an error during learn.fine_tune(50, 3e-3, freeze_epochs=1). I placed the notebook at https://github.com/robmarkcole/fire-detection-from-images/blob/master/pytorch/icevision/icevision_firenet_retinanet.ipynb

Expected behavior No error

Screenshots NA

Desktop (please complete the following information):

macOS Catalina 10.15.5

Additional context None

About this issue

Original URL
State: closed
Created 4 years ago
Reactions: 1
Comments: 16 (1 by maintainers)

Most upvoted comments

"Used the same code to train faster_rcnn and efficientdet without problems, right?" Correct 😃

robmarkcole on Nov 19, 2020

@robmarkcole this may have to do with the fastai version update

rsomani95 on May 12, 2021