icevision: IndexError: tensors used as indices must be long, byte or bool tensors (Retinanet only)
🐛 Bug
Describe the bug: On icevision 0.4.0, running RetinaNet on a custom dataset (fire), I get the following:
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
warnings.warn(warning.format(ret))
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-28-c232684d32d4> in <module>()
1 learn.freeze()
----> 2 learn.lr_find()
16 frames
/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
222 n_epoch = num_it//len(self.dls.train) + 1
223 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 224 with self.no_logging(): self.fit(n_epoch, cbs=cb)
225 if show_plot: self.recorder.plot_lr_find()
226 if suggestions:
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
203 self.opt.set_hypers(lr=self.lr if lr is None else lr)
204 self.n_epoch = n_epoch
--> 205 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
206
207 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
194 for epoch in range(self.n_epoch):
195 self.epoch=epoch
--> 196 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
197
198 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
188
189 def _do_epoch(self):
--> 190 self._do_epoch_train()
191 self._do_epoch_validate()
192
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_train(self)
180 def _do_epoch_train(self):
181 self.dl = self.dls.train
--> 182 self._with_events(self.all_batches, 'train', CancelTrainException)
183
184 def _do_epoch_validate(self, ds_idx=1, dl=None):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
158 def all_batches(self):
159 self.n_iter = len(self.dl)
--> 160 for o in enumerate(self.dl): self.one_batch(*o)
161
162 def _do_one_batch(self):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in one_batch(self, i, b)
176 self.iter = i
177 self._split(b)
--> 178 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
179
180 def _do_epoch_train(self):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_one_batch(self)
161
162 def _do_one_batch(self):
--> 163 self.pred = self.model(*self.xb)
164 self('after_pred')
165 if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in forward(self, images, targets)
556
557 # compute the losses
--> 558 losses = self.compute_loss(targets, head_outputs, anchors)
559 else:
560 # compute the detections
/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, anchors)
406 matched_idxs.append(self.proposal_matcher(match_quality_matrix))
407
--> 408 return self.head.compute_loss(targets, head_outputs, anchors, matched_idxs)
409
410 def postprocess_detections(self, head_outputs, anchors, image_shapes):
/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, anchors, matched_idxs)
49 # type: (List[Dict[str, Tensor]], Dict[str, Tensor], List[Tensor], List[Tensor]) -> Dict[str, Tensor]
50 return {
---> 51 'classification': self.classification_head.compute_loss(targets, head_outputs, matched_idxs),
52 'bbox_regression': self.regression_head.compute_loss(targets, head_outputs, anchors, matched_idxs),
53 }
/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/retinanet.py in compute_loss(self, targets, head_outputs, matched_idxs)
118 foreground_idxs_per_image,
119 targets_per_image['labels'][matched_idxs_per_image[foreground_idxs_per_image]]
--> 120 ] = 1.0
121
122 # find indices for which anchors should be ignored
IndexError: tensors used as indices must be long, byte or bool tensors
To Reproduce
Steps to reproduce the behavior:
This happened during learn.lr_find(), but on another occasion I got past this point and got an error during learn.fine_tune(50, 3e-3, freeze_epochs=1).
I placed the notebook at https://github.com/robmarkcole/fire-detection-from-images/blob/master/pytorch/icevision/icevision_firenet_retinanet.ipynb
Expected behavior: No error.
Screenshots: N/A
Desktop (please complete the following information):
- Mac Catalina 10.15.5
Additional context None
About this issue
- Original URL
- State: closed
- Created 4 years ago
- Reactions: 1
- Comments: 16 (1 by maintainers)
> You used the same code to train faster_rcnn and efficientdet without problems, right?
Correct 😃
@robmarkcole This may have to do with the fastai version update.