detectron2: DeepLab Inference seems to not be working / Unable to display predictions

Hi, thank you Yuxin and team for making this amazing resource and for all of your support answering questions!!!

I’m running into an issue where I think I’m either not running inference properly or having difficulty displaying my predictions: I keep getting an error when trying to run visualize_data.py.

Instructions To Reproduce the 🐛 Bug:

  1. Full runnable code or full changes you made: This is my file train_net_xbd.py (imports removed):
def build_sem_seg_train_aug(cfg):
    augs = [
        T.ResizeShortestEdge(
            cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN, cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
        )
    ]
    if cfg.INPUT.CROP.ENABLED:
        augs.append(
            T.RandomCrop_CategoryAreaConstraint(
                cfg.INPUT.CROP.TYPE,
                cfg.INPUT.CROP.SIZE,
                cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA,
                cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
            )
        )
    augs.append(T.RandomFlip())
    return augs


class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer", which contains a number of pre-defined behaviors for
    the standard training workflow. They may not work for you, especially if you
    are working on a new research project. In that case you can use the cleaner
    "SimpleTrainer", or write your own training loop.
    """
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Create evaluator(s) for a given dataset.
        This uses the special metadata "evaluator_type" associated with each builtin dataset.
        For your own dataset, you can simply create an evaluator manually in your
        script and do not have to worry about the hacky if-else logic here.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        if evaluator_type == "sem_seg":
            print("datasetname", dataset_name)
            return SemSegEvaluator(
                dataset_name,
                distributed=True,
                num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
                ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
                output_dir=output_folder,
            )
        return DatasetEvaluators(evaluator_list)

    @classmethod
    def build_train_loader(cls, cfg):
        if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE:
            mapper = DatasetMapper(cfg, is_train=True, augmentations=build_sem_seg_train_aug(cfg))
        else:
            mapper = None
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer):
        """
        It now calls :func:`detectron2.solver.build_lr_scheduler`.
        Overwrite it if you'd like a different scheduler.
        """
        return build_lr_scheduler(cfg, optimizer)


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    add_deeplab_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg

def get_building_dicts(img_dir):
    """This function loads the JSON file created with the annotator and converts it to
    the detectron2 metadata specifications.
    """
    img_links = glob.glob(img_dir+"labels/*.json")
    # only keep the images that include post
    img_anns = list(filter(lambda x: "post" in x, img_links))  # only keep post-disaster annotations ("post" in the file name)

    dataset_dicts = []
    # loop through the entries in the JSON file
    for idx, single in enumerate(img_anns):
        v = json.load(open(single))
        record = {}
        # add file_name, image_id, height and width information to the records
        filename = os.path.join(img_dir, "images/", v["metadata"]["img_name"])
        height, width = (v["metadata"]["height"], v["metadata"]["width"])

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
        record["sem_seg_file_name"] = img_dir+"bin_masks/" + v["metadata"]["img_name"]
        dataset_dicts.append(record)

    return dataset_dicts


def main(args):

    for d in ["train", "test"]:
        DatasetCatalog.register(
            "xbddata_" + d, lambda d=d: get_building_dicts("/n/tambe_lab/Users/michelewang/" + d+"/"),
        )
        MetadataCatalog.get("xbddata_"+d).stuff_classes = ["0","1","2"]
        MetadataCatalog.get("xbddata_"+d).evaluator_type = "sem_seg"
    
    print("Dataset Catalog", DatasetCatalog.list())
    print("XBDDATA_TRAIN", DatasetCatalog.get("xbddata_train"))
    xbdtrain_metadata = MetadataCatalog.get("xbddata_train")
    xbdtest_metadata = MetadataCatalog.get("xbddata_test")

    cfg = setup(args)

    if args.eval_only:
        print("hi, we're in eval only")
        model = Trainer.build_model(cfg)
        print("cfg.MODEL.WEIGHTS", cfg.MODEL.WEIGHTS)
        print("cfg.OUTPUT_DIR", cfg.OUTPUT_DIR)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )

This is my file visualize_data.py:

#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import os
from itertools import chain
import cv2
import tqdm

from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
from detectron2.data import detection_utils as utils
from detectron2.data.build import filter_images_with_few_keypoints
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer


def setup(args):
    cfg = get_cfg()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.freeze()
    return cfg


def parse_args(in_args=None):
    parser = argparse.ArgumentParser(description="Visualize ground-truth data")
    parser.add_argument(
        "--source",
        choices=["annotation", "dataloader"],
        required=True,
        help="visualize the annotations or the data loader (with pre-processing)",
    )
    parser.add_argument("--config-file", metavar="FILE", help="path to config file")
    parser.add_argument("--output-dir", default="./", help="path to output directory")
    parser.add_argument("--show", action="store_true", help="show output in a window")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    return parser.parse_args(in_args)


if __name__ == "__main__":
    args = parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup(args)

    dirname = args.output_dir
    os.makedirs(dirname, exist_ok=True)
    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

    def output(vis, fname):
        if args.show:
            print(fname)
            cv2.imshow("window", vis.get_image()[:, :, ::-1])
            cv2.waitKey()
        else:
            filepath = os.path.join(dirname, fname)
            print("Saving to {} ...".format(filepath))
            vis.save(filepath)

    scale = 1.0
    if args.source == "dataloader":
        train_data_loader = build_detection_train_loader(cfg)
        for batch in train_data_loader:
            for per_image in batch:
                # Pytorch tensor is in (C, H, W) format
                img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy()
                img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT)

                visualizer = Visualizer(img, metadata=metadata, scale=scale)
                target_fields = per_image["instances"].get_fields()
                labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
                vis = visualizer.overlay_instances(
                    labels=labels,
                    boxes=target_fields.get("gt_boxes", None),
                    masks=target_fields.get("gt_masks", None),
                    keypoints=target_fields.get("gt_keypoints", None),
                )
                output(vis, str(per_image["image_id"]) + ".jpg")
    else:
        dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
        if cfg.MODEL.KEYPOINT_ON:
            dicts = filter_images_with_few_keypoints(dicts, 1)
        for dic in tqdm.tqdm(dicts):
            img = utils.read_image(dic["file_name"], "RGB")
            visualizer = Visualizer(img, metadata=metadata, scale=scale)
            vis = visualizer.draw_dataset_dict(dic)
            output(vis, os.path.basename(dic["file_name"]))
  2. What exact command you run: This is the command I ran for inference (it finished very quickly, so I think something is wrong here):
cd /n/home07/michelewang/thesis/detectron2/projects/DeepLab
python train_net_xbd.py --config-file configs/xBD-configs/base-deeplabv3.yaml --eval-only MODEL.WEIGHTS ./output/model_0024999.pth

This was the script I ran to try to view my predictions:

cd /n/home07/michelewang/thesis/detectron2/tools
python visualize_data.py --source annotation --config-file ../projects/DeepLab/configs/xBD-configs/base-deeplabv3.yaml --output-dir ../projects/DeepLab/output/inference --show
  3. Full logs or other relevant observations: Logs from inference:
WARNING [03/07 21:38:53 d2.evaluation.sem_seg_evaluation]: SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata.
WARNING [03/07 21:38:53 d2.evaluation.sem_seg_evaluation]: SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata.
[03/07 21:38:54 d2.evaluation.evaluator]: Start inference on 933 images
[03/07 21:40:54 d2.evaluation.evaluator]: Total inference time: 0:01:59.094921 (0.128335 s / img per device, on 1 devices)
[03/07 21:40:54 d2.evaluation.evaluator]: Total inference pure compute time: 0:01:15 (0.081782 s / img per device, on 1 devices)
[03/07 21:40:56 d2.evaluation.sem_seg_evaluation]: OrderedDict([('sem_seg', {'mIoU': 14.286231780375028, 'fwIoU': 30.412502468859948, 'IoU-0': 31.958253593820217, 'IoU-1': 5.555678249276511, 'IoU-2': 5.344763498028353, 'mACC': 53.518182755044954, 'pACC': 34.17634953767668, 'ACC-0': 32.13264257356594, 'ACC-1': 69.63928432518483, 'ACC-2': 58.78262136638409})])
[03/07 21:40:56 d2.engine.defaults]: Evaluation results for xbddata_test in csv format:
[03/07 21:40:56 d2.evaluation.testing]: copypaste: Task: sem_seg
[03/07 21:40:56 d2.evaluation.testing]: copypaste: mIoU,fwIoU,mACC,pACC
[03/07 21:40:56 d2.evaluation.testing]: copypaste: 14.2862,30.4125,53.5182,34.1763

The semantic segmentation JSON created in my output folder, sem_seg_predictions.json, looks like gibberish [screenshot attached] – is this normal?
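I wondered if the random-looking strings might just be COCO run-length encodings, since SemSegEvaluator appears to encode each predicted mask with pycocotools before writing the JSON. This is a sketch of how I would try decoding one entry back into a binary mask – the field names and the bytes round-trip are my assumptions from skimming the source, not something I have verified:

import json

from pycocotools import mask as mask_util

with open("output/inference/sem_seg_predictions.json") as f:
    preds = json.load(f)

entry = preds[0]  # one (image, category) record, assuming the file is a list of dicts
rle = entry["segmentation"]
rle["counts"] = rle["counts"].encode("utf-8")  # pycocotools expects bytes; the JSON stores a str
mask = mask_util.decode(rle)  # (H, W) uint8 binary mask for this category
print(entry["file_name"], entry["category_id"], mask.sum())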

The error I got from the second script, visualize_data.py:

Traceback (most recent call last):
  File "visualize_data.py", line 51, in <module>
    cfg = setup(args)
  File "visualize_data.py", line 20, in setup
    cfg.merge_from_file(args.config_file)
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/config/config.py", line 54, in merge_from_file
    self.merge_from_other_cfg(loaded_cfg)
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/fvcore/common/config.py", line 123, in merge_from_other_cfg
    return super().merge_from_other_cfg(cfg_other)
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/yacs/config.py", line 217, in merge_from_other_cfg
    _merge_a_into_b(cfg_other, self, self, [])
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/yacs/config.py", line 478, in _merge_a_into_b
    _merge_a_into_b(v, b[k], root, key_list + [k])
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/yacs/config.py", line 478, in _merge_a_into_b
    _merge_a_into_b(v, b[k], root, key_list + [k])
  File "/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/yacs/config.py", line 491, in _merge_a_into_b
    raise KeyError("Non-existent config key: {}".format(full_key))
KeyError: 'Non-existent config key: MODEL.RESNETS.RES5_MULTI_GRID'
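For what it’s worth, train_net_xbd.py registers the DeepLab-specific keys with add_deeplab_config(cfg) before merging the config file, while the stock visualize_data.py setup() does not, so I suspect that is why MODEL.RESNETS.RES5_MULTI_GRID is reported as non-existent. A minimal sketch of the patched setup() I believe is needed, assuming projects/DeepLab is on PYTHONPATH:

from detectron2.config import get_cfg
from deeplab import add_deeplab_config  # assumes projects/DeepLab is on PYTHONPATH


def setup(args):
    cfg = get_cfg()
    add_deeplab_config(cfg)  # registers DeepLab-only keys such as MODEL.RESNETS.RES5_MULTI_GRID
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.freeze()
    return cfg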


Expected behavior:

I first ran the inference script; it finished in about two minutes, which seemed too fast. Then I ran the script to visualize my predictions, because I wanted to see how accurate DeepLab’s semantic segmentation predictions were for my model. However, I was blocked by the config KeyError above, and since inference ran so fast and the JSON file of predictions looks like gibberish, I don’t think I ran inference correctly – but I’m not sure how to do it right.

Environment:

Provide your environment information using the following command:

wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py


Full error logs for inference (the stuff at the top is just the script reprinting all of the records in my original train dataset):

{'file_name': '/n/tambe_lab/Users/michelewang/train/images/midwest-flooding_00000173_post_disaster.png', 'image_id': 2715, 'height': 1024, 'width': 1024, 'sem_seg_file_name': '/n/tambe_lab/Users/michelewang/train/bin_masks/midwest-flooding_00000173_post_disaster.png'}, {'file_name': '/n/tambe_lab/Users/michelewang/train/images/midwest-flooding_00000293_post_disaster.png', 'image_id': 2716, 'height': 1024, 'width': 1024, 'sem_seg_file_n
[03/07 21:38:45 detectron2]: Rank of current process: 0. World size: 1
[03/07 21:38:48 detectron2]: Environment info:
----------------------  -----------------------------------------------------------------------------------------------------------------
sys.platform            linux
Python                  3.8.5 (default, Sep  4 2020, 07:30:14) [GCC 7.3.0]
numpy                   1.19.2
detectron2              0.3 @/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2
Compiler                GCC 9.2
CUDA compiler           not available
detectron2 arch flags   /n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/detectron2/_C.cpython-38-x86_64-linux-gnu.so
DETECTRON2_ENV_MODULE   <not set>
PyTorch                 1.7.1 @/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torch
PyTorch debug build     False
GPU available           True
GPU 0,1,2,3,4,5,6,7     Tesla V100-PCIE-32GB (arch=7.0)
CUDA_HOME               /n/helmod/apps/centos7/Core/cuda/10.2.89-fasrc01/cuda
Pillow                  8.1.0
torchvision             0.8.2 @/n/home07/michelewang/.conda/envs/active/lib/python3.8/site-packages/torchvision
torchvision arch flags  3.5, 5.0, 6.0, 7.0, 7.5
fvcore                  0.1.3.post20210220
cv2                     4.4.0
----------------------  -----------------------------------------------------------------------------------------------------------------
PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.2 Product Build 20200624 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 10.1
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
  - CuDNN 7.6.3
  - Magma 2.5.2
  - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, 

[03/07 21:38:48 detectron2]: Command line arguments: Namespace(config_file='configs/xBD-configs/base-deeplabv3.yaml', dist_url='tcp://127.0.0.1:62862', eval_only=True, machine_rank=0, num_gpus=1, num_machines=1, opts=['MODEL.WEIGHTS', './output/model_0024999.pth'], resume=False)
[03/07 21:38:48 detectron2]: Contents of args.config_file=configs/xBD-configs/base-deeplabv3.yaml:
_BASE_: base.yaml
MODEL:
  WEIGHTS: "detectron2://DeepLab/R-103.pkl"
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  BACKBONE:
    NAME: "build_resnet_deeplab_backbone"
  RESNETS:
    DEPTH: 101
    NORM: "SyncBN"
    OUT_FEATURES: ["res2", "res5"]
    RES5_MULTI_GRID: [1, 2, 4]
    STEM_TYPE: "deeplab"
    STEM_OUT_CHANNELS: 128
    STRIDE_IN_1X1: False
  SEM_SEG_HEAD:
    NAME: "DeepLabV3PlusHead"
    IN_FEATURES: ["res2", "res5"]
    PROJECT_FEATURES: ["res2"]
    PROJECT_CHANNELS: [48]
    NORM: "SyncBN"
    COMMON_STRIDE: 4
INPUT:
  FORMAT: "RGB"

[03/07 21:38:48 detectron2]: Running with full config:
CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 10
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('xbddata_test',)
  TRAIN: ('xbddata_train',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: True
    SINGLE_CATEGORY_MAX_AREA: 1.0
    SIZE: [512, 1024]
    TYPE: absolute
  FORMAT: RGB
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 1024
  MIN_SIZE_TEST: 1024
  MIN_SIZE_TRAIN: (1024,)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32, 64, 128, 256, 512]]
  BACKBONE:
    FREEZE_AT: 0
    NAME: build_resnet_deeplab_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: []
    NORM: 
    OUT_CHANNELS: 256
  KEYPOINT_ON: False
  LOAD_PROPOSALS: False
  MASK_ON: False
  META_ARCHITECTURE: SemanticSegmentor
  PANOPTIC_FPN:
    COMBINE:
      ENABLED: True
      INSTANCES_CONFIDENCE_THRESH: 0.5
      OVERLAP_THRESH: 0.5
      STUFF_AREA_LIMIT: 4096
    INSTANCE_LOSS_WEIGHT: 1.0
  PIXEL_MEAN: [123.675, 116.28, 103.53]
  PIXEL_STD: [58.395, 57.12, 57.375]
  PROPOSAL_GENERATOR:
    MIN_SIZE: 0
    NAME: RPN
  RESNETS:
    DEFORM_MODULATED: False
    DEFORM_NUM_GROUPS: 1
    DEFORM_ON_PER_STAGE: [False, False, False, False]
    DEPTH: 101
    NORM: SyncBN
    NUM_GROUPS: 1
    OUT_FEATURES: ['res2', 'res5']
    RES2_OUT_CHANNELS: 256
    RES4_DILATION: 1
    RES5_DILATION: 2
    RES5_MULTI_GRID: [1, 2, 4]
    STEM_OUT_CHANNELS: 128
    STEM_TYPE: deeplab
    STRIDE_IN_1X1: False
    WIDTH_PER_GROUP: 64
  RETINANET:
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0)
    FOCAL_LOSS_ALPHA: 0.25
    FOCAL_LOSS_GAMMA: 2.0
    IN_FEATURES: ['p3', 'p4', 'p5', 'p6', 'p7']
    IOU_LABELS: [0, -1, 1]
    IOU_THRESHOLDS: [0.4, 0.5]
    NMS_THRESH_TEST: 0.5
    NORM: 
    NUM_CLASSES: 80
    NUM_CONVS: 4
    PRIOR_PROB: 0.01
    SCORE_THRESH_TEST: 0.05
    SMOOTH_L1_LOSS_BETA: 0.1
    TOPK_CANDIDATES_TEST: 1000
  ROI_BOX_CASCADE_HEAD:
    BBOX_REG_WEIGHTS: ((10.0, 10.0, 5.0, 5.0), (20.0, 20.0, 10.0, 10.0), (30.0, 30.0, 15.0, 15.0))
    IOUS: (0.5, 0.6, 0.7)
  ROI_BOX_HEAD:
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    BBOX_REG_WEIGHTS: (10.0, 10.0, 5.0, 5.0)
    CLS_AGNOSTIC_BBOX_REG: False
    CONV_DIM: 256
    FC_DIM: 1024
    NAME: FastRCNNConvFCHead
    NORM: 
    NUM_CONV: 0
    NUM_FC: 2
    POOLER_RESOLUTION: 7
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
    SMOOTH_L1_BETA: 0.0
    TRAIN_ON_PRED_BOXES: False
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 512
    IN_FEATURES: ['res5']
    IOU_LABELS: [0, 1]
    IOU_THRESHOLDS: [0.5]
    NAME: StandardROIHeads
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    POSITIVE_FRACTION: 0.25
    PROPOSAL_APPEND_GT: True
    SCORE_THRESH_TEST: 0.05
  ROI_KEYPOINT_HEAD:
    CONV_DIMS: (512, 512, 512, 512, 512, 512, 512, 512)
    LOSS_WEIGHT: 1.0
    MIN_KEYPOINTS_PER_IMAGE: 1
    NAME: KRCNNConvDeconvUpsampleHead
    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: True
    NUM_KEYPOINTS: 17
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: False
    CONV_DIM: 256
    NAME: MaskRCNNConvUpsampleHead
    NORM: 
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  RPN:
    BATCH_SIZE_PER_IMAGE: 256
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0)
    BOUNDARY_THRESH: -1
    HEAD_NAME: StandardRPNHead
    IN_FEATURES: ['res5']
    IOU_LABELS: [0, -1, 1]
    IOU_THRESHOLDS: [0.3, 0.7]
    LOSS_WEIGHT: 1.0
    NMS_THRESH: 0.7
    POSITIVE_FRACTION: 0.5
    POST_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TRAIN: 2000
    PRE_NMS_TOPK_TEST: 6000
    PRE_NMS_TOPK_TRAIN: 12000
    SMOOTH_L1_BETA: 0.0
  SEM_SEG_HEAD:
    ASPP_CHANNELS: 256
    ASPP_DILATIONS: [6, 12, 18]
    ASPP_DROPOUT: 0.1
    COMMON_STRIDE: 4
    CONVS_DIM: 256
    IGNORE_VALUE: 255
    IN_FEATURES: ['res2', 'res5']
    LOSS_TYPE: hard_pixel_mining
    LOSS_WEIGHT: 1.0
    NAME: DeepLabV3PlusHead
    NORM: SyncBN
    NUM_CLASSES: 3
    PROJECT_CHANNELS: [48]
    PROJECT_FEATURES: ['res2']
    USE_DEPTHWISE_SEPARABLE_CONV: False
  WEIGHTS: ./output/model_0024999.pth
OUTPUT_DIR: ./output
SEED: -1
SOLVER:
  AMP:
    ENABLED: False
  BASE_LR: 0.01
  BIAS_LR_FACTOR: 1.0
  CHECKPOINT_PERIOD: 5000
  CLIP_GRADIENTS:
    CLIP_TYPE: value
    CLIP_VALUE: 1.0
    ENABLED: False
    NORM_TYPE: 2.0
  GAMMA: 0.1
  IMS_PER_BATCH: 16
  LR_SCHEDULER_NAME: WarmupPolyLR
  MAX_ITER: 90000
  MOMENTUM: 0.9
  NESTEROV: False
  POLY_LR_CONSTANT_ENDING: 0.0
  POLY_LR_POWER: 0.9
  REFERENCE_WORLD_SIZE: 0
  STEPS: (60000, 80000)
  WARMUP_FACTOR: 0.001
  WARMUP_ITERS: 1000
  WARMUP_METHOD: linear
  WEIGHT_DECAY: 0.0001
  WEIGHT_DECAY_BIAS: 0.0001
  WEIGHT_DECAY_NORM: 0.0
TEST:
  AUG:
    ENABLED: False
    FLIP: True
    MAX_SIZE: 4000
    MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
  DETECTIONS_PER_IMAGE: 100
  EVAL_PERIOD: 0
  EXPECTED_RESULTS: []
  KEYPOINT_OKS_SIGMAS: []
  PRECISE_BN:
    ENABLED: False
    NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0
[03/07 21:38:48 detectron2]: Full config saved to ./output/config.yaml
[03/07 21:38:48 d2.utils.env]: Using a generated random seed 48437111

cfg.MODEL.WEIGHTS ./output/model_0024999.pth
cfg.OUTPUT_DIR ./output
[03/07 21:38:52 fvcore.common.checkpoint]: Loading checkpoint from ./output/model_0024999.pth
WARNING [03/07 21:38:52 fvcore.common.checkpoint]: Skip loading parameter 'sem_seg_head.predictor.weight' to the model due to incompatible shapes: (19, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in the model! You might want to double check if this is expected.
WARNING [03/07 21:38:52 fvcore.common.checkpoint]: Skip loading parameter 'sem_seg_head.predictor.bias' to the model due to incompatible shapes: (19,) in the checkpoint but (3,) in the model! You might want to double check if this is expected.
[03/07 21:38:52 fvcore.common.checkpoint]: Some model parameters or buffers are not found in the checkpoint:
sem_seg_head.predictor.{bias, weight}
[03/07 21:38:53 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(1024, 1024), max_size=1024, sample_style='choice')]
[03/07 21:38:53 d2.data.common]: Serializing 933 elements to byte tensors and concatenating them all ...
[03/07 21:38:53 d2.data.common]: Serialized dataset takes 0.23 MiB
datasetname xbddata_test
WARNING [03/07 21:38:53 d2.evaluation.sem_seg_evaluation]: SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata.
WARNING [03/07 21:38:53 d2.evaluation.sem_seg_evaluation]: SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata.
[03/07 21:38:54 d2.evaluation.evaluator]: Start inference on 933 images
[03/07 21:38:56 d2.evaluation.evaluator]: Inference done 11/933. 0.0817 s / img. ETA=0:01:55
[03/07 21:39:01 d2.evaluation.evaluator]: Inference done 51/933. 0.0817 s / img. ETA=0:01:50
[03/07 21:39:06 d2.evaluation.evaluator]: Inference done 70/933. 0.0817 s / img. ETA=0:02:26
[03/07 21:39:11 d2.evaluation.evaluator]: Inference done 110/933. 0.0818 s / img. ETA=0:02:06
[03/07 21:39:17 d2.evaluation.evaluator]: Inference done 150/933. 0.0818 s / img. ETA=0:01:54
[03/07 21:39:22 d2.evaluation.evaluator]: Inference done 190/933. 0.0819 s / img. ETA=0:01:45
[03/07 21:39:27 d2.evaluation.evaluator]: Inference done 229/933. 0.0820 s / img. ETA=0:01:38
[03/07 21:39:32 d2.evaluation.evaluator]: Inference done 270/933. 0.0820 s / img. ETA=0:01:30
[03/07 21:39:37 d2.evaluation.evaluator]: Inference done 310/933. 0.0819 s / img. ETA=0:01:24
[03/07 21:39:42 d2.evaluation.evaluator]: Inference done 351/933. 0.0819 s / img. ETA=0:01:18
[03/07 21:39:47 d2.evaluation.evaluator]: Inference done 391/933. 0.0819 s / img. ETA=0:01:12
[03/07 21:39:52 d2.evaluation.evaluator]: Inference done 432/933. 0.0819 s / img. ETA=0:01:06
[03/07 21:39:57 d2.evaluation.evaluator]: Inference done 473/933. 0.0819 s / img. ETA=0:01:00
[03/07 21:40:02 d2.evaluation.evaluator]: Inference done 514/933. 0.0819 s / img. ETA=0:00:54
[03/07 21:40:07 d2.evaluation.evaluator]: Inference done 554/933. 0.0818 s / img. ETA=0:00:49
[03/07 21:40:12 d2.evaluation.evaluator]: Inference done 595/933. 0.0818 s / img. ETA=0:00:44
[03/07 21:40:17 d2.evaluation.evaluator]: Inference done 636/933. 0.0818 s / img. ETA=0:00:38
[03/07 21:40:23 d2.evaluation.evaluator]: Inference done 677/933. 0.0818 s / img. ETA=0:00:33
[03/07 21:40:28 d2.evaluation.evaluator]: Inference done 717/933. 0.0818 s / img. ETA=0:00:27
[03/07 21:40:33 d2.evaluation.evaluator]: Inference done 758/933. 0.0818 s / img. ETA=0:00:22
[03/07 21:40:38 d2.evaluation.evaluator]: Inference done 799/933. 0.0818 s / img. ETA=0:00:17
[03/07 21:40:43 d2.evaluation.evaluator]: Inference done 840/933. 0.0818 s / img. ETA=0:00:11
[03/07 21:40:48 d2.evaluation.evaluator]: Inference done 881/933. 0.0818 s / img. ETA=0:00:06
[03/07 21:40:53 d2.evaluation.evaluator]: Inference done 922/933. 0.0818 s / img. ETA=0:00:01
[03/07 21:40:54 d2.evaluation.evaluator]: Total inference time: 0:01:59.094921 (0.128335 s / img per device, on 1 devices)
[03/07 21:40:54 d2.evaluation.evaluator]: Total inference pure compute time: 0:01:15 (0.081782 s / img per device, on 1 devices)
[03/07 21:40:56 d2.evaluation.sem_seg_evaluation]: OrderedDict([('sem_seg', {'mIoU': 14.286231780375028, 'fwIoU': 30.412502468859948, 'IoU-0': 31.958253593820217, 'IoU-1': 5.555678249276511, 'IoU-2': 5.344763498028353, 'mACC': 53.518182755044954, 'pACC': 34.17634953767668, 'ACC-0': 32.13264257356594, 'ACC-1': 69.63928432518483, 'ACC-2': 58.78262136638409})])
[03/07 21:40:56 d2.engine.defaults]: Evaluation results for xbddata_test in csv format:
[03/07 21:40:56 d2.evaluation.testing]: copypaste: Task: sem_seg
[03/07 21:40:56 d2.evaluation.testing]: copypaste: mIoU,fwIoU,mACC,pACC
[03/07 21:40:56 d2.evaluation.testing]: copypaste: 14.2862,30.4125,53.5182,34.1763


Most upvoted comments

Hi everyone, this is the code that works for me for DeepLab inference. It can be used to run DeepLab on a webcam:

import cv2
import torch

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import ColorMode, Visualizer
from deeplab import add_deeplab_config  # assumes projects/DeepLab is on PYTHONPATH

cap = cv2.VideoCapture(0)
cfg_file = "configs/custom_configs/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16_imow_without_background.yaml"
cfg = get_cfg()
add_deeplab_config(cfg)  # register the DeepLab-specific config keys before merging
cfg.merge_from_file(cfg_file)
cfg.INPUT.CROP.ENABLED = False
cfg.MODEL.WEIGHTS = "output/model_v3_plus_without_bg_c1.pth"
cfg.freeze()
predictor = DefaultPredictor(cfg)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # DefaultPredictor takes a BGR image (per cfg.INPUT.FORMAT) and returns a dict;
    # outputs["sem_seg"] is a (num_classes, H, W) tensor of per-pixel scores.
    outputs = predictor(frame)

    # Visualizer expects RGB, so reverse the channel order.
    # val_metadata_imow_nb is my custom dataset's metadata, defined elsewhere.
    v = Visualizer(
        frame[:, :, ::-1],
        metadata=val_metadata_imow_nb,
        scale=1,
        instance_mode=ColorMode.SEGMENTATION,
    )

    # Argmax over the class dimension gives an (H, W) map of integer class ids.
    sem_seg = torch.max(outputs["sem_seg"], dim=0)[1].to("cpu").numpy()

    cv2.imshow("Image", frame)
    out = v.draw_sem_seg(sem_seg, area_threshold=None, alpha=0.8)
    cv2.imshow("Seg_out", out.get_image()[:, :, ::-1])

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()

I had the following solution. It works for my custom dataset, in which the background corresponds to an output of ‘0’. For Cityscapes every class works except the background class. Of course, it is also possible to write a custom drawing function, since the output is already an integer map (see the sketch after the snippet below).

import torch.nn.functional as F

values2, outputs2 = torch.max(F.softmax(outputs, dim=0), dim=0)  # per-pixel (max probability, class id)
out = v.draw_sem_seg(outputs2.to("cpu"))
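To illustrate the custom drawing idea mentioned above, here is a minimal sketch that blends a per-class color palette onto the image; the three-color PALETTE and the function name are hypothetical, not part of detectron2:

import numpy as np

# Hypothetical palette: one RGB color per class id (three classes assumed).
PALETTE = np.array([[0, 0, 0], [0, 255, 0], [255, 0, 0]], dtype=np.uint8)


def draw_sem_seg_overlay(image_rgb, class_ids, alpha=0.5):
    # Blend an (H, W) integer class-id map onto an (H, W, 3) RGB image.
    color_mask = PALETTE[class_ids]  # fancy indexing: look up one color per pixel
    blended = alpha * color_mask + (1.0 - alpha) * image_rgb
    return blended.astype(np.uint8)

# e.g.: overlay = draw_sem_seg_overlay(frame[:, :, ::-1], outputs2.to("cpu").numpy())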