OpenPCDet: CUDA error: no kernel image is available for execution on the device

Training PointPillars and SECOND works fine, but when I try to train PartA^2Net, I get the following error message (after applying the fix from issue #70):

Traceback (most recent call last):
File "train.py", line 215, in <module>
main()
File "train.py", line 210, in main
max_ckpt_save_num=arguments.max_ckpt_save_num)
File "/scratch_net/hox/mhahner/repositories/PCDet/tools/train_utils/train_utils.py", line 80, in train_model
leave_pbar=(cur_epoch + 1 == total_epochs)
File "/scratch_net/hox/mhahner/repositories/PCDet/tools/train_utils/train_utils.py", line 36, in train_one_epoch
loss, tb_dict, disp_dict = model_func(model, batch)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/__init__.py", line 25, in model_func
ret_dict, tb_dict, disp_dict = model(input_dict)
File "/home/mhahner/scratch/apps/anaconda3/envs/spconv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/detectors/PartA2_net.py", line 112, in forward
batch_size, voxel_centers, coords, rpn_ret_dict, input_dict
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/detectors/PartA2_net.py", line 98, in forward_rcnn
rcnn_ret_dict = self.rcnn_net.forward(rcnn_input_dict)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/rcnn/partA2_rcnn_net.py", line 323, in forward
targets_dict = self.assign_targets(batch_size, rcnn_dict)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/rcnn/partA2_rcnn_net.py", line 27, in assign_targets
targets_dict = proposal_target_layer(rcnn_dict, roi_sampler_cfg=self.rcnn_target_config)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/model_utils/proposal_target_layer.py", line 14, in proposal_target_layer
sample_rois_for_rcnn(rois, gt_boxes, roi_raw_scores, roi_labels, roi_sampler_cfg)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/model_utils/proposal_target_layer.py", line 82, in sample_rois_for_rcnn
cur_gt[:, 0:7], cur_gt_labels)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/models/model_utils/proposal_target_layer.py", line 183, in get_maxiou3d_with_same_class
iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi, cur_gt) # (M, N)
File "/scratch_net/hox/mhahner/repositories/PCDet/pcdet/ops/iou3d_nms/iou3d_nms_utils.py", line 47, in boxes_iou3d_gpu
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
RuntimeError: CUDA error: no kernel image is available for execution on the device

My conda environment looks like this:

# packages in environment at /home/mhahner/scratch/apps/anaconda3/envs/spconv:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                      1_llvm    conda-forge
beautifulsoup4            4.9.1                    pypi_0    pypi
bzip2                     1.0.8                h516909a_2    conda-forge
ca-certificates           2020.1.1                      0
cachetools                4.1.0                    pypi_0    pypi
certifi                   2020.4.5.1               py37_0
chardet                   3.0.4                    pypi_0    pypi
cmake                     3.17.0               h28c56e5_0    conda-forge
coloredlogs               14.0                     pypi_0    pypi
cudatoolkit               10.1.243             h6bb024c_0
cudatoolkit-dev           10.1.243             h516909a_3    conda-forge
cudnn                     7.6.5                cuda10.1_0
cycler                    0.10.0                   pypi_0    pypi
decorator                 4.4.2                    pypi_0    pypi
easydict                  1.9                      pypi_0    pypi
expat                     2.2.9                he1b5a44_2    conda-forge
google                    2.0.3                    pypi_0    pypi
google-auth               1.15.0                   pypi_0    pypi
google-auth-oauthlib      0.4.1                    pypi_0    pypi
gspread                   3.6.0                    pypi_0    pypi
httplib2                  0.18.0                   pypi_0    pypi
humanfriendly             8.2                      pypi_0    pypi
idna                      2.9                      pypi_0    pypi
imagecodecs               2020.2.18                pypi_0    pypi
imageio                   2.8.0                    pypi_0    pypi
kiwisolver                1.2.0                    pypi_0    pypi
krb5                      1.17.1               h2fd8d38_0    conda-forge
ld_impl_linux-64          2.34                 h53a641e_0    conda-forge
libblas                   3.8.0               16_openblas    conda-forge
libcblas                  3.8.0               16_openblas    conda-forge
libcurl                   7.69.1               hf7181ac_0    conda-forge
libedit                   3.1.20170329      hf8c457e_1001    conda-forge
libffi                    3.2.1             he1b5a44_1007    conda-forge
libgcc-ng                 9.2.0                h24d8f2e_2    conda-forge
libgfortran-ng            7.5.0                hdf63c60_6    conda-forge
liblapack                 3.8.0               16_openblas    conda-forge
libopenblas               0.3.9                h5ec1e0e_0    conda-forge
libssh2                   1.9.0                hab1572f_2    conda-forge
libstdcxx-ng              9.2.0                hdf63c60_2    conda-forge
libuv                     1.34.0               h516909a_0    conda-forge
llvm-openmp               10.0.0               hc9558a2_0    conda-forge
llvmlite                  0.32.1                   pypi_0    pypi
matplotlib                3.2.1                    pypi_0    pypi
mkl                       2020.1                      219    conda-forge
ncurses                   6.1               hf484d3e_1002    conda-forge
networkx                  2.4                      pypi_0    pypi
ninja                     1.10.0               hc9558a2_0    conda-forge
numba                     0.49.1                   pypi_0    pypi
numpy                     1.18.4           py37h8960a57_0    conda-forge
oauth2client              4.1.3                    pypi_0    pypi
oauthlib                  3.1.0                    pypi_0    pypi
openssl                   1.1.1g               h7b6447c_0
pillow                    7.1.2                    pypi_0    pypi
pip                       20.1.1             pyh9f0ad1d_0    conda-forge
protobuf                  3.12.0                   pypi_0    pypi
pyasn1                    0.4.8                    pypi_0    pypi
pyasn1-modules            0.2.8                    pypi_0    pypi
pyparsing                 2.4.7                    pypi_0    pypi
python                    3.7.6           h8356626_5_cpython    conda-forge
python-dateutil           2.8.1                    pypi_0    pypi
python_abi                3.7                     1_cp37m    conda-forge
pytorch                   1.4.0           py3.7_cuda10.1.243_cudnn7.6.3_0    pytorch
pywavelets                1.1.1                    pypi_0    pypi
pyyaml                    5.3.1                    pypi_0    pypi
readline                  8.0                  hf8c457e_0    conda-forge
requests                  2.23.0                   pypi_0    pypi
requests-oauthlib         1.3.0                    pypi_0    pypi
rhash                     1.3.6             h14c3975_1001    conda-forge
rsa                       4.0                      pypi_0    pypi
scikit-image              0.17.2                   pypi_0    pypi
scipy                     1.4.1                    pypi_0    pypi
setuptools                46.4.0           py37hc8dfbb8_0    conda-forge
six                       1.14.0                   pypi_0    pypi
soupsieve                 2.0.1                    pypi_0    pypi
spconv                    1.0                      pypi_0    pypi
sqlite                    3.30.1               hcee41ef_0    conda-forge
tensorboardx              2.0                      pypi_0    pypi
tifffile                  2020.5.11                pypi_0    pypi
tk                        8.6.10               hed695b0_0    conda-forge
tqdm                      4.46.0                   pypi_0    pypi
urllib3                   1.25.9                   pypi_0    pypi
wheel                     0.34.2                     py_1    conda-forge
xz                        5.2.5                h516909a_0    conda-forge
zlib                      1.2.11            h516909a_1006    conda-forge

About this issue

  • Original URL
  • State: closed
  • Created 4 years ago
  • Comments: 24 (13 by maintainers)

Most upvoted comments

Finally SOLVED!

The final issue was that we tried to build spconv on a Tesla K40 GPU, which has a Kepler architecture and is seemingly too old to build spconv.

The reason why we always tried to build spconv on a Tesla K40 is that in our lab, it is hard to get a GPU on our GPU cluster interactively. (Usually, you can only submit jobs via qsub/SLURM.) So we always tried to build spconv either on our local Linux clients or on old nodes which only have Tesla K40s. Then, out of despair, we tried to build spconv on a Titan X (Pascal) and we finally found a combination of requirements that worked.

So here is our solution:

conda create --name PCDet python=3.6 pytorch=1.1 cudatoolkit=9.2 cudatoolkit-dev=9.2 \
cmake --channel pytorch --channel=conda-forge
conda activate PCDet
conda install cudnn
conda install boost
git clone https://github.com/traveller59/spconv spconv_8da6f96 --recursive
cd spconv_8da6f96
git checkout 8da6f967fb9a054d8870c3515b1b44eca2103634

If necessary: Download and extract 0001-Allow-to-specifiy-CUDA_ROOT-directory-and-pick-corre.patch.zip and patch spconv via: git am <PATH_TO_EXTRACTED_FILE>/0001-Allow-to-specifiy-CUDA_ROOT-directory-and-pick-corre.patch

CUDA_ROOT=<PATH_TO_YOUR_CONDA_INSTALLATION>/conda_envs/PCDet python setup.py bdist_wheel
cd dist/
pip install *

Test spconv via: python -c 'import spconv' (should just return and not raise any errors)

cd ../..
git clone https://github.com/sshaoshuai/PCDet.git
cd PCDet/
pip install -r requirements.txt
CUDA_ROOT=<PATH_TO_YOUR_CONDA_INSTALLATION>/conda_envs/PCDet python setup.py develop

Done!

I hope these instructions help someone else who struggles to build spconv as well.

CUDA_ROOT=/usr/local/cuda-11.0 python setup.py develop
running develop
running egg_info
writing pcdet.egg-info/PKG-INFO
writing dependency_links to pcdet.egg-info/dependency_links.txt
writing requirements to pcdet.egg-info/requires.txt
writing top-level names to pcdet.egg-info/top_level.txt
/opt/conda/lib/python3.6/site-packages/torch/utils/cpp_extension.py:339: UserWarning: Attempted to use ninja as the BuildExtension backend but we could not find ninja.. Falling back to using the slow distutils backend.
warnings.warn(msg.format('we could not find ninja.'))
reading manifest file 'pcdet.egg-info/SOURCES.txt'
writing manifest file 'pcdet.egg-info/SOURCES.txt'
running build_ext
building 'pcdet.ops.iou3d_nms.iou3d_nms_cuda' extension
gcc -pthread -B /opt/conda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/opt/conda/lib/python3.6/site-packages/torch/include -I/opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/lib/python3.6/site-packages/torch/include/TH -I/opt/conda/lib/python3.6/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/include/python3.6m -c pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp -o build/temp.linux-x86_64-3.6/pcdet/ops/iou3d_nms/src/iou3d_cpu.o -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=iou3d_nms_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
cc1plus: warning: command line option '-Wstrict-prototypes' is valid for C/ObjC but not for C++
In file included from /opt/conda/lib/python3.6/site-packages/torch/include/ATen/Parallel.h:149:0,
from /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
from /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
from /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
from /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
from /opt/conda/lib/python3.6/site-packages/torch/include/torch/extension.h:4,
from pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp:10:
/opt/conda/lib/python3.6/site-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
#pragma omp parallel for if ((end - begin) >= grain_size)
^
pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp:12:18: fatal error: cuda.h: No such file or directory
compilation terminated.
error: command 'gcc' failed with exit status 1