numba: OSError: exception: access violation reading ... in nvvm.py

I am using cudatoolkit 10.1.168, llvmlite 0.30.0 and numba 0.46.0 on Windows 10 with Python 3.7. I wrote some code that runs on the GPU via CUDA. It drives me crazy that the kernel call line in the code intermittently fails to compile (and therefore to run), seemingly at random.

Whenever the memory access violation occurs, it is raised at the same place: numba\cuda\cudadrv\nvvm.py, in compile(self, **options), line 232: err = self.driver.nvvmCompileProgram(self._handle, len(opts), c_opts)

8     r = cuda_dp.play_A_Game_With_GPU(env, env_out, doCheckDiceSel_2rollsLeft, doCheckDiceSel_1rollsLeft, doCheckSlotSel, countWins_out_gpu, countDraws_out_gpu, countLosts_out_gpu, avgRes_out_gpu, s_countWins_out_gpu, s_countDraws_out_gpu, s_countLosts_out_gpu, s_avgRes_out_gpu, cuda_dp.gpu_const_arrays, blocks, threads_per_block, random_states, Verbose = 0)
      9     clear_output(wait=True)
     10     if r==0:draws+=1

~\sajat\DicePokerAI\cuda_dp.py in play_A_Game_With_GPU(env_local, env_YP, doCheckDiceSel_2rollsLeft, doCheckDiceSel_1rollsLeft, doCheckSlotSel, countWins_out_gpu, countDraws_out_gpu, countLosts_out_gpu, avgRes_out_gpu, s_countWins_out_gpu, s_countDraws_out_gpu, s_countLosts_out_gpu, s_avgRes_out_gpu, gpu_const_arrays, blocks, threads_per_block, random_states, Verbose)
   1895             env_YP_gpu = cuda.to_device(env_YP)
   1896 
-> 1897             kernel_call_cuda_findBestDiceSelToWin[blocks, threads_per_block](env_local_gpu,env_YP_gpu, countWins_out_gpu, countDraws_out_gpu, countLosts_out_gpu, avgRes_out_gpu, gpu_const_arrays, random_states)
   1898 
   1899             countWins_out_res = countWins_out_gpu.copy_to_host()

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in __call__(self, *args)
    797         Specialize and invoke this kernel with *args*.
    798         '''
--> 799         kernel = self.specialize(*args)
    800         cfg = kernel[self.griddim, self.blockdim, self.stream, self.sharedmem]
    801         cfg(*args)

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in specialize(self, *args)
    808         argtypes = tuple(
    809             [self.typingctx.resolve_argument_type(a) for a in args])
--> 810         kernel = self.compile(argtypes)
    811         return kernel
    812 

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in compile(self, sig)
    827             self.definitions[(cc, argtypes)] = kernel
    828             if self.bind:
--> 829                 kernel.bind()
    830         return kernel
    831 

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in bind(self)
    541         Force binding to current CUDA context
    542         """
--> 543         self._func.get()
    544 
    545     @property

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in get(self)
    419         cufunc = self.cache.get(device.id)
    420         if cufunc is None:
--> 421             ptx = self.ptx.get()
    422 
    423             # Link

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\compiler.py in get(self)
    390             arch = nvvm.get_arch_option(*cc)
    391             ptx = nvvm.llvm_to_ptx(self.llvmir, opt=3, arch=arch,
--> 392                                    **self._extra_options)
    393             self.cache[cc] = ptx
    394             if config.DUMP_ASSEMBLY:

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\cudadrv\nvvm.py in llvm_to_ptx(llvmir, **opts)
    494     cu.add_module(libdevice.get())
    495 
--> 496     ptx = cu.compile(**opts)
    497     # XXX remove debug_pubnames seems to be necessary sometimes
    498     return patch_ptx_debug_pubnames(ptx)

~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\cudadrv\nvvm.py in compile(self, **options)
    230         c_opts = (c_char_p * len(opts))(*[c_char_p(x.encode('utf8'))
    231                                           for x in opts])
--> 232         err = self.driver.nvvmCompileProgram(self._handle, len(opts), c_opts)
    233         self._try_error(err, 'Failed to compile\n')
    234 

OSError: exception: access violation reading 0x000002868DC6FFF8


If I try to re-run this part of the code from Jupyter without restarting the kernel, I get a slightly different error:
....
~\.conda\envs\tensorflow-gpu\lib\site-packages\numba\cuda\cudadrv\nvvm.py in check_error(self, error, msg, exit)
    139                 sys.exit(1)
    140             else:
--> 141                 raise exc
    142 
    143 

NvvmError: Failed to compile


NVVM_ERROR_COMPILATION

Any hint?

About this issue

Original URL
State: closed
Created 5 years ago
Reactions: 1
Comments: 20 (9 by maintainers)

Most upvoted comments

@sgbaird Many thanks, new issue created for this as it’s a distinct problem: #7326.

gmarkall on Aug 20, 2021