joblib: Bug when passing a function as a parameter to a delayed function

The following script worked with joblib 0.10.0:

from joblib import Parallel, delayed

def twice(a):
    return 2 * a

def run(method, a):
    return method(a)

if __name__ == '__main__':
    methods = [twice, ]

    results = Parallel(n_jobs=2)(delayed(run)(
        method=method, a=10) for method in methods)

Now it fails with:

Traceback (most recent call last):
  File "/cal/homes/tdupre/work/src/joblib/joblib/externals/loky/process_executor.py", line 338, in _process_worker
    call_item = call_queue.get(block=True, timeout=timeout)
  File "/cal/homes/tdupre/miniconda3/envs/py36/lib/python3.6/multiprocessing/queues.py", line 113, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'twice' on <module 'joblib.externals.loky.backend.popen_loky_posix' from '/cal/homes/tdupre/work/src/joblib/joblib/externals/loky/backend/popen_loky_posix.py'>
Traceback (most recent call last):
  File "test.py", line 17, in <module>
    method=method, a=10) for method in methods)
  File "/cal/homes/tdupre/work/src/joblib/joblib/parallel.py", line 814, in __call__
    self.retrieve()
  File "/cal/homes/tdupre/work/src/joblib/joblib/parallel.py", line 716, in retrieve
    self._output.extend(job.get(timeout=self.timeout))
  File "/cal/homes/tdupre/work/src/joblib/joblib/_parallel_backends.py", line 402, in wrap_future_result
    return future.result(timeout=timeout)
  File "/cal/homes/tdupre/work/src/joblib/joblib/externals/loky/_base.py", line 431, in result
    return self.__get_result()
  File "/cal/homes/tdupre/work/src/joblib/joblib/externals/loky/_base.py", line 382, in __get_result
    raise self._exception
joblib.externals.loky.process_executor.BrokenExecutor: A process in the process pool was terminated abruptly while the future was running or pending.

A git bisect points to #516.
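
As a stop-gap (only a sketch, not a fix), and assuming the regression is specific to the loky backend — the worker traceback goes through joblib.externals.loky — forcing the multiprocessing backend on the same reproduction may avoid the crash:

from joblib import Parallel, delayed

def twice(a):
    return 2 * a

def run(method, a):
    return method(a)

if __name__ == '__main__':
    methods = [twice, ]

    # Assumption: the failure comes from how the loky backend pickles the call
    # arguments, so explicitly selecting the multiprocessing backend sidesteps
    # it as a temporary measure.
    results = Parallel(n_jobs=2, backend='multiprocessing')(delayed(run)(
        method=method, a=10) for method in methods)
    print(results)  # expected: [20]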

About this issue

  • State: closed
  • Created 6 years ago
  • Comments: 15 (9 by maintainers)

Most upvoted comments

I would rather deprecate the multiprocessing backend in the long term.

Minimal example (apologies for the silly names). It seems to break when passing lists of functions or class instances to a function, but works fine with single objects.

from joblib import Parallel, delayed

class MetricBad:
    def __init__(self):
        self.x = 5

    def project(self):
        self.x = 9
        return self.x

def _process(projectionlist):
    for p in projectionlist:
        res = p.project()
    return res

def _process2(projection):
    return projection.project()

results = Parallel(n_jobs=4)(delayed(_process)([MetricBad(), MetricBad()]) for i in range(20))  # Breaks
results = Parallel(n_jobs=4)(delayed(_process2)(MetricBad()) for i in range(20))  # Works

def _process3(funcs):
    for fn in funcs:
        res = fn()
    return res

def return6():
    return 6

def _process4(fn):
    return fn()

results = Parallel(n_jobs=4)(delayed(_process3)([return6, return6]) for i in range(20)) # Breaks also with lists of functions
results = Parallel(n_jobs=4)(delayed(_process4)(return6) for i in range(20)) # Works
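
Since the single-callable variants above go through fine, one interim restructuring (just a sketch around the reported breakage, not a fix) is to dispatch one callable per task and aggregate afterwards; note this returns one result per callable rather than only the last one:

# Sketch: flatten the list of callables and run each one as its own task,
# mirroring the _process4 pattern that works above. Results come back as a
# flat list (one entry per callable per repetition), not just the last value.
funcs = [return6, return6]
results = Parallel(n_jobs=4)(
    delayed(_process4)(fn) for i in range(20) for fn in funcs)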