scikit-learn: datasets.fetch_california_housing broken because http://lib.stat.cmu.edu is down

from sklearn.datasets import fetch_california_housing
fetch_california_housing()

http://lib.stat.cmu.edu seems to be down, and it has been for at least a couple of days. I am not sure how to check whether it will eventually come back up.

A possible fix would be to fetch the dataset from somewhere else, for example: ~~https://archive.ics.uci.edu/ml/datasets/Housing.~~ Edit: this link is actually the Boston housing dataset.

Full traceback:

---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1181             try:
-> 1182                 h.request(req.get_method(), req.selector, req.data, headers)
   1183             except OSError as err: # timeout error

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in request(self, method, url, body, headers)
   1087         """Send a complete request to the server."""
-> 1088         self._send_request(method, url, body, headers)
   1089 

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in _send_request(self, method, url, body, headers)
   1125             body = body.encode('iso-8859-1')
-> 1126         self.endheaders(body)
   1127 

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in endheaders(self, message_body)
   1083             raise CannotSendHeader()
-> 1084         self._send_output(message_body)
   1085 

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in _send_output(self, message_body)
    921             message_body = None
--> 922         self.send(msg)
    923         if message_body is not None:

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in send(self, data)
    856             if self.auto_open:
--> 857                 self.connect()
    858             else:

/volatile/le243287/miniconda3/lib/python3.4/http/client.py in connect(self)
    833         self.sock = self._create_connection((self.host,self.port),
--> 834                                             self.timeout, self.source_address)
    835 

/volatile/le243287/miniconda3/lib/python3.4/socket.py in create_connection(address, timeout, source_address)
    493     err = None
--> 494     for res in getaddrinfo(host, port, 0, SOCK_STREAM):
    495         af, socktype, proto, canonname, sa = res

/volatile/le243287/miniconda3/lib/python3.4/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    532     addrlist = []
--> 533     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    534         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
/home/le243287/dev/scikit-learn/examples/ensemble/plot_partial_dependence.py in <module>()
     59 
     60 # fetch California housing dataset
---> 61 cal_housing = fetch_california_housing()
     62 
     63 # split 80/20 train-test

/home/le243287/dev/scikit-learn/sklearn/datasets/california_housing.py in fetch_california_housing(data_home, download_if_missing)
     91     if not exists(filepath):
     92         print('downloading Cal. housing from %s to %s' % (DATA_URL, data_home))
---> 93         fhandle = urlopen(DATA_URL)
     94         buf = BytesIO(fhandle.read())
     95         zip_file = ZipFile(buf)

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    159     else:
    160         opener = _opener
--> 161     return opener.open(url, data, timeout)
    162 
    163 def install_opener(opener):

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in open(self, fullurl, data, timeout)
    461             req = meth(req)
    462 
--> 463         response = self._open(req, data)
    464 
    465         # post-process response

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in _open(self, req, data)
    479         protocol = req.type
    480         result = self._call_chain(self.handle_open, protocol, protocol +
--> 481                                   '_open', req)
    482         if result:
    483             return result

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    439         for handler in handlers:
    440             func = getattr(handler, meth_name)
--> 441             result = func(*args)
    442             if result is not None:
    443                 return result

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in http_open(self, req)
   1208 
   1209     def http_open(self, req):
-> 1210         return self.do_open(http.client.HTTPConnection, req)
   1211 
   1212     http_request = AbstractHTTPHandler.do_request_

/volatile/le243287/miniconda3/lib/python3.4/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1182                 h.request(req.get_method(), req.selector, req.data, headers)
   1183             except OSError as err: # timeout error
-> 1184                 raise URLError(err)
   1185             r = h.getresponse()
   1186         except:

URLError: <urlopen error [Errno -2] Name or service not known>

About this issue

  • Original URL
  • State: closed
  • Created 9 years ago
  • Comments: 23 (13 by maintainers)

Most upvoted comments

Looks like http://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz is down now as well.

Down for me as well.