spikeinterface: Failure of export_to_phy from existing MountainSort cluster data

I have a use case where I take data already sorted and curated with MountainSort and funnel it through the SpikeInterface API to generate phy files:

import json, os, spikeinterface
import spikeinterface.core.waveform_extractor as wave
import spikeinterface.exporters.to_phy as phy
import spikeinterface.extractors.mdaextractors as mda

# Point to a prv recording and the params file
local_path = '/Volumes/GenuDrive/RY16_direct/MountainSort/RY16_36.mountain/RY16_36.nt1.mountain'
prv_file = os.path.join(local_path, 'raw.mda.prv')
params_file = os.path.join(local_path, 'params.json')

# Derive the properties in those json dicts
with open(prv_file, 'r') as F:
    raw_dict = json.load(F)
with open(params_file, 'r') as F:
    params_dict = json.load(F)

# Create the mda object
mda_recording = mda.read_mda_recording(local_path,
                                       raw_fname=raw_dict['original_path'])
mda_recording.annotate(is_filtered=True)  # already filtered upstream, so mark it as such

# Derive the spikes file
firings_file = os.path.join(local_path, 'firings_raw.mda')
sorted_spikes = mda.read_mda_sorting(firings_file, sampling_frequency=params_dict['samplerate'])

# Create waveforms per spike
waveform_file = os.path.join(local_path, 'waveforms')
waveforms = wave.extract_waveforms(mda_recording, sorted_spikes, waveform_file)

# Export to phy
phyplace = os.path.join(local_path, "phy")
phy.export_to_phy(waveforms, phyplace)

Which is great: I like phy a bit more than qt-mountainview for curation. But for some of my data, this triggers an error in the export_to_phy step: recording chunk end points can overshoot the mda N1 x N2 size bounds, raising a ValueError.
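For intuition, here is a scaled-down, hypothetical illustration of that failure mode (made-up numbers, not code from spikeinterface): the reader pulls end_frame - start_frame frames' worth of data from disk, but when the requested stop frame runs past the end of the file it gets back fewer elements than the reshape expects.

import numpy as np

n_channels = 4
frames_on_disk = 100     # frames actually left in the file
frames_requested = 110   # a chunk whose frame_stop overshoots by 10 frames

raw = np.zeros(n_channels * frames_on_disk, dtype=np.int16)  # what actually comes back from disk
np.reshape(raw, (n_channels, frames_requested), order='F')
# ValueError: cannot reshape array of size 400 into shape (4,110)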

Disclaimer: I didn't fully read the code in the execution stack to make sure this is kosher, but bounding the chunk end to the true sample count fixes it. In other words, it works if I alter line 55 in job_tools.py:

def devide_recording_into_chunks(recording, chunk_size):
    all_chunks = []
    for segment_index in range(recording.get_num_segments()):
        num_frames = recording.get_num_samples(segment_index)
        chunks = divide_segment_into_chunks(num_frames, chunk_size)
        # original line 55:
        # all_chunks.extend([(segment_index, frame_start, frame_stop) for frame_start, frame_stop in chunks])
        # modification in the next two lines: clamp frame_stop to the true number of samples
        sample_max = recording.get_num_samples(segment_index)
        all_chunks.extend([(segment_index, frame_start, min(frame_stop, sample_max))
                           for frame_start, frame_stop in chunks])
    return all_chunks

About this issue

  • State: closed
  • Created 3 years ago
  • Comments: 21 (16 by maintainers)

Most upvoted comments

Did you try my working branch?

pip uninstall spikeinterface
git clone https://github.com/samuelgarcia/spikeinterface.git
cd spikeinterface
git checkout some_fixes
python setup.py install

On my side there is no bug anymore.

I think this could be fixed by setting end_frame=num_samples if end_frame>num_samples in the get_traces function of the MdaRecordingSegment 😃
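A minimal sketch of that clamp, based on the get_traces body visible in the traceback below rather than the actual upstream patch:

# sketch only: MdaRecordingSegment.get_traces with end_frame clamped to the file length
def get_traces(self, start_frame=None, end_frame=None, channel_indices=None):
    if start_frame is None:
        start_frame = 0
    if end_frame is None or end_frame > self.get_num_samples():
        end_frame = self.get_num_samples()  # never read past the end of the .mda
    recordings = self._diskreadmda.readChunk(i1=0, i2=start_frame,
                                             N1=self._diskreadmda.N1(),
                                             N2=end_frame - start_frame)
    recordings = recordings[channel_indices, :].T
    return recordings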

Yep. Attaching:

[ins] In [28]: phyfiles = phy.export_to_phy(waveform, local_path + os.path.sep + "phy")
write_binary_recording with n_jobs 1  chunk_size None
Setting 'return_scaled' to False
-------------------------------------------------------------------------
ValueError                              Traceback (most recent call last)
<ipython-input-28-66a465ff0240> in <module>
----> 1 phyfiles = sexpphy.export_to_phy(waveform, local_path + os.path.sep + "phy")

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/exporters/to_phy.py in export_to_phy(waveform_extractor, output_folder, compute_pc_features, compute_amplitudes, max_channels_per_template, copy_binary, remove_if_exists, peak_sign, template_mode, dtype, verbose, **job_kwargs)
    166         pc.run_for_all_spikes(output_folder / 'pc_features.npy',
    167                               max_channels_per_template=max_channels_per_template, peak_sign=peak_sign,
--> 168                               **job_kwargs)
    169
    170         max_channels_per_template = min(max_channels_per_template, len(channel_ids))

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/toolkit/postprocessing/principal_component.py in run_for_all_spikes(self, file_path, max_channels_per_template, peak_sign, **job_kwargs)
    227         init_args = init_args + (all_pcs_args, spike_times, spike_labels, we.nbefore, we.nafter, unit_channels, all_pca)
    228         processor = ChunkRecordingExecutor(recording, func, init_func, init_args, job_name='extract PCs', **job_kwargs)
--> 229         processor.run()
    230
    231     def _fit_by_channel_local(self):

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/job_tools.py in run(self)
    220             worker_ctx = self.init_func(*self.init_args)
    221             for segment_index, frame_start, frame_stop in all_chunks:
--> 222                 res = self.func(segment_index, frame_start, frame_stop, worker_ctx)
    223                 if self.handle_returns:
    224                     returns.append(res)

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/toolkit/postprocessing/principal_component.py in _all_pc_extractor_chunk(segment_index, start_frame, end_frame, worker_ctx)
    354     start = int(spike_times[i0] - nbefore)
    355     end = int(spike_times[i1 - 1] + nafter)
--> 356     traces = recording.get_traces(start_frame=start, end_frame=end, segment_index=segment_index)
    357
    358     for i in range(i0, i1):

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/baserecording.py in get_traces(self, segment_index, start_frame, end_frame, channel_ids, order, return_scaled)
     98         channel_indices = self.ids_to_indices(channel_ids, prefer_slice=True)
     99         rs = self._recording_segments[segment_index]
--> 100         traces = rs.get_traces(start_frame=start_frame, end_frame=end_frame, channel_indices=channel_indices)
    101         if order is not None:
    102             assert order in ["C", "F"]

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/extractors/mdaextractors.py in get_traces(self, start_frame, end_frame, channel_indices)
    129             end_frame = self.get_num_samples()
    130         recordings = self._diskreadmda.readChunk(i1=0, i2=start_frame, N1=self._diskreadmda.N1(),
--> 131                                                  N2=end_frame - start_frame)
    132         recordings = recordings[channel_indices, :].T
    133         return recordings

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/extractors/mdaextractors.py in readChunk(self, i1, i2, i3, N1, N2, N3)
    342                 A = np.load(self._path, mmap_mode='r')
    343                 return A[:, i2:i2 + N2]
--> 344             return np.reshape(X, (N1, N2), order='F')
    345         else:
    346             if N1 != self.N1():

<__array_function__ internals> in reshape(*args, **kwargs)

~/anaconda3/envs/phy2/lib/python3.7/site-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
    297            [5, 6]])
    298     """
--> 299     return _wrapfunc(a, 'reshape', newshape, order=order)
    300
    301

~/anaconda3/envs/phy2/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     56
     57     try:
---> 58         return bound(*args, **kwds)
     59     except TypeError:
     60         # A TypeError occurs if the object does have such a method in its

ValueError: cannot reshape array of size 2055585504 into shape (4,513896463)
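For what it's worth, those numbers are consistent with a small overshoot past the end of the file (a quick back-of-the-envelope check, not spikeinterface output):

n_elements = 2055585504                       # array size reported in the ValueError
n_channels, n_frames_requested = 4, 513896463
n_frames_on_disk = n_elements // n_channels   # 513896376
print(n_frames_requested - n_frames_on_disk)  # 87 frames past the end of the file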

The extract_waveforms step can also overshoot and fails at the same spot in the stack:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-77-36265b78d9b9> in <module>
----> 1 waveform = scwave.extract_waveforms(mdafile, spikefile, waveform_file)

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/waveform_extractor.py in extract_waveforms(recording, sorting, folder, load_if_exists, ms_before, ms_after, max_spikes_per_unit, overwrite, return_scaled, dtype, **job_kwargs)
    550         we.set_params(ms_before=ms_before, ms_after=ms_after, max_spikes_per_unit=max_spikes_per_unit, dtype=dtype,
    551                       return_scaled=return_scaled)
--> 552         we.run(**job_kwargs)
    553
    554     return we

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/waveform_extractor.py in run(self, **job_kwargs)
    375         processor = ChunkRecordingExecutor(self.recording, func, init_func, init_args, job_name='extract waveforms',
    376                                            **job_kwargs)
--> 377         processor.run()
    378
    379

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/job_tools.py in run(self)
    220             worker_ctx = self.init_func(*self.init_args)
    221             for segment_index, frame_start, frame_stop in all_chunks:
--> 222                 res = self.func(segment_index, frame_start, frame_stop, worker_ctx)
    223                 if self.handle_returns:
    224                     returns.append(res)

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/waveform_extractor.py in _waveform_extractor_chunk(segment_index, start_frame, end_frame, worker_ctx)
    483         # load trace in memory
    484         traces = recording.get_traces(start_frame=start, end_frame=end, segment_index=segment_index,
--> 485                                       return_scaled=return_scaled)
    486
    487         for unit_id, (i0, i1, local_spike_times) in to_extract.items():

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/core/baserecording.py in get_traces(self, segment_index, start_frame, end_frame, channel_ids, order, return_scaled)
     98         channel_indices = self.ids_to_indices(channel_ids, prefer_slice=True)
     99         rs = self._recording_segments[segment_index]
--> 100         traces = rs.get_traces(start_frame=start_frame, end_frame=end_frame, channel_indices=channel_indices)
    101         if order is not None:
    102             assert order in ["C", "F"]

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/extractors/mdaextractors.py in get_traces(self, start_frame, end_frame, channel_indices)
    129             end_frame = self.get_num_samples()
    130         recordings = self._diskreadmda.readChunk(i1=0, i2=start_frame, N1=self._diskreadmda.N1(),
--> 131                                                  N2=end_frame - start_frame)
    132         recordings = recordings[channel_indices, :].T
    133         return recordings

~/anaconda3/envs/phy2/lib/python3.7/site-packages/spikeinterface-0.90.2.dev0-py3.7.egg/spikeinterface/extractors/mdaextractors.py in readChunk(self, i1, i2, i3, N1, N2, N3)
    342                 A = np.load(self._path, mmap_mode='r')
    343                 return A[:, i2:i2 + N2]
--> 344             return np.reshape(X, (N1, N2), order='F')
    345         else:

<__array_function__ internals> in reshape(*args, **kwargs)

~/anaconda3/envs/phy2/lib/python3.7/site-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
    297            [5, 6]])
    298     """
--> 299     return _wrapfunc(a, 'reshape', newshape, order=order)
    300
    301

~/anaconda3/envs/phy2/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     56
     57     try:
---> 58         return bound(*args, **kwds)
     59     except TypeError:
     60         # A TypeError occurs if the object does have such a method in its

ValueError: cannot reshape array of size 2053833200 into shape (4,513585503)

I applied the same bandage to the waveform extraction step on my end: a bounds check at get_traces. Before doing that earlier today, I looked around the mda extractor but didn't catch anything weird, although I couldn't tell whether the header offset was correct. I probably missed something, given that I'm not very familiar with this extractor code.
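For anyone hitting the same thing, a quick check along these lines (an illustration using standard SpikeInterface calls and the variable names from the script above, not a fix) shows whether spikes sit close enough to the end of the recording for the padded chunk windows to run past the file:

num_samples = mda_recording.get_num_samples()
last_spike_frame = max(sorted_spikes.get_unit_spike_train(u).max()
                       for u in sorted_spikes.get_unit_ids())
# spikes within ms_after of the end are the ones whose padded windows overshoot
print(num_samples, last_spike_frame, num_samples - last_spike_frame)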

drive link with related mdas