Source code for utils.io

import json
from pathlib import PurePath, Path
from typing import TYPE_CHECKING

import numpy as np

from py_neuromodulation.utils.types import _PathLike
from py_neuromodulation import logger, PYNM_DIR

if TYPE_CHECKING:
    from mne_bids import BIDSPath
    from mne import io as mne_io
    import pandas as pd


def load_channels(
    channels: "pd.DataFrame | _PathLike",
) -> "pd.DataFrame":
    """Read channels from a pandas DataFrame or a channels file path.

    Necessary parameters are then ch_names (list), ch_types (list),
    bads (list), used_types (list), target_keywords (list) and
    reference Union[list, str].
    """
    import pandas as pd

    if isinstance(channels, pd.DataFrame):
        return channels

    if not Path(channels).is_file():
        raise ValueError(f"PATH_CHANNELS is not a valid file. Got: {channels}")

    return pd.read_csv(channels)
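# Usage sketch for load_channels (hypothetical path; any CSV readable by
# pd.read_csv works, or an already loaded DataFrame is passed through):
#
#     channels = load_channels("channels.csv")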
def read_BIDS_data(
    PATH_RUN: "_PathLike | BIDSPath",
    line_noise: int = 50,
) -> tuple["mne_io.Raw", np.ndarray, float, int, list | None, list | None]:
    """Given a BIDS run path, read the respective data.

    Parameters
    ----------
    PATH_RUN : path to the BIDS run file
        supported formats:
        https://bids-specification.readthedocs.io/en/v1.2.1/04-modality-specific-files/04-intracranial-electroencephalography.html#ieeg-recording-data
    line_noise : int, optional
        power line frequency, by default 50

    Returns
    -------
    raw_arr : mne.io.Raw
    raw_arr_data : np.ndarray
    sfreq : float
    line_noise : int
    coord_list : list | None
    coord_names : list | None
    """
    from mne_bids import read_raw_bids, get_bids_path_from_fname

    bids_path = get_bids_path_from_fname(PATH_RUN)
    raw_arr = read_raw_bids(bids_path)
    coord_list, coord_names = get_coord_list(raw_arr)

    if raw_arr.info["line_freq"] is not None:
        line_noise = int(raw_arr.info["line_freq"])
    else:
        logger.info(
            f"Line noise is not available in the data, using value of {line_noise} Hz."
        )

    return (
        raw_arr,
        raw_arr.get_data(),
        raw_arr.info["sfreq"],
        line_noise,
        coord_list,
        coord_names,
    )
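# Usage sketch for read_BIDS_data (hypothetical BIDS run path; any filename
# that mne_bids.get_bids_path_from_fname can parse is accepted):
#
#     raw_arr, data, sfreq, line_noise, coord_list, coord_names = read_BIDS_data(
#         "/data/BIDS/sub-01/ses-01/ieeg/sub-01_ses-01_task-rest_run-0_ieeg.vhdr"
#     )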
def read_mne_data(
    PATH_RUN: "_PathLike | BIDSPath",
    line_noise: int = 50,
):
    """Read data in a mne.io.read_raw supported format.

    Parameters
    ----------
    PATH_RUN : _PathLike | BIDSPath
        Path to a file type supported by mne.io.read_raw:
        https://mne.tools/stable/generated/mne.io.read_raw.html
    line_noise : int, optional
        power line frequency, by default 50

    Returns
    -------
    data : np.ndarray
    sfreq : float
    ch_names : list[str]
    ch_types : list[str]
    bads : list[str]
    """
    from mne import io as mne_io

    raw_arr = mne_io.read_raw(PATH_RUN)
    sfreq = raw_arr.info["sfreq"]
    ch_names = raw_arr.info["ch_names"]
    ch_types = raw_arr.get_channel_types()
    logger.info(
        "Channel data is read using the mne.io.read_raw function. Channel types"
        " might not be correct and set to 'eeg' by default."
    )
    bads = raw_arr.info["bads"]

    if raw_arr.info["line_freq"] is not None:
        line_noise = int(raw_arr.info["line_freq"])
    else:
        logger.info(
            f"Line noise is not available in the data, using value of {line_noise} Hz."
        )

    return raw_arr.get_data(), sfreq, ch_names, ch_types, bads
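# Usage sketch for read_mne_data (hypothetical file; any format supported by
# mne.io.read_raw works):
#
#     data, sfreq, ch_names, ch_types, bads = read_mne_data("recording.edf")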
def get_coord_list(
    raw: "mne_io.BaseRaw",
) -> tuple[list, list] | tuple[None, None]:
    """Return the coordinate positions and names from an mne Raw object.

    Parameters
    ----------
    raw : mne_io.BaseRaw

    Returns
    -------
    coord_list : list | None
    coord_names : list | None
    """
    montage = raw.get_montage()
    if montage is not None:
        ch_pos = montage.get_positions()["ch_pos"]
        coord_list = np.array(list(ch_pos.values())).tolist()
        coord_names = list(ch_pos.keys())
    else:
        coord_list = None
        coord_names = None

    return coord_list, coord_names
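# Usage sketch for get_coord_list; coordinates are only available when the
# Raw object has a montage set, otherwise (None, None) is returned:
#
#     coord_list, coord_names = get_coord_list(raw_arr)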
def read_grid(PATH_GRIDS: _PathLike | None, grid_str: str) -> "pd.DataFrame":
    """Read a grid file from PATH_GRIDS or from PYNM_DIR.

    Parameters
    ----------
    PATH_GRIDS : _PathLike | None
        path to the directory containing the grid file, by default None
    grid_str : str
        grid name

    Returns
    -------
    pd.DataFrame
        DataFrame with the MNI x, y, z coordinates of each grid point
    """
    import pandas as pd

    if PATH_GRIDS is None:
        grid = pd.read_csv(PYNM_DIR / ("grid_" + grid_str.lower() + ".tsv"), sep="\t")
    else:
        grid = pd.read_csv(
            PurePath(PATH_GRIDS, "grid_" + grid_str.lower() + ".tsv"), sep="\t"
        )
    return grid
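# Usage sketch for read_grid: grid_str is lower-cased and resolved to
# "grid_<name>.tsv"; with PATH_GRIDS=None the file is looked up in PYNM_DIR
# (the grid name below is an assumption for illustration):
#
#     grid_cortex = read_grid(None, "cortex")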
def get_annotations(PATH_ANNOTATIONS: str, PATH_RUN: str, raw_arr: "mne_io.RawArray"):
    filepath = PurePath(PATH_ANNOTATIONS, PurePath(PATH_RUN).name[:-5] + ".txt")
    from mne import read_annotations

    # Initialize so the return values are defined even if the file is missing
    annot = None
    annot_data = None
    try:
        annot = read_annotations(filepath)
        raw_arr.set_annotations(annot)

        # annotations starting with "BAD" are omitted with the
        # reject_by_annotation="omit" parameter
        annot_data = raw_arr.get_data(reject_by_annotation="omit")
    except FileNotFoundError:
        logger.critical(f"Annotations file could not be found: {filepath}")

    return annot, annot_data, raw_arr
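# Usage sketch for get_annotations (hypothetical paths); the annotations
# file is expected at PATH_ANNOTATIONS/<run name>.txt:
#
#     annot, annot_data, raw_arr = get_annotations(
#         "/data/annotations", PATH_RUN, raw_arr
#     )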
def write_csv(df, path_out):
    """Save a pandas DataFrame to disk as CSV using PyArrow
    (almost 10x faster than pandas).

    Unlike pandas.DataFrame.to_csv(), this does not write an index
    column by default.
    """
    from pyarrow import csv, Table

    csv.write_csv(Table.from_pandas(df), path_out)
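# Usage sketch for write_csv:
#
#     import pandas as pd
#     write_csv(pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}), "out.csv")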
def save_channels(
    nmchannels: "pd.DataFrame",
    out_dir: _PathLike = "",
    prefix: str = "",
) -> None:
    out_dir = Path.cwd() if not out_dir else Path(out_dir)
    filename = "channels.csv" if not prefix else prefix + "_channels.csv"
    write_csv(nmchannels, out_dir / filename)
    logger.info(f"{filename} saved to {out_dir}")


def save_features(
    df_features: "pd.DataFrame",
    out_dir: _PathLike = "",
    prefix: str = "",
) -> None:
    out_dir = Path.cwd() if not out_dir else Path(out_dir)
    filename = f"{prefix}_FEATURES.csv" if prefix else "_FEATURES.csv"
    write_csv(df_features, out_dir / filename)
    logger.info(f"{filename} saved to {str(out_dir)}")


def save_sidecar(
    sidecar: dict,
    out_dir: _PathLike = "",
    prefix: str = "",
) -> None:
    save_general_dict(sidecar, out_dir, prefix, "_SIDECAR.json")


def save_general_dict(
    dict_: dict,
    out_dir: _PathLike = "",
    prefix: str = "",
    str_add: str = "",
) -> None:
    out_dir = Path.cwd() if not out_dir else Path(out_dir)
    filename = f"{prefix}{str_add}"
    with open(out_dir / filename, "w") as f:
        json.dump(
            dict_,
            f,
            default=default_json_convert,
            indent=4,
            separators=(",", ": "),
        )
    logger.info(f"{filename} saved to {out_dir}")


def default_json_convert(obj) -> list | float:
    import pandas as pd

    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, pd.DataFrame):
        return obj.to_numpy().tolist()
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    raise TypeError("Not serializable")


def read_sidecar(PATH: _PathLike) -> dict:
    with open(PurePath(str(PATH) + "_SIDECAR.json")) as f:
        return json.load(f)


def read_features(PATH: _PathLike) -> "pd.DataFrame":
    import pandas as pd

    return pd.read_csv(str(PATH) + "_FEATURES.csv", engine="pyarrow")


def read_channels(PATH: _PathLike) -> "pd.DataFrame":
    import pandas as pd

    return pd.read_csv(str(PATH) + "_channels.csv")


def get_run_list_indir(PATH: _PathLike) -> list:
    from os import walk

    f_files = []
    # Path(PATH).walk() only works in Python >= 3.12, hence os.walk
    for dirpath, _, files in walk(PATH):
        for x in files:
            if "FEATURES" in x:
                f_files.append(PurePath(dirpath).name)
    return f_files
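# Usage sketch for the save/read naming convention (hypothetical prefix and
# output directory): save_features writes "<prefix>_FEATURES.csv" into
# out_dir, and read_features expects the same path minus that suffix:
#
#     save_features(df_features, out_dir="derivatives", prefix="sub-01")
#     df_features = read_features("derivatives/sub-01")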
def loadmat(filename) -> dict:
    """Call this function instead of scipy.io.loadmat directly, as it
    cures the problem of not properly recovering Python dictionaries
    from mat files. It calls _check_keys to convert all entries that
    are still mat-objects.
    """
    from scipy.io import loadmat as sio_loadmat

    data = sio_loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)
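# Usage sketch for loadmat (hypothetical file); nested mat-structs are
# returned as plain nested dictionaries:
#
#     mat_contents = loadmat("data.mat")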
def get_paths_example_data():
    """Provide RUN_NAME, PATH_RUN, PATH_BIDS, PATH_OUT and datatype for
    the example dataset used in most examples.
    """
    sub = "testsub"
    ses = "EphysMedOff"
    task = "gripforce"
    run = 0
    datatype = "ieeg"

    # Define run name and access paths in the BIDS format.
    RUN_NAME = f"sub-{sub}_ses-{ses}_task-{task}_run-{run}"
    PATH_BIDS = PYNM_DIR / "data"
    PATH_RUN = PYNM_DIR / "data" / f"sub-{sub}" / f"ses-{ses}" / datatype / RUN_NAME

    # Provide a path for the output data.
    PATH_OUT = PATH_BIDS / "derivatives"

    return RUN_NAME, PATH_RUN, PATH_BIDS, PATH_OUT, datatype
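# Usage sketch; these paths point into the example dataset shipped with the
# package under PYNM_DIR / "data":
#
#     RUN_NAME, PATH_RUN, PATH_BIDS, PATH_OUT, datatype = get_paths_example_data()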
def _check_keys(data: dict) -> dict:
    """Check if entries in the dictionary are mat-objects. If yes,
    _todict is called to change them to nested dictionaries.
    """
    from scipy.io.matlab import mat_struct

    for key in data:
        if isinstance(data[key], mat_struct):
            data[key] = _todict(data[key])
    return data


def _todict(matobj) -> dict:
    """Recursively construct nested dictionaries from mat-objects."""
    from scipy.io.matlab import mat_struct

    result = {}
    for strg in matobj._fieldnames:
        elem = matobj.__dict__[strg]
        if isinstance(elem, mat_struct):
            result[strg] = _todict(elem)
        else:
            result[strg] = elem
    return result


def generate_unique_filename(path: _PathLike) -> Path:
    path = Path(path)
    directory = path.parent
    stem = path.stem
    extension = path.suffix

    counter = 1
    while True:
        new_file_path = directory / f"{stem}_{counter}{extension}"
        if not new_file_path.exists():
            return new_file_path
        counter += 1
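# Usage sketch for generate_unique_filename: if "FEATURES.csv" already
# exists, "FEATURES_1.csv" is tried, then "FEATURES_2.csv", and so on:
#
#     new_path = generate_unique_filename("FEATURES.csv")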