Source code for brainstat.context.meta_analysis

""" Meta-analytic decoding based on NiMARE """
import re
import urllib
import zipfile
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Generator, Optional, Sequence, Union

import nibabel as nib
import numpy as np
import pandas as pd
import templateflow.api as tflow
from brainspace.vtk_interface.wrappers.data_object import BSPolyData
from scipy.stats.stats import pearsonr, spearmanr
from math import pi
import matplotlib.pyplot as plt

from brainstat._utils import (
    data_directories,
    deprecated,
    logger,
    read_data_fetcher_json,
)
from brainstat.mesh.interpolate import _surf2vol, multi_surface_to_volume


[docs]def meta_analytic_decoder( template: str, stat_labels: np.ndarray, corrtype: Optional[str] = "pearson", data_dir: Optional[Union[str, Path]] = None, ): """Meta-analytic decoding of surface maps using NeuroSynth or NeuroQuery. Parameters ---------- template : str Path of a template volume file. stat_labels : str, numpy.ndarray, sequence of str or numpy.ndarray Path to a label file for the surfaces, numpy array containing the labels, or a list containing multiple of the aforementioned. corrtype : str, optional Correlation type {'pearson', 'spearman'}. Default is 'pearson'. data_dir : str, optional The directory of the dataset. If none exists, a new dataset will be downloaded and saved to this path. If None, the directory defaults to your home directory, by default None. Returns ------- pandas.DataFrame Table with correlation values for each feature. """ data_dir = Path(data_dir) if data_dir else data_directories["NEUROSYNTH_DATA_DIR"] data_dir.mkdir(exist_ok=True, parents=True) logger.info( "Fetching Neurosynth feature files. This may take several minutes if you haven't downloaded them yet." ) feature_files = tuple(_fetch_precomputed(data_dir, database="neurosynth")) mni152 = nib.load(tflow.get("MNI152Lin", resolution=2, desc="brain", suffix="mask")) stat_nii = _surf2vol(template, stat_labels.flatten()) mask = (stat_nii.get_fdata() != 0) & (mni152.get_fdata() != 0) stat_vector = stat_nii.get_fdata()[mask] feature_names = [] correlations = np.zeros(len(feature_files)) logger.info("Running correlations with all Neurosynth features.") for i in range(len(feature_files)): feature_names.append(re.search("__[A-Za-z0-9 ]+", feature_files[i].stem)[0][2:]) # type: ignore feature_data = nib.load(feature_files[i]).get_fdata()[mask] keep = np.logical_not( np.isnan(feature_data) | np.isinf(feature_data) | np.isnan(stat_vector) | np.isinf(stat_vector) ) if corrtype == "pearson": correlations[i], _ = pearsonr(stat_vector[keep], feature_data[keep]) colname = "Pearson's r" elif corrtype == "spearman": correlations[i], _ = spearmanr(stat_vector[keep], feature_data[keep]) colname = "Spearman's r" df = pd.DataFrame(correlations, index=feature_names, columns=[colname]) return df.sort_values(by="Pearson's r", ascending=False)
@deprecated( "surface_decoder has been deprecated in favor of meta_analytic_decoder in the same module." ) def surface_decoder( pial: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]], white: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]], stat_labels: Union[str, np.ndarray, Sequence[Union[str, np.ndarray]]], *, interpolation: str = "linear", data_dir: Optional[Union[str, Path]] = None, database: str = "neurosynth", ) -> pd.DataFrame: """Meta-analytic decoding of surface maps using NeuroSynth or NeuroQuery. Parameters ---------- pial : str, BSPolyData, sequence of str or BSPolyData Path of a pial surface file, BSPolyData of a pial surface or a list containing multiple of the aforementioned. white : str, BSPolyData, sequence of str or BSPolyData Path of a white matter surface file, BSPolyData of a pial surface or a list containing multiple of the aforementioned. stat_labels : str, numpy.ndarray, sequence of str or numpy.ndarray Path to a label file for the surfaces, numpy array containing the labels, or a list containing multiple of the aforementioned. interpolation : str, optional Either 'nearest' for nearest neighbor interpolation, or 'linear' for trilinear interpolation, by default 'linear'. data_dir : str, optional The directory of the dataset. If none exists, a new dataset will be downloaded and saved to this path. If None, the directory defaults to your home directory, by default None. Returns ------- pandas.DataFrame Table with correlation values for each feature. """ from nilearn.datasets import load_mni152_brain_mask data_dir = Path(data_dir) if data_dir else data_directories["NEUROSYNTH_DATA_DIR"] data_dir.mkdir(exist_ok=True, parents=True) logger.info( "Fetching Neurosynth feature files. This may take several minutes if you haven't downloaded them yet." ) feature_files = tuple(_fetch_precomputed(data_dir, database=database)) mni152 = load_mni152_brain_mask() with NamedTemporaryFile(suffix=".nii.gz", delete=False) as f: name = f.name try: multi_surface_to_volume( pial=pial, white=white, volume_template=mni152, output_file=name, labels=stat_labels, interpolation=interpolation, ) stat_volume = nib.load(name) mask = (stat_volume.get_fdata() != 0) & (mni152.get_fdata() != 0) stat_vector = stat_volume.get_fdata()[mask] finally: Path(name).unlink() feature_names = [] correlations = np.zeros(len(feature_files)) logger.info("Running correlations with all Neurosynth features.") for i in range(len(feature_files)): feature_names.append(re.search("__[A-Za-z0-9 ]+", feature_files[i].stem)[0][2:]) # type: ignore feature_data = nib.load(feature_files[i]).get_fdata()[mask] keep = np.logical_not( np.isnan(feature_data) | np.isinf(feature_data) | np.isnan(stat_vector) | np.isinf(stat_vector) ) correlations[i], _ = pearsonr(stat_vector[keep], feature_data[keep]) df = pd.DataFrame(correlations, index=feature_names, columns=["Pearson's r"]) return df.sort_values(by="Pearson's r", ascending=False) def _fetch_precomputed(data_dir: Path, database: str) -> Generator[Path, None, None]: """Wrapper for any future data fetcher. Parameters ---------- data_dir : Path Directory where the data is stored. database : str Name of the database, valid arguments are 'neurosynth'. Returns ------- generator Generator of paths to the precomputed files. Raises ------ NotImplementedError Returned when requesting the Neuroquery data fetcher. ValueError Returned when requesting an unknown database. """ if database == "neurosynth": return _fetch_precomputed_neurosynth(data_dir) elif database == "neuroquery": raise NotImplementedError("Neuroquery has not been implemented yet.") else: raise ValueError(f"Unknown database {database}.") def _fetch_precomputed_neurosynth(data_dir: Path) -> Generator[Path, None, None]: """Downloads precomputed Neurosynth features and returns the filepaths.""" json = read_data_fetcher_json()["neurosynth_precomputed"] url = json["url"] existing_files = data_dir.glob("Neurosynth_TFIDF__*z_desc-consistency.nii.gz") if len(list(existing_files)) != json["n_files"]: logger.info("Downloading Neurosynth data files.") response = urllib.request.urlopen(url) # Open, close, and reopen file to deal with Windows permission issues. with NamedTemporaryFile(prefix=str(data_dir), suffix=".zip", delete=False) as f: name = f.name try: with open(name, "wb") as fw: fw.write(response.read()) with zipfile.ZipFile(name, "r") as fr: fr.extractall(data_dir) finally: (Path(name)).unlink() return data_dir.glob("Neurosynth_TFIDF__*z_desc-consistency.nii.gz")
[docs]def radar_plot(data=None, title="", axis_range=None, label=None, color=(0, 0, 0)): """Visualize data in radar plot (author: @saratheriver) Parameters ---------- data : ndarray, shape = (n_val,) Data. title : string, optional Title of spider plot. Default is empty. axis_range : tuple, optional Range of spider plot axes. Default is (min, max). label : list, optional List of axis labels. Length = same as data.shape[0]. Default is empty. color : tuple, optional Color of line. Default is (0, 0, 0). Returns ------- class_mean : ndarray, shape = (data.shape[0],) Values for each branch. figure Spider plot. """ # Data check if data is None: print("Need data to plot") # Create dataframe class_mean = pd.DataFrame(data=data.T, columns=label) # Number of variable categories = list(class_mean) N = len(categories) # We are going to plot the first line of the data frame. # But we need to repeat the first value to close the circular graph: values = class_mean.loc[0].values.flatten().tolist() values += values[:1] # What will be the angle of each axis in the plot? (we divide the plot / number of variable) angles = [n / float(N) * 2 * pi for n in range(N)] angles += angles[:1] # Initialise the spider plot plt.figure(figsize=(8, 4)) ax = plt.subplot(111, polar=True) ax.spines["polar"].set_visible(False) # Draw one axe per variable + add labels labels yet if label is None: label = list(class_mean) plt.xticks(angles[:-1], label, color="black", size=12) ax.xaxis.get_majorticklabels()[0].set_horizontalalignment("left") ax.xaxis.get_majorticklabels()[2].set_verticalalignment("bottom") ax.xaxis.get_majorticklabels()[3].set_verticalalignment("top") # Draw ylabels ax.set_rlabel_position(0) if axis_range is None: axis_range = (np.min(values), np.max(values)) inc = (axis_range[1] - axis_range[0]) / 4 newinc = [ axis_range[0] + inc, axis_range[0] + (inc * 2), axis_range[0] + (inc * 3), axis_range[0], ] plt.yticks( newinc, [str("{:.2f}".format(elem)) for elem in newinc], color="grey", size=10 ) plt.ylim(axis_range) # add title if title: plt.title(title) # Plot data ax.plot(angles, values, linewidth=3, linestyle="solid", color=color) return class_mean