Menu

Package that provides tools for brain MRI Deep Learning pre-processing.

Source code for brainprep.workflow.mriqc

# -*- coding: utf-8 -*-
##########################################################################
# NSAp - Copyright (C) CEA, 2021 - 2022
# Distributed under the terms of the CeCILL-B license, as published by
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
# for details.
##########################################################################

"""
Interface for mriqc.
"""

# System import
import os
import json
import glob
import requests
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import brainprep
from brainprep.color_utils import print_title


def brainprep_mriqc(rawdir, subjid, outdir="/out", workdir="/work",
                    mriqc="mriqc"):
    """ Define the mriqc pre-processing workflow.

    Parameters
    ----------
    rawdir: str
        the BIDS raw folder.
    subjid: str
        the subject identifier.
    outdir: str
        the destination folder.
    workdir: str
        the working folder.
    mriqc: str
        path to the mriqc binary.
    """
    print_title("Launch mriqc...")
    # Sentinel file marking a subject as already processed: makes the
    # workflow re-entrant across repeated invocations.
    status = os.path.join(outdir, subjid, "ok")
    if not os.path.isfile(status):
        cmd = [
            mriqc, rawdir, outdir, "participant",
            "-w", workdir,
            "--no-sub",
            "--participant-label", subjid]
        brainprep.execute_command(cmd)
        # mriqc is expected to create the subject output folder, but do not
        # rely on it: creating the sentinel in a missing folder would raise.
        os.makedirs(os.path.dirname(status), exist_ok=True)
        # Context manager closes the handle deterministically (the original
        # used open(...).close()).
        with open(status, "a"):
            pass
def brainprep_mriqc_summary(indir, outdir, filters=None):
    """ Provide context for the image quality metrics (IQMs) shown in the
    MRIQC reports.

    Parameters
    ----------
    indir: str
        the derivatives folder with the mriqc results.
    outdir: str
        the destination folder with the IQMs summary.
    filters: list, default None
        list of filters as strings: by default filter on the scanner field.
    """
    print_title("Launch mriqc summary...")
    # Reference distributions fetched from the mriqc API are shipped with
    # the package.
    # NOTE(review): these are read as comma-separated '.csv' files while the
    # __main__ fetcher writes tab-separated 'iqm_*.tsv' — confirm the
    # packaged resource format.
    resource_dir = os.path.join(os.path.dirname(__file__), "resources")
    api_data = {
        "t1w": pd.read_csv(os.path.join(resource_dir, "iqm_T1w.csv")),
        "t2w": pd.read_csv(os.path.join(resource_dir, "iqm_T2w.csv")),
        "bold": pd.read_csv(os.path.join(resource_dir, "iqm_bold.csv"))}
    # The selection table declares, per IQM alias, the modality it applies
    # to and whether it must be maximized or minimized.
    selected_iqms = pd.read_csv(os.path.join(
        resource_dir, "iqm_select.tsv"), sep="\t")
    dtype_iqms = dict((row["ALIAS"], row["MAXIMIZE"])
                      for _, row in selected_iqms.iterrows())
    anat_iqms = selected_iqms[selected_iqms["APPLIES_TO"].isin(
        ["structural", "structural, functional"])]["ALIAS"].values.tolist()
    # Fix: the original filtered on the misspelled 'strucural, functional',
    # silently dropping shared IQMs from the functional table.
    func_iqms = selected_iqms[selected_iqms["APPLIES_TO"].isin(
        ["functional", "structural, functional"])]["ALIAS"].values.tolist()
    # Collect the per-subject IQM files produced by mriqc (BIDS layout).
    user_files = {
        "t1w": glob.glob(os.path.join(
            indir, "sub-*", "ses-*", "anat", "sub-*T1w.json")),
        "t2w": glob.glob(os.path.join(
            indir, "sub-*", "ses-*", "anat", "sub-*T2w.json")),
        "bold": glob.glob(os.path.join(
            indir, "sub-*", "ses-*", "func", "sub-*bold.json"))}
    user_data = dict((key, load_iqms(val))
                     for key, val in user_files.items())
    # Default filter: keep only API entries acquired at the same scanner
    # field strength(s) as the user data.
    if filters is None:
        fields = user_data["t1w"]["bids_meta.MagneticFieldStrength"].values
        filters = ["FIELD == {}".format(val) for val in np.unique(fields)]
    api_data = dict((key, filter_iqms(val, filters))
                    for key, val in api_data.items())
    data = dict((key, merge_dfs(user_data[key], api_data[key]))
                for key in user_data)
    data = {
        "t1w": data["t1w"][["_id", "source"] + anat_iqms],
        "t2w": data["t2w"][["_id", "source"] + anat_iqms],
        "bold": data["bold"][["_id", "source"] + func_iqms]}
    for dtype, df in data.items():
        print("--", dtype)
        print(df)
        plot_iqms(df, dtype, outdir, rm_outliers=True)
        qc = detect_outliers(df)
        score = compute_score(df, dtype_iqms)
        df["score"] = score
        df["qc"] = qc.astype(int)
        # Only the user rows are reported; the API reference rows were just
        # context for the plots and thresholds.
        df = df[df["source"] == "user"]
        df = df.drop(columns=["source"])
        df.to_csv(os.path.join(outdir, "{}_qc.tsv".format(dtype)),
                  sep="\t", index=False)
def compute_score(data, dtype_iqms):
    """ Compute an aggregation score.

    Every metric column is min-max scaled to [0, 1] (constant columns map
    to 0, as sklearn's MinMaxScaler does), oriented so that higher is
    always better, and averaged.

    Parameters
    ----------
    data: DataFrame
        the table with the raw scores; must contain '_id' and 'source'
        columns, which are excluded (as is 'dummy_trs' when present).
    dtype_iqms: dict
        specify which IQM needs to be maximized/minimized (truthy value
        means maximize).

    Returns
    -------
    score: array
        the generated summary score, one float per row.
    """
    # Drop bookkeeping columns before scaling.
    drop_cols = ["_id", "source"]
    if "dummy_trs" in data.columns:
        drop_cols.append("dummy_trs")
    metrics = data.drop(columns=drop_cols)
    columns = metrics.columns.tolist()
    # Min-max scale each column. The original built the accumulator with
    # dtype=data.values.dtype, which is 'object' whenever the string '_id'
    # column is present: use a proper float accumulator instead. The
    # scaling replicates sklearn MinMaxScaler, including its handling of
    # zero-range columns (scale factor forced to 1).
    values = metrics.values.astype(float)
    vmin = values.min(axis=0)
    vrange = values.max(axis=0) - vmin
    vrange[vrange == 0] = 1.
    scaled = (values - vmin) / vrange
    score = np.zeros(len(data), dtype=float)
    # enumerate instead of the original O(n^2) _columns.index(key) lookup.
    for index, key in enumerate(columns):
        if dtype_iqms[key]:
            score += scaled[:, index]
        else:
            # Metrics to minimize contribute their complement.
            score += 1 - scaled[:, index]
    return score / len(columns)
def detect_outliers(data, percentiles=(95, 5)):
    """ Detect outliers.

    Lower outlier threshold is calculated as 5% quartile(data) -
    1.5*IQR(data); upper outlier threshold calculated as 95% quartile(data)
    + 1.5*IQR(data). Thresholds are derived from the API reference rows
    only, then applied to every row.

    Parameters
    ----------
    data: DataFrame
        the table with the data to QC; must contain a 'source' column with
        'api'/'user' entries.
    percentiles: 2-uplet, default (95, 5)
        sequence of percentiles to compute (upper first, then lower). The
        original used a mutable list default, replaced by a tuple.

    Returns
    -------
    qc: array
        the QC result as a binary vector (True when the row is within
        bounds for every metric).
    """
    # Hoisted out of the loop: the reference subset does not depend on the
    # column being checked.
    api_data = data[data["source"] == "api"]
    qc = []
    for key in data.columns:
        # Skip identifier/bookkeeping columns.
        if key in ("_id", "source", "dummy_trs"):
            continue
        q2, q1 = np.percentile(api_data[key], percentiles)
        iqr = q2 - q1
        min_out = q1 - 1.5 * iqr
        max_out = q2 + 1.5 * iqr
        qc.append(np.logical_and(data[key].values <= max_out,
                                 data[key].values >= min_out))
    # Guard: with no metric columns, np.all(..., axis=0) on an empty array
    # would raise; every row trivially passes.
    if not qc:
        return np.ones(len(data), dtype=bool)
    return np.all(np.asarray(qc), axis=0)
def load_iqms(files):
    """ Load/merge individual IQM file.

    Each JSON file is loaded as one record; the file's base name (without
    extension) is stored under the '_id' key.

    Parameters
    ----------
    files: list
        the input mriqc IQM files.

    Returns
    -------
    mergedf: DataFrame
        the merged IQM data.
    """
    records = []
    for filepath in files:
        with open(filepath, "rt") as stream:
            record = json.load(stream)
        record["_id"] = os.path.basename(filepath).split(".")[0]
        records.append(record)
    return pd.json_normalize(records)
def filter_iqms(apidf, filters):
    """ Filters the API table based on user-provided parameters.

    Filter parameters should be a list of strings and string formats should
    be "(VAR) (Operator) (Value)". Example: ['TR == 3.0'] or
    ['TR > 1.0','FD < .3']. Multiple conditions are OR-combined.

    Notes
    -----
    Each filter element is SPACE separated!

    Parameters
    ----------
    apidf: DataFrame
        API table.
    filters: list
        list of filters as strings.

    Returns
    -------
    filterdf: DataFrame
        table containing data pulled from the mriqc API, but filtered to
        contain only your match specifications.

    Raises
    ------
    ValueError
        if a filter uses a variable outside the supported set.
    """
    # Rename on a copy: DataFrame.query cannot address dotted column names,
    # and the caller's table must not be modified in place (the original
    # reassigned apidf.columns, mutating the argument).
    apidf = apidf.rename(columns=lambda name: name.replace(".", "_"))
    # Mapping from the user-facing filter keywords to API column names.
    expected_filters = {
        "SNR": "snr",
        "TSNR": "tsnr",
        "SNR_WM": "snr_wm",
        "SNR_CSF": "snr_csf",
        "CNR": "cnr",
        "EFC": "efc",
        "FIELD": "bids_meta_MagneticFieldStrength",
        "TE": "bids_meta_EchoTime",
        "TR": "bids_meta_RepetitionTime"}
    query = []
    for cond in filters:
        var, op, val = cond.split(" ")
        if var not in expected_filters:
            raise ValueError(
                "Unrecognized filtering variable: {}.".format(var))
        query.append(expected_filters[var] + op + val)
    # Conditions are deliberately OR-ed: e.g. one clause per scanner field
    # strength. The original wrapped this single string in a pointless
    # ' & '.join of a one-element list.
    filterdf = apidf.query(" or ".join(query))
    return filterdf
def merge_dfs(userdf, apidf):
    """ Merges the user dataframe and the filtered API dataframe while
    adding a 'source' column.

    Parameters
    ----------
    userdf: DataFrame
        user mriqc table.
    apidf: DataFrame
        filtered API table.

    Returns
    -------
    mergedf: DataFrame
        a merged pandas dataframe containing the user and the filtered API
        tables. A 'source' column is added with 'user' or 'api' entries for
        easy sorting/splitting.
    """
    # Copy before tagging: the original assigned the 'source' column on the
    # arguments themselves, mutating the caller's tables.
    userdf = userdf.copy()
    apidf = apidf.copy()
    userdf["source"] = "user"
    apidf["source"] = "api"
    # sort=True aligns columns alphabetically; metrics missing from one
    # side are zero-filled.
    mergedf = pd.concat([userdf, apidf], sort=True).fillna(0)
    return mergedf
def plot_iqms(data, dtype, outdir, rm_outliers=False):
    """ Make a violin plot of the api and user IQMs.

    Parameters
    ----------
    data: DataFrame
        a table including the api and user data. Must have a column labeled
        'source' with 'user' or 'api' defined.
    dtype: str
        the data type in the input table.
    outdir: str
        the destination folder.
    rm_outliers: bool, default False
        remove outliers from the API data.

    Returns
    -------
    A violin plot of each MRIQC metric, comparing the user-level data to
    the API data.
    """
    # Filter outliers: API rows outside [Q25 - 1.5*IQR, Q75 + 1.5*IQR] of a
    # reference SNR metric are dropped; user rows are always kept.
    if rm_outliers:
        data = data.reset_index(drop=True)
        var_name = "snr_total"
        if var_name not in data.columns:
            # anatomical tables expose 'snr' rather than 'snr_total'
            var_name = "snr"
        user_index = data[data["source"] == "user"].index
        api_data = data[data["source"] == "api"]
        q75, q25 = np.percentile(api_data[var_name].values, [75, 25])
        iqr = q75 - q25
        min_out = q25 - 1.5 * iqr
        max_out = q75 + 1.5 * iqr
        api_data = api_data[api_data[var_name] <= max_out]
        api_data = api_data[api_data[var_name] >= min_out]
        api_index = api_data.index
        index = user_index.values.tolist() + api_index.values.tolist()
        data = data.iloc[index]
    # Change the table from short format to long format
    df_long = pd.melt(data, id_vars=["_id", "source"], var_name="var",
                      value_name="val")
    # Make colors dictionary for family:
    # temporal: #D2691E
    # spatial: #DAA520
    # noise: #A52A2A
    # motion: #66CDAA
    # artifact: #6495ED
    # descriptive: #00008B
    # other: #9932CC
    plot_dict = {
        "tsnr": "#D2691E", "gcor": "#D2691E", "dvars_vstd": "#D2691E",
        "dvars_std": "#D2691E", "dvars_nstd": "#D2691E",
        "fwhm_x": "#DAA520", "fwhm_y": "#DAA520", "fwhm_z": "#DAA520",
        "fwhm_avg": "#DAA520", "fber": "#DAA520", "efc": "#DAA520",
        "cjv": "#A52A2A", "cnr": "#A52A2A", "qi_2": "#A52A2A",
        "snr": "#A52A2A", "snr_csf": "#A52A2A", "snr_gm": "#A52A2A",
        "snr_wm": "#A52A2A", "snr_total": "#A52A2A", "snrd_csf": "#A52A2A",
        "snrd_gm": "#A52A2A", "snrd_wm": "#A52A2A",
        "fd_mean": "#66CDAA", "fd_num": "#66CDAA", "fd_perc": "#66CDAA",
        "inu_med": "#6495ED", "inu_range": "#6495ED", "wm2max": "#6495ED",
        "aor": "#9932CC", "aqi": "#9932CC", "dummy_trs": "#9932CC",
        "gsr_x": "#9932CC", "gsr_y": "#9932CC", "qi_1": "#9932CC",
        "rpve_csf": "#9932CC", "rpve_gm": "#9932CC", "rpve_wm": "#9932CC",
        "tpm_overlap_csf": "#9932CC", "tpm_overlap_gm": "#9932CC",
        "tpm_overlap_wm": "#9932CC",
        "icvs_csf": "#00008B", "icvs_gm": "#00008B", "icvs_wm": "#00008B",
        "summary_bg_k": "#00008B", "summary_bg_mad": "#00008B",
        "summary_bg_mean": "#00008B", "summary_bg_median": "#00008B",
        "summary_bg_n": "#00008B", "summary_bg_p05": "#00008B",
        "summary_bg_p95": "#00008B", "summary_bg_stdv": "#00008B",
        "summary_csf_k": "#00008B", "summary_csf_mad": "#00008B",
        "summary_csf_mean": "#00008B", "summary_csf_median": "#00008B",
        "summary_csf_n": "#00008B", "summary_csf_p05": "#00008B",
        "summary_csf_p95": "#00008B", "summary_csf_stdv": "#00008B",
        "summary_fg_k": "#00008B", "summary_fg_mad": "#00008B",
        "summary_fg_mean": "#00008B", "summary_fg_median": "#00008B",
        "summary_fg_n": "#00008B", "summary_fg_p05": "#00008B",
        "summary_fg_p95": "#00008B", "summary_fg_stdv": "#00008B",
        "summary_gm_k": "#00008B", "summary_gm_mad": "#00008B",
        "summary_gm_mean": "#00008B", "summary_gm_median": "#00008B",
        "summary_gm_n": "#00008B", "summary_gm_p05": "#00008B",
        "summary_gm_p95": "#00008B", "summary_gm_stdv": "#00008B",
        "summary_wm_k": "#00008B", "summary_wm_mad": "#00008B",
        "summary_wm_mean": "#00008B", "summary_wm_median": "#00008B",
        "summary_wm_n": "#00008B", "summary_wm_p05": "#00008B",
        "summary_wm_p95": "#00008B", "summary_wm_stdv": "#00008B"
    }
    for var_name, df in df_long.groupby(by="var"):
        plt.figure()
        # The constant 'hue' column + mismatched hue_order values look like
        # the usual seaborn hack to shift/halve the box and violin so they
        # overlay side by side — TODO confirm against the rendered reports.
        df = df.assign(hue=1)
        sns.boxplot(x="source", y="val", data=df, color=plot_dict[var_name],
                    hue="hue", hue_order=[1, 0])
        graph = sns.violinplot(x="source", y="val", data=df, color="0.8",
                               hue="hue", hue_order=[0, 1], split=True)
        graph.legend_.remove()
        graph = sns.stripplot(x="source", y="val", data=df, jitter=True,
                              alpha=0.4, color=plot_dict[var_name])
        ax = plt.gca()
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.set_ylabel(var_name)
        plt.savefig(os.path.join(outdir, "{}_{}.png".format(dtype, var_name)))
        # Fix: release the figure. The original leaked one open figure per
        # IQM, so matplotlib kept dozens alive per call (and warns after 20).
        plt.close()
def query_api(dtype, filters=None, maxpage=None):
    """ Query the mriqc API using 3 element conditional statement.

    Parameters
    ----------
    dtype: str
        the data type: 'bold','T1w',or 'T2w'.
    filters: list or str, default None
        list of conditional phrases consisting of: keyword to query +
        conditional argument + value. All conditions checked against API
        as and phrases.
    maxpage: int, default None
        optionally define the maximum number of page to scroll.

    Returns
    -------
    df: DataFrame
        a table of all mriqc entries that satisfy the contitional
        statement.

    Raises
    ------
    ValueError
        if filters is neither a string nor a list of strings.
    """
    # API limits at a max results of 1k
    url_root = "https://mriqc.nimh.nih.gov/api/v1/" + dtype
    filters_str = None
    if filters is not None:
        if isinstance(filters, str):
            filters_str = filters
        elif isinstance(filters, list):
            filters_str = "&".join(filters)
        else:
            raise ValueError("The filters can either be a list of strings or "
                             "a string.")
    dfs = []
    page = 0
    last_page = -1
    headers = {"content-type": "application/json",
               "Accept-Charset": "UTF-8"}
    while True:
        if page % 10 == 0:
            print("On page {}/{}...".format(page, last_page))
        if filters is not None:
            page_url = url_root + "?max_results=1000&{}&page={}".format(
                filters_str, page)
        else:
            page_url = url_root + "?max_results=1000&page={}".format(page)
        req = requests.get(page_url, headers=headers)
        data = req.json()
        if last_page == -1:
            # The total page count is only known after the first response.
            last_page = int(
                data["_links"]["last"]["href"].split("=")[-1])
            # Fix: the original called min(last_page, maxpage)
            # unconditionally, raising TypeError for the documented default
            # maxpage=None.
            if maxpage is not None:
                last_page = min(last_page, maxpage)
        dfs.append(pd.json_normalize(data["_items"]))
        if page >= last_page:
            break
        page += 1
    df = pd.concat(dfs, ignore_index=True, sort=True)
    return df
if __name__ == "__main__":
    # Refresh the packaged API reference tables, one per modality.
    # NOTE(review): files are written as 'iqm_*.tsv' while
    # brainprep_mriqc_summary reads 'iqm_*.csv' resources — verify which
    # format ships with the package.
    resource_dir = os.path.join(os.path.dirname(__file__), "resources")
    for modality in ("T1w", "T2w", "bold"):
        print_title("Fetching {} reference...".format(modality))
        table = query_api(dtype=modality, filters=None, maxpage=5000)
        print(table)
        table.to_csv(os.path.join(resource_dir,
                                  "iqm_{}.tsv".format(modality)),
                     sep="\t", index=False)

Follow us

© 2025, brainprep developers