Source code for brainprep.workflow.mriqc
# -*- coding: utf-8 -*-
##########################################################################
# NSAp - Copyright (C) CEA, 2021 - 2022
# Distributed under the terms of the CeCILL-B license, as published by
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
# for details.
##########################################################################
"""
Interface for mriqc.
"""
# System import
import os
import json
import glob
import requests
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import brainprep
from brainprep.color_utils import print_title
def brainprep_mriqc(rawdir, subjid, outdir="/out", workdir="/work",
mriqc="mriqc"):
""" Define the mriqc pre-processing workflow.
Parameters
----------
rawdir: str
the BIDS raw folder.
subjid: str
the subject identifier.
outdir: str
the destination folder.
workdir: str
the working folder.
mriqc: str
path to the mriqc binary.
"""
print_title("Launch mriqc...")
status = os.path.join(outdir, subjid, "ok")
if not os.path.isfile(status):
cmd = [
mriqc,
rawdir,
outdir,
"participant",
"-w", workdir,
"--no-sub",
"--participant-label", subjid]
brainprep.execute_command(cmd)
open(status, "a").close()
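# Usage sketch (paths are hypothetical): run MRIQC on a single participant of
# a BIDS dataset, assuming the 'mriqc' executable is available on the PATH:
#   brainprep_mriqc(rawdir="/data/bids", subjid="sub-0001",
#                   outdir="/data/derivatives/mriqc", workdir="/tmp/mriqc_work")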
def brainprep_mriqc_summary(indir, outdir, filters=None):
""" Provide context for the image quality metrics (IQMs) shown in the
MRIQC reports.
Parameters
----------
indir: str
the derivatives folder with the mriqc results.
outdir: str
the destination folder with the IQMs summary.
filters: list, default None
        list of filters as strings: by default, filter on the scanner
        magnetic field strength.
"""
print_title("Launch mriqc summary...")
resource_dir = os.path.join(os.path.dirname(__file__), "resources")
api_data = {
"t1w": pd.read_csv(os.path.join(resource_dir, "iqm_T1w.csv")),
"t2w": pd.read_csv(os.path.join(resource_dir, "iqm_T2w.csv")),
"bold": pd.read_csv(os.path.join(resource_dir, "iqm_bold.csv"))}
selected_iqms = pd.read_csv(os.path.join(
resource_dir, "iqm_select.tsv"), sep="\t")
dtype_iqms = dict((row["ALIAS"], row["MAXIMIZE"])
for _, row in selected_iqms.iterrows())
anat_iqms = selected_iqms[selected_iqms["APPLIES_TO"].isin(
["structural", "structural, functional"])]["ALIAS"].values.tolist()
func_iqms = selected_iqms[selected_iqms["APPLIES_TO"].isin(
["functional", "strucural, functional"])]["ALIAS"].values.tolist()
user_files = {
"t1w": glob.glob(os.path.join(
indir, "sub-*", "ses-*", "anat", "sub-*T1w.json")),
"t2w": glob.glob(os.path.join(
indir, "sub-*", "ses-*", "anat", "sub-*T2w.json")),
"bold": glob.glob(os.path.join(
indir, "sub-*", "ses-*", "func", "sub-*bold.json"))}
user_data = dict((key, load_iqms(val)) for key, val in user_files.items())
if filters is None:
fields = user_data["t1w"]["bids_meta.MagneticFieldStrength"].values
filters = []
for val in np.unique(fields):
filters.append("FIELD == {}".format(val))
api_data = dict((key, filter_iqms(val, filters))
for key, val in api_data.items())
data = dict((key, merge_dfs(user_data[key], api_data[key]))
for key in user_data)
data = {
"t1w": data["t1w"][["_id", "source"] + anat_iqms],
"t2w": data["t2w"][["_id", "source"] + anat_iqms],
"bold": data["bold"][["_id", "source"] + func_iqms]}
for dtype, df in data.items():
print("--", dtype)
print(df)
plot_iqms(df, dtype, outdir, rm_outliers=True)
qc = detect_outliers(df)
score = compute_score(df, dtype_iqms)
df["score"] = score
df["qc"] = qc.astype(int)
df = df[df["source"] == "user"]
df = df.drop(columns=["source"])
df.to_csv(os.path.join(outdir, "{}_qc.tsv".format(dtype)), sep="\t",
index=False)
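# Usage sketch (paths are hypothetical): summarize the IQMs of an MRIQC
# derivatives tree, comparing them to API entries acquired at 3 T only, and
# write one '<dtype>_qc.tsv' table per modality:
#   brainprep_mriqc_summary(indir="/data/derivatives/mriqc",
#                           outdir="/data/derivatives/mriqc_summary",
#                           filters=["FIELD == 3.0"])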
def compute_score(data, dtype_iqms):
""" Compute an agregation score.
Parameters
----------
data: DataFrame
the table with the raw scores.
dtype_iqms: dict
specify which IQM needs to be maximized/minimized.
Returns
-------
score: array
the generated summary score.
"""
score = np.zeros((len(data), ), dtype=data.values.dtype)
if "dummy_trs" in data.columns:
_data = data.drop(columns=["_id", "source", "dummy_trs"])
else:
_data = data.drop(columns=["_id", "source"])
_columns = _data.columns.tolist()
_data = MinMaxScaler().fit_transform(_data.values)
for key in _columns:
to_maximize = dtype_iqms[key]
index = _columns.index(key)
if to_maximize:
score += _data[:, index]
else:
score += (1 - _data[:, index])
score /= len(_columns)
return score
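# Worked example (toy data, illustrative only): each IQM is min-max scaled to
# [0, 1], IQMs to be minimized are inverted (1 - x), and the score is the mean
# over IQMs, so higher is always better:
#   df = pd.DataFrame({"_id": ["a", "b"], "source": ["user", "api"],
#                      "snr": [10., 20.], "efc": [0.4, 0.6]})
#   compute_score(df, {"snr": True, "efc": False})  # -> [0.5, 0.5]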
def detect_outliers(data, percentiles=[95, 5]):
""" Detect outliers.
    The lower outlier threshold is calculated as percentile_5(data) -
    1.5 * IQR(data); the upper outlier threshold as percentile_95(data) +
    1.5 * IQR(data), where the IQR is taken between the two given percentiles.
Parameters
----------
data: DataFrame
the table with the data to QC.
    percentiles: 2-tuple, default [95, 5]
sequence of percentiles to compute.
Returns
-------
qc: array
the QC result as a binary vector.
"""
qc = []
for key in data.columns:
if key in ["_id", "source", "dummy_trs"]:
continue
api_data = data[data["source"] == "api"]
q2, q1 = np.percentile(api_data[key], percentiles)
iqr = q2 - q1
min_out = q1 - 1.5 * iqr
max_out = q2 + 1.5 * iqr
qc.append(np.logical_and((data[key].values <= max_out),
(data[key].values >= min_out)))
qc = np.all(np.asarray(qc), axis=0)
return qc
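# Usage sketch (variable names are illustrative): the thresholds are estimated
# on the 'api' reference rows only and then applied to every row; a scan
# passes QC when all of its IQMs fall within [p5 - 1.5 * IQR, p95 + 1.5 * IQR]:
#   qc = detect_outliers(merged_df)  # merged_df as returned by merge_dfs
#   passing_user_scans = merged_df[qc & (merged_df["source"].values == "user")]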
def load_iqms(files):
""" Load/merge individual IQM file.
Parameters
----------
files: list
the input mriqc IQM files.
Returns
-------
mergedf: DataFrame
the merged IQM data.
"""
data = []
for path in files:
name = os.path.basename(path).split(".")[0]
with open(path, "rt") as of:
_data = json.load(of)
_data["_id"] = name
data.append(_data)
return pd.json_normalize(data)
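# Usage sketch (hypothetical path): build one table with a row per scan from
# the individual MRIQC JSON outputs:
#   t1w_files = glob.glob(
#       "/data/derivatives/mriqc/sub-*/ses-*/anat/sub-*T1w.json")
#   t1w_iqms = load_iqms(t1w_files)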
def filter_iqms(apidf, filters):
""" Filters the API table based on user-provided parameters. Filter
parameters should be a list of strings and string formats should
be "(VAR) (Operator) (Value)".
Example: ['TR == 3.0'] or ['TR > 1.0','FD < .3']
Notes
-----
Each filter element is SPACE separated!
Parameters
----------
apidf: DataFrame
API table.
filters: list
list of filters as strings.
Returns
-------
filterdf: DataFrame
table containing data pulled from the mriqc API, but filtered to
contain only your match specifications.
"""
cols = apidf.columns
cols = cols.map(lambda x: x.replace(".", "_"))
apidf.columns = cols
expected_filters = {
"SNR": "snr", "TSNR": "tsnr", "SNR_WM": "snr_wm",
"SNR_CSF": "snr_csf", "CNR": "cnr", "EFC": "efc",
"FIELD": "bids_meta_MagneticFieldStrength",
"TE": "bids_meta_EchoTime", "TR": "bids_meta_RepetitionTime"}
query = []
for cond in filters:
var, op, val = cond.split(" ")
if var not in expected_filters:
raise ValueError("Unrecognize filtering variable: {}.".format(var))
cond_str = expected_filters[var] + op + val
query.append(cond_str)
query = [" or ".join(query)]
filterdf = apidf.query(" & ".join(query))
return filterdf
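# Usage sketch (api_df is an illustrative name): keep only the API entries
# acquired at either 1.5 T or 3 T (multiple conditions are OR-combined):
#   filtered = filter_iqms(api_df, ["FIELD == 1.5", "FIELD == 3.0"])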
def merge_dfs(userdf, apidf):
""" Merges the user dataframe and the filtered API dataframe
while adding a 'source' column.
Parameters
----------
userdf: DataFrame
user mriqc table.
apidf: DataFrame
filtered API table.
Returns
-------
mergedf: DataFrame
a merged pandas dataframe containing the user and
the filtered API tables. A 'source' column is added
with 'user' or 'api' entries for easy sorting/splitting.
"""
userdf["source"] = "user"
apidf["source"] = "api"
mergedf = pd.concat([userdf, apidf], sort=True).fillna(0)
return mergedf
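# Usage sketch (variable names are illustrative):
#   merged_df = merge_dfs(user_df, api_df)
#   merged_df.groupby("source").size()  # number of user vs. api rows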
def plot_iqms(data, dtype, outdir, rm_outliers=False):
""" Make a violin plot of the api and user IQMs.
Parameters
----------
data: DataFrame
        a table including the API and user data. Must have a column labeled
'source' with 'user' or 'api' defined.
dtype: str
the data type in the input table.
outdir: str
the destination folder.
rm_outliers: bool, default False
remove outliers from the API data.
    Returns
    -------
    None: one violin plot per MRIQC metric, comparing the user-level data to
    the API data, is saved in the destination folder.
"""
# Filter outliers
if rm_outliers:
data = data.reset_index(drop=True)
var_name = "snr_total"
if var_name not in data.columns:
var_name = "snr"
user_index = data[data["source"] == "user"].index
api_data = data[data["source"] == "api"]
q75, q25 = np.percentile(api_data[var_name].values, [75, 25])
iqr = q75 - q25
min_out = q25 - 1.5 * iqr
max_out = q75 + 1.5 * iqr
api_data = api_data[api_data[var_name] <= max_out]
api_data = api_data[api_data[var_name] >= min_out]
api_index = api_data.index
index = user_index.values.tolist() + api_index.values.tolist()
data = data.iloc[index]
# Change the table from short format to long format
df_long = pd.melt(data, id_vars=["_id", "source"], var_name="var",
value_name="val")
# Make colors dictionary for family:
# temporal: #D2691E
# spatial: #DAA520
# noise: #A52A2A
# motion: #66CDAA
# artifact: #6495ED
# descriptive: #00008B
# other: #9932CC
plot_dict = {
"tsnr": "#D2691E", "gcor": "#D2691E", "dvars_vstd": "#D2691E",
"dvars_std": "#D2691E", "dvars_nstd": "#D2691E",
"fwhm_x": "#DAA520", "fwhm_y": "#DAA520", "fwhm_z": "#DAA520",
"fwhm_avg": "#DAA520", "fber": "#DAA520", "efc": "#DAA520",
"cjv": "#A52A2A", "cnr": "#A52A2A", "qi_2": "#A52A2A",
"snr": "#A52A2A", "snr_csf": "#A52A2A", "snr_gm": "#A52A2A",
"snr_wm": "#A52A2A", "snr_total": "#A52A2A", "snrd_csf": "#A52A2A",
"snrd_gm": "#A52A2A", "snrd_wm": "#A52A2A",
"fd_mean": "#66CDAA", "fd_num": "#66CDAA", "fd_perc": "#66CDAA",
"inu_med": "#6495ED", "inu_range": "#6495ED", "wm2max": "#6495ED",
"aor": "#9932CC", "aqi": "#9932CC", "dummy_trs": "#9932CC",
"gsr_x": "#9932CC", "gsr_y": "#9932CC", "qi_1": "#9932CC",
"rpve_csf": "#9932CC", "rpve_gm": "#9932CC", "rpve_wm": "#9932CC",
"tpm_overlap_csf": "#9932CC", "tpm_overlap_gm": "#9932CC",
"tpm_overlap_wm": "#9932CC",
"icvs_csf": "#00008B", "icvs_gm": "#00008B", "icvs_wm": "#00008B",
"summary_bg_k": "#00008B", "summary_bg_mad": "#00008B",
"summary_bg_mean": "#00008B", "summary_bg_median": "#00008B",
"summary_bg_n": "#00008B", "summary_bg_p05": "#00008B",
"summary_bg_p95": "#00008B", "summary_bg_stdv": "#00008B",
"summary_csf_k": "#00008B", "summary_csf_mad": "#00008B",
"summary_csf_mean": "#00008B", "summary_csf_median": "#00008B",
"summary_csf_n": "#00008B", "summary_csf_p05": "#00008B",
"summary_csf_p95": "#00008B", "summary_csf_stdv": "#00008B",
"summary_fg_k": "#00008B", "summary_fg_mad": "#00008B",
"summary_fg_mean": "#00008B", "summary_fg_median": "#00008B",
"summary_fg_n": "#00008B", "summary_fg_p05": "#00008B",
"summary_fg_p95": "#00008B", "summary_fg_stdv": "#00008B",
"summary_gm_k": "#00008B", "summary_gm_mad": "#00008B",
"summary_gm_mean": "#00008B", "summary_gm_median": "#00008B",
"summary_gm_n": "#00008B", "summary_gm_p05": "#00008B",
"summary_gm_p95": "#00008B", "summary_gm_stdv": "#00008B",
"summary_wm_k": "#00008B", "summary_wm_mad": "#00008B",
"summary_wm_mean": "#00008B", "summary_wm_median": "#00008B",
"summary_wm_n": "#00008B", "summary_wm_p05": "#00008B",
"summary_wm_p95": "#00008B", "summary_wm_stdv": "#00008B"
}
for var_name, df in df_long.groupby(by="var"):
plt.figure()
df = df.assign(hue=1)
sns.boxplot(x="source", y="val", data=df, color=plot_dict[var_name],
hue="hue", hue_order=[1, 0])
graph = sns.violinplot(x="source", y="val", data=df, color="0.8",
hue="hue", hue_order=[0, 1], split=True)
graph.legend_.remove()
graph = sns.stripplot(x="source", y="val", data=df, jitter=True,
alpha=0.4, color=plot_dict[var_name])
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_ylabel(var_name)
plt.savefig(os.path.join(outdir, "{}_{}.png".format(dtype, var_name)))
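# Usage sketch (hypothetical path and variable name): writes one
# '<dtype>_<iqm>.png' figure per IQM in the destination folder, overlaying
# the user scores on the API distribution:
#   plot_iqms(merged_t1w_df, dtype="t1w", outdir="/tmp/iqm_plots",
#             rm_outliers=True)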
def query_api(dtype, filters=None, maxpage=None):
""" Query the mriqc API using 3 element conditional statement.
Parameters
----------
dtype: str
        the data type: 'bold', 'T1w' or 'T2w'.
filters: list or str, default None
        list of conditional phrases, each consisting of: keyword to query +
        conditional operator + value. All conditions are checked against the
        API as AND phrases.
maxpage: int, default None
        optionally define the maximum number of pages to scroll.
Returns
-------
df: DataFrame
        a table of all mriqc entries that satisfy the conditional
statement.
"""
    # The API limits each request to at most 1000 results
url_root = "https://mriqc.nimh.nih.gov/api/v1/" + dtype
if filters is not None:
if isinstance(filters, str):
filters_str = filters
elif isinstance(filters, list):
filters_str = "&".join(filters)
else:
raise ValueError("The filters can either be a list of strings or "
"a string.")
dfs = []
page = 0
last_page = -1
headers = {"content-type": "application/json", "Accept-Charset": "UTF-8"}
while True:
if page % 10 == 0:
print("On page {}/{}...".format(page, last_page))
if filters is not None:
page_url = url_root + "?max_results=1000&{}&page={}".format(
filters_str, page)
else:
page_url = url_root + "?max_results=1000&page={}".format(page)
req = requests.get(page_url, headers=headers)
data = req.json()
        if last_page == -1:
            last_page = int(
                data["_links"]["last"]["href"].split("=")[-1])
            if maxpage is not None:
                last_page = min(last_page, maxpage)
dfs.append(pd.json_normalize(data["_items"]))
if page >= last_page:
break
else:
page += 1
df = pd.concat(dfs, ignore_index=True, sort=True)
return df
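# Usage sketch: fetch the first few pages (1000 records per page) of T1w IQMs
# from the public MRIQC Web API, as done in the __main__ block below:
#   ref_df = query_api(dtype="T1w", filters=None, maxpage=3)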
if __name__ == "__main__":
dirname = os.path.dirname(__file__)
destdir = os.path.join(dirname, "resources")
for mod in ("T1w", "T2w", "bold"):
print_title("Fetching {} reference...".format(mod))
df = query_api(dtype=mod, filters=None, maxpage=5000)
print(df)
df.to_csv(os.path.join(destdir, "iqm_{}.tsv".format(mod)), sep="\t",
index=False)