Source code for nideconv.simulate

import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal
from scipy import stats  # makes sp.stats available for the amplitude sampling below
from .utils import convolve_with_function


def simulate_fmri_experiment(conditions=None,
                             TR=1.,
                             n_subjects=1,
                             n_runs=1,
                             n_trials=40,
                             run_duration=300,
                             oversample=20,
                             noise_level=1.0,
                             n_rois=1,
                             kernel='double_gamma',
                             kernel_pars={}):
    """
    Simulates an fMRI experiment and returns a pandas DataFrame with the
    resulting time series in an analysis-ready format.

    By default a single run of a single subject is simulated, but a larger
    number of subjects, runs, and ROIs can also be simulated.

    Parameters
    ----------
    conditions : list of dictionaries or *None*
        Can be used to customize different conditions. Every condition is
        represented as a dictionary in this list and has the following
        form::

            [{'name':'Condition A', 'mu_group':1, 'std_group':0.1},
             {'name':'Condition B', 'mu_group':1, 'std_group':0.1}]

        *mu_group* indicates the mean amplitude of the response to this
        condition across subjects. *std_group* indicates the standard
        deviation of this amplitude across subjects.

        Potentially, customized onsets can also be used as follows::

            {'name':'Condition A',
             'mu_group':1,
             'std_group':0.1,
             'onsets':[10, 20, 30]}

    TR : float
        Indicates the time between volume acquisitions in seconds
        (inverse of the sample rate).
    n_subjects : int
        Number of subjects.
    n_runs : int
        Number of runs *per subject*.
    n_trials : int
        Number of trials *per condition per run*. Only used when no custom
        onsets are provided (see *conditions*).
    run_duration : float
        Duration of a single run in seconds.
    noise_level : float
        Standard deviation of Gaussian noise added to time series.
    n_rois : int
        Number of regions-of-interest. Determines the number of columns
        of *data*.

    Other Parameters
    ----------------
    oversample : int
        Determines how many times the kernel is oversampled before
        convolution. Should usually not be changed.
    kernel : str
        Sets which kernel to use for the response function. Currently
        'double_gamma' and 'gamma' can be used.
    kernel_pars : dict
        Keyword arguments that are passed on to the response-function
        kernel.

    Returns
    -------
    data : DataFrame
        Contains simulated time series with subject, run and time (s) as
        index. Columns correspond to different ROIs.
    onsets : DataFrame
        Contains used event onsets with subject, run and trial type as
        index.
    parameters : DataFrame
        Contains parameters (amplitude) of the different event types.
    Examples
    --------
    By default, `simulate_fmri_experiment` simulates a 5 minute run with
    40 trials per condition for one subject

    >>> data, onsets, params = simulate_fmri_experiment()
    >>> print(data.head())
                        area 1
    subj_idx run t
    1        1   0.0 -1.280023
                 1.0  0.908086
                 2.0  0.850847
                 3.0 -1.010475
                 4.0 -0.299650

    >>> print(onsets)
                                   onset
    subj_idx run trial_type
    1        1   A             94.317361
                 A            106.547084
                 A            198.175115
                 A             34.941112
                 A             31.323272

    >>> print(params)
                         amplitude
    subj_idx trial_type
    1        A                 1.0
             B                 2.0

    With n_subjects we can increase the number of subjects

    >>> data, onsets, params = simulate_fmri_experiment(n_subjects=20)
    >>> data.index.get_level_values('subj_idx').unique()
    Int64Index([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20],
               dtype='int64', name='subj_idx')
    """

    if kernel not in ['double_gamma', 'gamma']:
        raise NotImplementedError()

    data = []

    if conditions is None:
        conditions = [{'name': 'Condition A', 'mu_group': 1, 'std_group': 0},
                      {'name': 'Condition B', 'mu_group': 2, 'std_group': 0}]

    conditions = pd.DataFrame(conditions).set_index('name')

    sample_rate = 1. / TR

    frametimes = np.arange(0, run_duration, TR)
    all_onsets = []

    parameters = []
    for subject in np.arange(1, n_subjects + 1):

        for i, condition in conditions.iterrows():
            amplitude = sp.stats.norm(loc=condition['mu_group'],
                                      scale=condition['std_group']).rvs()
            condition['amplitude'] = amplitude
            condition['subject'] = subject
            condition['trial_type'] = condition.name

            parameters.append(condition.drop(['mu_group', 'std_group'], axis=0))

    parameters = pd.DataFrame(parameters).set_index(['subject', 'trial_type'])

    if 'kernel' not in parameters.columns:
        parameters['kernel'] = kernel
    else:
        parameters['kernel'].fillna(kernel, inplace=True)

    if 'kernel_pars' not in parameters.columns:
        parameters['kernel_pars'] = np.nan

    if type(n_trials) is int:
        n_trials = [n_trials] * len(conditions)

    for subject in np.arange(1, n_subjects + 1):

        for run in range(1, n_runs + 1):

            signals = np.zeros((len(conditions), len(frametimes)))

            for i, (_, condition) in enumerate(conditions.iterrows()):

                if 'onsets' in condition:
                    onsets = np.array(condition.onsets)
                else:
                    onsets = np.ones(0)

                    while len(onsets) < n_trials[i]:
                        isis = np.random.gamma(run_duration / n_trials[i],
                                               1,
                                               size=n_trials[i] * 10)
                        onsets = np.cumsum(isis)
                        onsets = onsets[onsets < run_duration]

                    onsets = np.random.choice(onsets, n_trials[i], replace=False)

                signals[i, (onsets / TR).astype(int)] = \
                    parameters.loc[(subject, condition.name), 'amplitude']

                all_onsets.append(pd.DataFrame({'onset': onsets}))
                all_onsets[-1]['subject'] = subject
                all_onsets[-1]['run'] = run
                all_onsets[-1]['trial_type'] = condition.name

                if np.isnan(parameters.loc[(subject, condition.name), 'kernel_pars']):
                    kernel_pars_ = kernel_pars
                else:
                    kernel_pars_ = parameters.loc[(subject, condition.name), 'kernel_pars']

                signals[i] = convolve_with_function(signals[i],
                                                    parameters.loc[(subject, condition.name), 'kernel'],
                                                    sample_rate,
                                                    **kernel_pars_)

            signal = signals.sum(0)
            signal = np.repeat(signal[:, np.newaxis], n_rois, 1)
            signal += np.random.randn(*signal.shape) * noise_level

            if n_rois == 1:
                columns = ['signal']
            else:
                columns = ['area %d' % i for i in range(1, n_rois + 1)]

            tmp = pd.DataFrame(signal, columns=columns)

            tmp['t'] = frametimes
            tmp['subject'], tmp['run'] = subject, run

            data.append(tmp)

    data = pd.concat(data).set_index(['subject', 'run', 't'])
    onsets = pd.concat(all_onsets).set_index(['subject', 'run', 'trial_type'])

    if n_subjects == 1:
        data.index = data.index.droplevel('subject')
        onsets.index = onsets.index.droplevel('subject')

    if n_runs == 1:
        data.index = data.index.droplevel('run')
        onsets.index = onsets.index.droplevel('run')

    return data, onsets, parameters
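
The docstring above describes the *conditions* format and the group-level amplitude parameters. The sketch below is not part of the module source; it shows one plausible way to call the function with customized conditions and several subjects, runs, and ROIs. The condition names and all parameter values are arbitrary illustration choices, not package defaults.

# Illustrative usage sketch for simulate_fmri_experiment; condition names
# and parameter values are assumptions, not nideconv defaults.
from nideconv.simulate import simulate_fmri_experiment

conditions = [{'name': 'cue',
               'mu_group': 1.5,    # mean response amplitude across subjects
               'std_group': 0.3},  # between-subject spread of that amplitude
              {'name': 'stimulus',
               'mu_group': 3.0,
               'std_group': 0.5}]
# A fixed 'onsets' list could be added to each dictionary to bypass the
# random onset generation (see the docstring above).

data, onsets, parameters = simulate_fmri_experiment(conditions=conditions,
                                                    TR=1.5,
                                                    n_subjects=5,
                                                    n_runs=2,
                                                    run_duration=300,
                                                    noise_level=0.5,
                                                    n_rois=3)

print(data.head())    # time series: one column per ROI ('area 1' ... 'area 3')
print(onsets.head())  # event onsets indexed by subject, run and trial type
print(parameters)     # amplitude drawn for every subject and trial type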