Source code for GLE_analysisEM.datas_loaders

"""
Some utility functions to load, cut and resample trajectory data
"""
import numpy as np
from sklearn.model_selection import ShuffleSplit


def loadData(paths, dim_x, maxlenght=None):
    """Load trajectories from a list of files.

    Every file is read with ``numpy.loadtxt``. The first column is used as
    the time, shifted so that the first data point of the file is at time
    zero, and the following ``dim_x`` columns are used as the positions.
    The velocity is estimated with ``numpy.gradient`` of the positions with
    respect to the time column.

    Parameters
    ----------
    paths : list of str
        List of paths to trajectory files, one trajectory per file.
        The files are loaded with ``numpy.loadtxt`` and should have one
        time column followed by one column per dimension, with one data
        point per line.
    dim_x : int
        Number of position columns to take from each file.
    maxlenght : int, optional
        If given, only the first ``maxlenght`` data points of each file are
        kept. By default the whole file is used.

    Returns
    -------
    X : ndarray or None
        All trajectories stacked along the first axis. Columns are the
        time, then the positions, then the velocities. ``None`` when
        ``paths`` is empty.
    idx_trajs : list of int
        Row indices of ``X`` at which the second and following trajectories
        start (suitable for ``numpy.split(X, idx_trajs)``).
    """
    blocks = []
    idx_trajs = []
    n_rows = 0
    for chemin in paths:
        trj = np.loadtxt(chemin)
        # Slicing with ``None`` keeps every row, so no special case is needed.
        rows = trj[:maxlenght]
        tps = np.asarray(rows[:, :1] - trj[0, 0])  # Set origin of time to zero
        pos = np.asarray(rows[:, 1 : 1 + dim_x])
        velocity = np.gradient(pos, tps[:, 0], axis=0)
        if blocks:
            # Start row of this trajectory inside the stacked array.
            idx_trajs.append(n_rows)
        blocks.append(np.hstack((tps, pos, velocity)))
        n_rows += len(tps)
    if not blocks:
        return None, idx_trajs
    # Stack once at the end instead of re-copying the array for every file.
    return np.vstack(blocks), idx_trajs
def cutTrajs(X, idx_trajs=None, n_cut=1):
    """Cut every trajectory into ``n_cut`` smaller pieces.

    Parameters
    ----------
    X : ndarray
        Stacked trajectories, one data point per row.
    idx_trajs : list of int, optional
        Row indices at which ``X`` is split into individual trajectories
        (passed to ``numpy.split``). By default ``X`` is treated as a
        single trajectory.
    n_cut : int, default=1
        Number of pieces each trajectory is cut into, using
        ``numpy.array_split`` (pieces may differ in length by one row).

    Returns
    -------
    X_cut : ndarray
        The pieces stacked back along the first axis.
    idx_cut : list of int
        Row indices of ``X_cut`` at which the second and following pieces
        start.
    """
    if idx_trajs is None:
        idx_trajs = []

    pieces = []
    for trj in np.split(X, idx_trajs):
        pieces.extend(np.array_split(trj, n_cut))

    # Boundaries are the running row count before each piece but the first.
    idx_cut = []
    offset = 0
    for piece in pieces[:-1]:
        offset += len(piece)
        idx_cut.append(offset)
    return np.vstack(pieces), idx_cut


def split_loadDatas(paths, dim_x, n_splits=5, test_size=None, train_size=0.9, random_state=None):
    """Yield data loaded from random subsets of ``paths`` for cross validation.

    The subsets are drawn with ``sklearn.model_selection.ShuffleSplit``;
    see its documentation for the meaning of ``n_splits``, ``test_size``,
    ``train_size`` and ``random_state``. Only the training indices of each
    split are used; the test indices are discarded.

    Parameters
    ----------
    paths : list of str
        Paths to the trajectory files.
    dim_x : int
        Number of position columns, forwarded to ``loadData``.

    Yields
    ------
    tuple
        The ``(X, idx_trajs)`` result of ``loadData`` on the training paths
        of each split.
    """
    nppaths = np.asarray(paths)
    ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, train_size=train_size, random_state=random_state)
    for train_index, _ in ss.split(paths):
        yield loadData(nppaths[train_index], dim_x)


def bootstrap_Datas(paths, dim_x, n_splits=5, test_size=None, train_size=0.9, random_state=None):
    """Yield data loaded from subsets of ``paths`` drawn with replacement.

    Parameters
    ----------
    paths : list of str
        Paths to the trajectory files.
    dim_x : int
        Number of position columns, forwarded to ``loadData``.
    n_splits : int, default=5
        Number of bootstrap samples to generate.
    test_size : None
        Unused; kept so the signature matches ``split_loadDatas``.
    train_size : float, default=0.9
        Each sample contains ``floor(train_size * len(paths))`` paths.
    random_state : None, int or random generator, optional
        An object providing a ``choice`` method (such as a
        ``numpy.random.Generator``) is used directly. Anything else is
        passed to ``numpy.random.default_rng`` as a seed, so ``None`` gives
        a fresh generator on every call.

    Yields
    ------
    tuple
        The ``(X, idx_trajs)`` result of ``loadData`` on each sample.
    """
    # Build the generator per call: a generator created in the signature
    # would be evaluated once at import time and shared between all calls.
    if hasattr(random_state, "choice"):
        rng = random_state
    else:
        rng = np.random.default_rng(random_state)

    nppaths = np.asarray(paths)
    number_paths = int(np.floor(train_size * len(nppaths)))
    for _ in range(n_splits):
        paths_n = rng.choice(nppaths, size=number_paths, replace=True)
        yield loadData(paths_n, dim_x)