Source code for GLE_analysisEM._gle_basis_projection
"""
Contains tools for basis projection.
"""
import numpy as np
from itertools import chain, combinations
from itertools import combinations_with_replacement as combinations_w_r
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.utils import check_array
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from ._gle_potential_projection import GLE_PotentialTransform
from ._basis_features import LinearFeatures, BinsFeatures, BSplineFeatures
def _combinations(n_features, degree, interaction_only, include_bias):
"""To get combinaison of features
"""
comb = combinations if interaction_only else combinations_w_r
start = int(not include_bias)
return chain.from_iterable(comb(range(n_features), i) for i in range(start, degree + 1))
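# For illustration (a hedged example, not part of the original code):
# list(_combinations(n_features=2, degree=2, interaction_only=False, include_bias=False))
# yields [(0,), (1,), (0, 0), (0, 1), (1, 1)],
# i.e. the index tuples for x0, x1, x0*x0, x0*x1 and x1*x1.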
class GLE_BasisTransform(TransformerMixin, BaseEstimator):
    """A transformer that gives the values of the basis functions along the trajectories.
Parameters
----------
dim_x : int, default=1
The number of visible dimensions.
    basis_type : str, default="linear"
        Gives the type of basis projection.
        Must be one of::
            "linear" : Linear basis.
            "polynomial" : Polynomial basis.
            "bins" : Bins basis.
            "bsplines" : B-splines basis.
            "free_energy" (or "free_energy_histogram") : Use a histogram estimate of the free energy as the unique basis function.
            "free_energy_kde" : Use a kernel density estimate of the free energy as the unique basis function.
            "custom" : Custom basis; a transformer instance should be passed via the ``transformer`` argument.
    transformer : Transformer, default=None
        The transformer to use when ``basis_type`` is "custom".
    **kwargs :
        Extra keyword arguments forwarded to the underlying features transformer
        (e.g. ``degree``, ``n_bins``, ``n_knots``, ``periodic``).
"""
    def __init__(self, basis_type="linear", transformer=None, **kwargs):
self.basis_type = basis_type
self.transformer = transformer
self.kwargs = kwargs
def _initialize(self):
"""
Initialize the feature class
"""
self.basis_type = self.basis_type.casefold()
self.to_combine_ = False
if self.transformer is None:
if self.basis_type == "linear":
self.featuresTransformer = LinearFeatures(to_center=self.kwargs.get("to_center", False))
elif self.basis_type == "polynomial":
degree = self.kwargs.get("degree", 3)
if degree < 0:
raise ValueError("The number of basis element must be positive")
self.featuresTransformer = PolynomialFeatures(degree=degree)
elif self.basis_type == "bins":
strategy = self.kwargs.get("strategy", "uniform")
                n_bins_arg = self.kwargs.get("n_bins", "auto")  # n_bins should be a single number; we cannot have a different number of bins per direction
self.featuresTransformer = BinsFeatures(n_bins_arg=n_bins_arg, strategy=strategy) # TODO use sparse array
                self.to_combine_ = self.dim_x != 1  # No need for combinations if the data is one-dimensional
elif self.basis_type == "bsplines":
n_knots = self.kwargs.get("n_knots", 5)
degree = self.kwargs.get("degree", 3)
periodic = self.kwargs.get("periodic", False)
self.featuresTransformer = BSplineFeatures(n_knots, degree=degree, periodic=periodic)
                self.to_combine_ = self.dim_x != 1
elif self.basis_type == "free_energy_kde":
self.featuresTransformer = GLE_PotentialTransform(estimator="kde", dim_x=self.dim_x, bandwidth=self.kwargs.get("bandwidth", "scott"), per=self.kwargs.get("periodic", False))
elif self.basis_type == "free_energy_histogram" or self.basis_type == "free_energy": # Default free energy set to histogram
self.featuresTransformer = GLE_PotentialTransform(estimator="histogram", dim_x=self.dim_x, bins=self.kwargs.get("bins", "auto"), per=self.kwargs.get("periodic", False))
elif self.basis_type == "custom":
raise ValueError("No transformer have been passed as argument for custom transformer")
else:
raise ValueError("The basis type {} is not implemented.".format(self.basis_type))
else:
self.featuresTransformer = self.transformer
def fit(self, X, y=None):
"""A reference implementation of a fitting function for a transformer.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, dim_x)
The training input samples.
        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.
Returns
-------
self : object
Returns self.
"""
# Input validation
X = check_array(X)
self.dim_x = X.shape[1]
self._initialize()
self.featuresTransformer = self.featuresTransformer.fit(X)
if hasattr(self.featuresTransformer, "n_output_features_"):
self.nb_basis_elt_ = self.featuresTransformer.n_output_features_
else:
self.nb_basis_elt_ = self.dim_x
        if self.to_combine_:  # If the features need to be combined
            iter_combinations = _combinations(self.featuresTransformer.n_output_features_, self.dim_x, False, False)
            self.combinations = list(iter_combinations)
            self.nb_basis_elt_ = len(self.combinations)
self.fitted_ = True
return self
def combine(self, X):
"""
        If the basis functions are one-dimensional, combine them into products.
"""
n_samples, _ = X.shape
        # Straightforward implementation: one product of feature columns per combination.
combX = np.empty((n_samples, self.nb_basis_elt_), dtype=X.dtype)
for i, comb in enumerate(self.combinations):
combX[:, i] = X[:, comb].prod(1)
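        # Illustration (a hedged example): if self.combinations were [(0,), (1,), (0, 1)],
        # the columns of combX would be X[:, 0], X[:, 1] and X[:, 0] * X[:, 1].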
        # A faster implementation, adapted from sklearn's PolynomialFeatures, is
        # kept below for reference. It relies on two optimisations.
        # First one is broadcasting,
        # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
        # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
        # ...
        # multiply (X[:, start:end], X[:, start]) -> ...
        # Second optimisation happens for degrees >= 3.
        # Xi^3 is computed reusing a previous computation:
        # Xi^3 = Xi^2 * Xi.
# combX = np.empty((n_samples, self.ncomb_), dtype=X.dtype)
# current_col = 0
#
# # d = 1
# combX[:, current_col : current_col + self.dim_x] = X
# index = list(range(current_col, current_col + self.dim_x))
# current_col += self.dim_x
# index.append(current_col)
#
# # d >= 2
# for _ in range(1, self.dim_x):
# new_index = []
# end = index[-1]
# for feature_idx in range(self.dim_x):
# start = index[feature_idx]
# new_index.append(current_col)
# next_col = current_col + end - start
# if next_col <= current_col:
# break
# # bk[:, start:end] are terms of degree d - 1
# # that exclude feature #feature_idx.
# np.multiply(combX[:, start:end], X[:, feature_idx : feature_idx + 1], out=combX[:, current_col:next_col], casting="no")
# current_col = next_col
#
# new_index.append(current_col)
# index = new_index
        return combX
def transform(self, X):
"""Take the position as input and output basis expansion
Parameters
----------
X : {array-like, sparse-matrix}, shape (n_samples, n_features)
The input samples.
Returns
-------
        bk : array, shape (n_samples, nb_basis_elt_)
            The basis functions evaluated at the input positions.
"""
        # Check if fit has been called
check_is_fitted(self, "fitted_")
# Input validation
X = check_array(X)
n_samples, n_features = X.shape
if n_features != self.dim_x:
raise ValueError("X shape does not match training shape")
bk = self.featuresTransformer.transform(X)
if self.to_combine_:
bk = self.combine(bk)
return bk
def orthogonal_projection(self, X, y=None):
"""Get coefficients of the affine projection on the basis
Parameters
----------
X : {array-like, sparse-matrix}, shape (n_samples, n_features)
The input samples.
        y : {array-like, sparse matrix}, shape (n_samples, n_features), optional
            Values at X of the function to project. If None, the positions X
            themselves are projected.
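        Returns
        -------
        coeffs : array, shape (dim_x, nb_basis_elt_)
            Coefficients of the projection, one row per visible dimension.
        Examples
        --------
        A minimal sketch (the data and the projected function are purely
        illustrative, not part of this module):
        >>> import numpy as np
        >>> X = np.linspace(-1.0, 1.0, 200).reshape(-1, 1)
        >>> basis = GLE_BasisTransform(basis_type="polynomial", degree=5).fit(X)
        >>> basis.orthogonal_projection(X, y=np.sin(X)).shape
        (1, 6)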
"""
        # Check if fit has been called
check_is_fitted(self, "fitted_")
# Input validation
X = check_array(X)
bk = self.transform(X)
if y is None:
y = X
        # In the future, minibatch fitting could be implemented via SGDRegressor and partial_fit
        self.regr_ = LinearRegression(fit_intercept=False).fit(bk, y=y)  # the regressor is saved for later use if needed
        return self.regr_.coef_.reshape(self.dim_x, -1)  # if coef_ is a 1D array, this makes it 2D
def get_coefficients(self):
"""
        Return the fitted coefficients and basis description as a dictionary.
"""
params = self.get_params()
params.update({"dim_x": self.dim_x, "nb_basis_elt": self.nb_basis_elt_})
params.update(self.kwargs)
if hasattr(self.featuresTransformer, "_get_fitted"):
params.update(self.featuresTransformer._get_fitted())
return params
def set_coefficients(self, params):
"""
        Set the fitted coefficients from a dictionary returned by get_coefficients.
"""
self.set_params(**{k: params[k] for k in ("basis_type", "transformer", "kwargs") if k in params})
self.dim_x = params["dim_x"]
self._initialize()
if hasattr(self.featuresTransformer, "_set_fitted"):
self.featuresTransformer._set_fitted(params)
self.nb_basis_elt_ = params["nb_basis_elt"]
self.fitted_ = True