Source code for GLE_analysisEM._gle_basis_projection

"""
Contain tools for basis projection
"""
import numpy as np
from itertools import chain, combinations, tee
from itertools import combinations_with_replacement as combinations_w_r


from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.utils import check_array
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

from ._gle_potential_projection import GLE_PotentialTransform
from ._basis_features import LinearFeatures, BinsFeatures, BSplineFeatures


def _combinations(n_features, degree, interaction_only, include_bias):
    """To get combinaison of features
    """
    comb = combinations if interaction_only else combinations_w_r
    start = int(not include_bias)
    return chain.from_iterable(comb(range(n_features), i) for i in range(start, degree + 1))


[docs]class GLE_BasisTransform(TransformerMixin, BaseEstimator): """A transformer that give values of the basis along the trajectories. Parameters ---------- dim_x : int, default=1 The number of visible dimensions. basis_type : str, default= "linear" Give the type of basis projection Must be one of:: "linear" : Linear basis. "polynomial" : Polynomial basis. "bins" : Bins basis. "bsplines" : BSplines basis. "free_energy" : Use histogram estimation of the free energy as unique basis function. "free_energy_kde" : Use Kernel Density estimation of the free energy as unique basis function. "custom": custom basis, you should pass a Transformer class """
[docs] def __init__(self, basis_type="linear", transformer=None, **kwargs): self.basis_type = basis_type self.transformer = transformer self.kwargs = kwargs
def _initialize(self): """ Initialize the feature class """ self.basis_type = self.basis_type.casefold() self.to_combine_ = False if self.transformer is None: if self.basis_type == "linear": self.featuresTransformer = LinearFeatures(to_center=self.kwargs.get("to_center", False)) elif self.basis_type == "polynomial": degree = self.kwargs.get("degree", 3) if degree < 0: raise ValueError("The number of basis element must be positive") self.featuresTransformer = PolynomialFeatures(degree=degree) elif self.basis_type == "bins": strategy = self.kwargs.get("strategy", "uniform") n_bins_arg = self.kwargs.get("n_bins", "auto") # n_bins should be a number, we cannot have different number of bins for different direction self.featuresTransformer = BinsFeatures(n_bins_arg=n_bins_arg, strategy=strategy) # TODO use sparse array self.to_combine_ = True and (not self.dim_x == 1) # No need for combinaison if only one dimensionnal datas elif self.basis_type == "bsplines": n_knots = self.kwargs.get("n_knots", 5) degree = self.kwargs.get("degree", 3) periodic = self.kwargs.get("periodic", False) self.featuresTransformer = BSplineFeatures(n_knots, degree=degree, periodic=periodic) self.to_combine_ = True and (not self.dim_x == 1) elif self.basis_type == "free_energy_kde": self.featuresTransformer = GLE_PotentialTransform(estimator="kde", dim_x=self.dim_x, bandwidth=self.kwargs.get("bandwidth", "scott"), per=self.kwargs.get("periodic", False)) elif self.basis_type == "free_energy_histogram" or self.basis_type == "free_energy": # Default free energy set to histogram self.featuresTransformer = GLE_PotentialTransform(estimator="histogram", dim_x=self.dim_x, bins=self.kwargs.get("bins", "auto"), per=self.kwargs.get("periodic", False)) elif self.basis_type == "custom": raise ValueError("No transformer have been passed as argument for custom transformer") else: raise ValueError("The basis type {} is not implemented.".format(self.basis_type)) else: self.featuresTransformer = self.transformer def fit(self, X, y=None): """A reference implementation of a fitting function for a transformer. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, dim_x) The training input samples. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. basis_params : dict Additional basis parameters. Returns ------- self : object Returns self. """ # Input validation X = check_array(X) self.dim_x = X.shape[1] self._initialize() self.featuresTransformer = self.featuresTransformer.fit(X) if hasattr(self.featuresTransformer, "n_output_features_"): self.nb_basis_elt_ = self.featuresTransformer.n_output_features_ else: self.nb_basis_elt_ = self.dim_x if self.to_combine_: # If it is needed to combine the features iter_combinations = _combinations(self.featuresTransformer.n_output_features_, self.dim_x, False, False) self.combinations = [comb for comb in iter_combinations] self.nb_basis_elt_ = sum(1 for _ in self.combinations) self.fitted_ = True return self def combine(self, X): """ If basis are one dimensionnal, combine them into product """ n_samples, _ = X.shape # What follows is a faster implementation of: combX = np.empty((n_samples, self.nb_basis_elt_), dtype=X.dtype) for i, comb in enumerate(self.combinations): combX[:, i] = X[:, comb].prod(1) # This implementation uses two optimisations. # First one is broadcasting, # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1] # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2] # ... # multiply ([X[:, start:end], X[:, start]) -> ... # Second optimisation happens for degrees >= 3. # Xi^3 is computed reusing previous computation: # Xi^3 = Xi^2 * Xi. # combX = np.empty((n_samples, self.ncomb_), dtype=X.dtype) # current_col = 0 # # # d = 1 # combX[:, current_col : current_col + self.dim_x] = X # index = list(range(current_col, current_col + self.dim_x)) # current_col += self.dim_x # index.append(current_col) # # # d >= 2 # for _ in range(1, self.dim_x): # new_index = [] # end = index[-1] # for feature_idx in range(self.dim_x): # start = index[feature_idx] # new_index.append(current_col) # next_col = current_col + end - start # if next_col <= current_col: # break # # bk[:, start:end] are terms of degree d - 1 # # that exclude feature #feature_idx. # np.multiply(combX[:, start:end], X[:, feature_idx : feature_idx + 1], out=combX[:, current_col:next_col], casting="no") # current_col = next_col # # new_index.append(current_col) # index = new_index return combX[:, :] def transform(self, X): """Take the position as input and output basis expansion Parameters ---------- X : {array-like, sparse-matrix}, shape (n_samples, n_features) The input samples. Returns ------- X_transformed : array, shape (n_samples, n_features) """ # Check is fit had been called check_is_fitted(self, "fitted_") # Input validation X = check_array(X) n_samples, n_features = X.shape if n_features != self.dim_x: raise ValueError("X shape does not match training shape") bk = self.featuresTransformer.transform(X) if self.to_combine_: bk = self.combine(bk) return bk def orthogonal_projection(self, X, y=None): """Get coefficients of the affine projection on the basis Parameters ---------- X : {array-like, sparse-matrix}, shape (n_samples, n_features) The input samples. y : {array-like, sparse-matrix}, shape (n_samples, n_features) Values at X of the function to project. """ # Check is fit had been called check_is_fitted(self, "fitted_") # Input validation X = check_array(X) bk = self.transform(X) if y is None: y = X # For future we can try to implement minibatch via SGDRegressor and partial_fit self.regr_ = LinearRegression(fit_intercept=False).fit(bk, y=y) # regressor is saved for latter used if needed return self.regr_.coef_.reshape(self.dim_x, -1) # If this is a 1D array, that will becomes a 2D array def get_coefficients(self): """ Save fitted coefficients """ params = self.get_params() params.update({"dim_x": self.dim_x, "nb_basis_elt": self.nb_basis_elt_}) params.update(self.kwargs) if hasattr(self.featuresTransformer, "_get_fitted"): params.update(self.featuresTransformer._get_fitted()) return params def set_coefficients(self, params): """ Set fitted coefficients """ self.set_params(**{k: params[k] for k in ("basis_type", "transformer", "kwargs") if k in params}) self.dim_x = params["dim_x"] self._initialize() if hasattr(self.featuresTransformer, "_set_fitted"): self.featuresTransformer._set_fitted(params) self.nb_basis_elt_ = params["nb_basis_elt"] self.fitted_ = True