Source code for anotherspdnet.nn

# ========================================
# FileName: nn.py
# Date: 11 October 2023 - 14:04
# Author: Ammar Mian
# Email: ammar.mian@univ-smb.fr
# Brief: Implementation of torch layers
# =========================================

import os
from typing import Optional

from math import prod
import torch
from torch import nn

from geoopt.manifolds import Stiefel, Sphere
from geoopt.tensor import ManifoldParameter

from .functions import (
    BiMapFunction,
    ReEigFunction,
    LogEigFunction,
    vec_batch,
    unvec_batch,
    vech_batch,
    unvech_batch,
    eig_operation,
    biMap,
    ReEigBiasFunction,
)
from .utils import initialize_weights_sphere, initialize_weights_stiefel


# =============================================================================
# BiMap layer
# =============================================================================
class BiMap(nn.Module):
    def __init__(
        self,
        n_in: int,
        n_out: int,
        n_batches: Optional[tuple] = None,
        manifold: str = "stiefel",
        seed: Optional[int] = None,
        dtype: torch.dtype = torch.float64,
        device: torch.device = torch.device("cpu"),
        mm_mode: str = "einsum",
        use_autograd: bool = False,
    ) -> None:
        """BiMap layer of an SPDnet network according to the paper:
        A Riemannian Network for SPD Matrix Learning, Huang et al.,
        AAAI Conference on Artificial Intelligence, 2017.

        Parameters
        ----------
        n_in : int
            Number of input features.
        n_out : int
            Number of output features.
        n_batches : tuple, optional
            Batch dimensions of the SPD matrices. If given, it must be a
            tuple containing at least one batch dimension. Default is None.
        manifold : str, optional
            Manifold on which the weight is initialized. Default is
            'stiefel'. Choice between 'stiefel' and 'sphere'.
        seed : int, optional
            Seed for the initialization of the weight matrix.
            Default is None.
        dtype : torch.dtype, optional
            Data type of the layer. Default is torch.float64.
        device : torch.device, optional
            Device on which the layer is initialized. Default is 'cpu'.
        mm_mode : str, optional
            Mode for the matrix multiplication. Default is 'einsum'.
            Choice between 'einsum' and 'bmm'.
        use_autograd : bool, optional
            Use torch autograd for the computation of the gradient rather
            than the analytical formula. Default is False.
        """
        super().__init__()
        self.n_in = n_in
        self.n_out = n_out
        self.n_batches = n_batches
        self.device = device
        self.seed = seed
        self.dtype = dtype
        self.use_autograd = use_autograd
        self.dim = n_out

        if mm_mode not in ["einsum", "bmm"]:
            raise ValueError("mm_mode must be either einsum or bmm")
        self.mm_mode = mm_mode

        if manifold not in ["stiefel", "sphere"]:
            raise ValueError("manifold must be either stiefel or sphere")
        if not isinstance(device, torch.device):
            raise TypeError("device must be a torch.device")

        if manifold == "stiefel":
            self.manifold = Stiefel()
            initialize_weights = initialize_weights_stiefel
        else:
            self.manifold = Sphere()
            initialize_weights = initialize_weights_sphere

        # Initialize the weight matrix using geoopt. The weight is stored
        # with its larger dimension first so that it is a valid Stiefel
        # point in both cases, as required by the Riemannian optimization.
        if n_out > n_in:
            if n_batches is None:
                shape = (n_out, n_in)
            else:
                shape = n_batches + (n_out, n_in)
        else:
            if n_batches is None:
                shape = (n_in, n_out)
            else:
                shape = n_batches + (n_in, n_out)
        _W = ManifoldParameter(
            torch.empty(shape, dtype=dtype, device=device),
            manifold=self.manifold,
        )
        self.W = initialize_weights(_W, seed=seed).type(self.dtype)
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the BiMap layer.

        Parameters
        ----------
        X : torch.Tensor of shape self.n_batches + (n_matrices, n_in, n_in)
            Batches of input SPD matrices.

        Returns
        -------
        Y : torch.Tensor of shape self.n_batches + (n_matrices, n_out, n_out)
            The output matrices are close to SPD. They need regularization
            with the ReEig layer, especially if n_out > n_in.
        """
        if self.n_out < self.n_in:
            _W = self.W.transpose(-2, -1)
        else:
            _W = self.W
        if self.use_autograd:
            return biMap(X, _W, self.mm_mode)
        return BiMapFunction.apply(X, _W, self.mm_mode)
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        return (
            f"BiMap(n_in={self.n_in}, n_out={self.n_out}, "
            f"shape={self.W.shape}, use_autograd={self.use_autograd}, "
            f"seed={self.seed}, dtype={self.dtype}, device={self.device}, "
            f"mm_mode={self.mm_mode})"
        )

    def __str__(self) -> str:
        """String representation of the layer

        Returns
        -------
        str
            String representation of the layer
        """
        return self.__repr__()
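
# A minimal usage sketch of BiMap (added for illustration, not part of the
# original module; the batch size and dimensions below are arbitrary
# assumptions). The layer maps a batch of (n_in, n_in) SPD matrices to
# (n_out, n_out) matrices:
#
# >>> layer = BiMap(n_in=20, n_out=10)
# >>> A = torch.randn(8, 20, 20, dtype=torch.float64)
# >>> X = A @ A.transpose(-2, -1) + 1e-3 * torch.eye(20, dtype=torch.float64)
# >>> layer(X).shape
# torch.Size([8, 10, 10])
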

# =============================================================================
# ReEig layer
# =============================================================================
class ReEigBias(nn.Module):
    def __init__(
        self,
        dim: int,
        eps: float = 1e-4,
        use_autograd: bool = False,
        dtype: torch.dtype = torch.float64,
        device: torch.device = torch.device("cpu"),
        seed: Optional[int] = None,
    ) -> None:
        """ReEig layer with a bias term.

        Parameters
        ----------
        dim : int
            Dimension of the SPD matrices.
        eps : float, optional
            Value of rectification of the eigenvalues. Default is 1e-4.
        use_autograd : bool, optional
            Use torch autograd for the computation of the gradient rather
            than the analytical formula. Default is False.
            FOR NOW: without autograd, the layer is not implemented.
        dtype : torch.dtype, optional
            Data type of the layer. Default is torch.float64.
        device : torch.device, optional
            Device on which the layer is initialized. Default is 'cpu'.
        seed : int, optional
            Seed for the initialization of the bias term. Default is None.
        """
        super().__init__()
        self.eps = eps
        self.dim = dim
        self.use_autograd = use_autograd
        self.dtype = dtype
        self.seed = seed

        # Initialize the bias term to zero. (A seeded random initialization
        # was considered:
        # init_bias = torch.randn(dim, dtype=self.dtype,
        #     generator=torch.Generator().manual_seed(seed))
        # but is currently disabled.)
        self.bias = nn.Parameter(
            torch.zeros(dim, dtype=self.dtype, device=device)
        )
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the ReEig layer with bias.

        Parameters
        ----------
        X : torch.Tensor of shape (..., n_features, n_features)
            Batches of input almost-SPD matrices.

        Returns
        -------
        Y : torch.Tensor of shape (..., n_features, n_features)
            The regularized SPD matrices.
        """
        assert X.shape[-1] == self.dim, \
            f"Input matrices must have dimension {self.dim}"
        if self.use_autograd:
            # Shift the eigenvalues by the learned bias, then clamp them
            # to the interval [eps, 1/eps].
            operation = lambda x: torch.min(
                torch.nn.functional.threshold(x + self.bias, self.eps, self.eps),
                (1 / self.eps) * torch.ones_like(x),
            )
            _, _, res = eig_operation(X, operation)
        else:
            res = ReEigBiasFunction.apply(X, self.bias, self.eps)
        return res
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        return (
            f"ReEigBias(dim={self.dim}, eps={self.eps}, "
            f"use_autograd={self.use_autograd}, seed={self.seed})"
        )
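
# Usage sketch for ReEigBias (added for illustration; the batch size and
# dimension are assumptions). With use_autograd=True, eigenvalues are
# shifted by the learned bias and clamped to [eps, 1/eps]:
#
# >>> reeig_bias = ReEigBias(dim=10, use_autograd=True)
# >>> B = torch.randn(8, 10, 10, dtype=torch.float64)
# >>> S = B @ B.transpose(-2, -1)
# >>> reeig_bias(S).shape
# torch.Size([8, 10, 10])
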
class ReEig(nn.Module):
    def __init__(
        self,
        eps: float = 1e-4,
        use_autograd: bool = False,
        mm_mode: str = "einsum",
        eig_function: str = "eigh",
        formula: str = "brooks",
        dim: Optional[int] = None,
    ) -> None:
        """ReEig layer of an SPDnet network according to the paper:
        A Riemannian Network for SPD Matrix Learning, Huang et al.,
        AAAI Conference on Artificial Intelligence, 2017.

        Parameters
        ----------
        eps : float, optional
            Value of rectification of the eigenvalues. Default is 1e-4.
        use_autograd : bool, optional
            Use torch autograd for the computation of the gradient rather
            than the analytical formula. Default is False.
        mm_mode : str, optional
            Mode for the matrix multiplication. Default is 'einsum'.
            Choice between 'einsum' and 'bmm'.
        eig_function : str, optional
            Function used for the computation of the eigendecomposition.
            Default is 'eigh'. Choice between 'eigh' and 'eig'.
        formula : str, optional
            Formula used for the computation of the gradient. Default is
            'brooks'. Choice between 'brooks' and 'ionescu'.
        dim : int, optional
            Dimension of the SPD matrices. Default is None.
            Used for logging purposes.
        """
        super().__init__()
        self.eps = eps
        self.use_autograd = use_autograd
        self.dim = dim
        self.mm_mode = mm_mode
        self.eig_function = eig_function
        self.formula = formula
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the ReEig layer.

        Parameters
        ----------
        X : torch.Tensor of shape (..., n_features, n_features)
            Batches of input almost-SPD matrices.

        Returns
        -------
        Y : torch.Tensor of shape (..., n_features, n_features)
            The regularized SPD matrices.
        """
        if self.use_autograd:
            # Raise eigenvalues below eps to eps.
            operation = lambda x: torch.nn.functional.threshold(x, self.eps, self.eps)
            _, _, res = eig_operation(X, operation, self.eig_function, self.mm_mode)
            return res
        return ReEigFunction.apply(
            X, self.eps, self.mm_mode, self.eig_function, self.formula
        )
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        base_str = (
            f"ReEig(eps={self.eps}, use_autograd={self.use_autograd}, "
            f"mm_mode={self.mm_mode}, eig_function={self.eig_function}, "
            f"formula={self.formula}"
        )
        if self.dim is None:
            base_str += ")"
        else:
            base_str += f", dim={self.dim})"
        return base_str

    def __str__(self) -> str:
        """String representation of the layer

        Returns
        -------
        str
            String representation of the layer
        """
        return self.__repr__()
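
# Usage sketch for ReEig (added for illustration; it reuses the layer and
# input X from the BiMap sketch above, which are assumptions). Eigenvalues
# smaller than eps are raised to eps, regularizing the nearly-singular
# outputs of a BiMap layer:
#
# >>> reeig = ReEig(eps=1e-4)
# >>> reeig(layer(X)).shape
# torch.Size([8, 10, 10])
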

# =============================================================================
# LogEig layer
# =============================================================================
class LogEig(nn.Module):
    def __init__(
        self,
        use_autograd: bool = False,
        mm_mode: str = "einsum",
        eig_function: str = "eigh",
        formula: str = "brooks",
    ) -> None:
        """LogEig layer of an SPDnet network according to the paper:
        A Riemannian Network for SPD Matrix Learning, Huang et al.,
        AAAI Conference on Artificial Intelligence, 2017.

        Parameters
        ----------
        use_autograd : bool, optional
            Use torch autograd for the computation of the gradient rather
            than the analytical formula. Default is False.
        mm_mode : str, optional
            Mode for the matrix multiplication. Default is 'einsum'.
            Choice between 'einsum' and 'bmm'.
        eig_function : str, optional
            Function used for the computation of the eigendecomposition.
            Default is 'eigh'. Choice between 'eigh' and 'eig'.
        formula : str, optional
            Formula used for the computation of the gradient. Default is
            'brooks'. Choice between 'brooks' and 'ionescu'.
        """
        super().__init__()
        self.use_autograd = use_autograd
        self.mm_mode = mm_mode
        self.eig_function = eig_function
        self.formula = formula
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the LogEig layer.

        Parameters
        ----------
        X : torch.Tensor of shape (..., n_features, n_features)
            Batches of input SPD matrices.

        Returns
        -------
        Y : torch.Tensor of shape (..., n_features, n_features)
            The matrix logarithms of the input matrices.
        """
        if self.use_autograd:
            # Apply the logarithm to the eigenvalues.
            operation = lambda x: torch.log(x)
            _, _, res = eig_operation(X, operation, self.eig_function, self.mm_mode)
            return res
        return LogEigFunction.apply(X, self.mm_mode, self.eig_function, self.formula)
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        return (
            f"LogEig(use_autograd={self.use_autograd}, mm_mode={self.mm_mode}, "
            f"eig_function={self.eig_function}, formula={self.formula})"
        )

    def __str__(self) -> str:
        """String representation of the layer

        Returns
        -------
        str
            String representation of the layer
        """
        return self.__repr__()
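
# Usage sketch chaining the three layers into a small SPDnet feature
# extractor (added for illustration; the dimensions, the use of
# nn.Sequential, and the input X from the BiMap sketch are assumptions).
# LogEig maps SPD matrices to a flat space where Euclidean layers apply:
#
# >>> net = nn.Sequential(
# ...     BiMap(n_in=20, n_out=10),
# ...     ReEig(),
# ...     LogEig(),
# ... )
# >>> net(X).shape
# torch.Size([8, 10, 10])
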

# =============================================================================
# Vectorization layer
# =============================================================================
class Vectorization(nn.Module):
    def __init__(self) -> None:
        """Vectorization of a batch of matrices according to the last two
        dimensions."""
        super().__init__()
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the Vectorization layer.

        Parameters
        ----------
        X: torch.Tensor of shape (..., n, k)
            Batch of matrices.

        Returns
        -------
        X_vec: torch.Tensor of shape (..., n*k)
            Batch of vectorized matrices.
        """
        return vec_batch(X)
    def inverse_transform(self, X: torch.Tensor, n_rows: int) -> torch.Tensor:
        """Inverse transform of the Vectorization layer.

        Parameters
        ----------
        X: torch.Tensor of shape (..., n_rows*k)
            Batch of vectorized matrices.
        n_rows: int
            Number of rows of the original matrices.

        Returns
        -------
        X_mat: torch.Tensor of shape (..., n_rows, k)
            Batch of matrices.
        """
        return unvec_batch(X, n_rows)
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        return "Vectorization()"

    def __str__(self) -> str:
        """String representation of the layer

        Returns
        -------
        str
            String representation of the layer
        """
        return self.__repr__()
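
# Usage sketch for Vectorization (added for illustration; shapes are
# assumptions). The last two dimensions are flattened and can be recovered
# with inverse_transform:
#
# >>> vec = Vectorization()
# >>> M = torch.randn(4, 3, 5)
# >>> v = vec(M)  # shape (4, 15)
# >>> vec.inverse_transform(v, n_rows=3).shape
# torch.Size([4, 3, 5])
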
class Vech(nn.Module):
    def __init__(self) -> None:
        """Vech operator on a batch of matrices according to the last two
        dimensions."""
        super().__init__()
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Forward pass of the Vech layer.

        Parameters
        ----------
        X: torch.Tensor of shape (..., n, n)
            Batch of symmetric matrices.

        Returns
        -------
        X_vech: torch.Tensor of shape (..., n*(n+1)//2)
            Batch of vech-vectorized matrices.
        """
        return vech_batch(X)
    def inverse_transform(self, X: torch.Tensor) -> torch.Tensor:
        """Inverse transform of the Vech layer.

        Parameters
        ----------
        X: torch.Tensor of shape (..., n*(n+1)//2)
            Batch of vech-vectorized matrices.

        Returns
        -------
        X_mat: torch.Tensor of shape (..., n, n)
            Batch of symmetric matrices.
        """
        return unvech_batch(X)
    def __repr__(self) -> str:
        """Representation of the layer

        Returns
        -------
        str
            Representation of the layer
        """
        return "Vech()"

    def __str__(self) -> str:
        """String representation of the layer

        Returns
        -------
        str
            String representation of the layer
        """
        return self.__repr__()
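
# Usage sketch for Vech (added for illustration; shapes are assumptions, and
# the exact triangular-half convention is determined by vech_batch). A 5x5
# symmetric matrix has 5*(5+1)//2 = 15 distinct entries:
#
# >>> vech = Vech()
# >>> B = torch.randn(4, 5, 5)
# >>> S = B + B.transpose(-2, -1)  # symmetric batch
# >>> h = vech(S)  # shape (4, 15)
# >>> vech.inverse_transform(h).shape
# torch.Size([4, 5, 5])
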