## LARS (LAR and LASSO)
   
## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2010 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Names exported by ``from <module> import *``; the module-level lars()
# function defined below is not listed here.
__all__ = ["Lar", "Lasso", "LarExt", "LassoExt"]

import numpy as np


def lars(x, y, m, method="lar"):
    """Run at most m steps of the LARS algorithm (LAR or LASSO variant).

    The "equation x.y" comments are the references used by the original
    implementation (presumably Efron et al., "Least Angle Regression" --
    confirm).

    :Parameters:
      x : numpy 2d array (nxp)
        matrix of regressors
      y : numpy 1d array (n)
        response
      m : int
        maximum number of steps.
        lar -> m <= x.shape[1]
        lasso -> m can be > x.shape[1]
      method : str
        "lasso" enables the LASSO modification (variables may leave the
        active set); any other value runs plain LAR.

    :Returns:
      active : list of int
        indices of the active regressors, in order of selection
      beta : 1d ndarray (p)
        regression coefficients (zero for non-active regressors)
      steps : int
        number of steps actually completed (0 when m <= 0)
    """

    n_features = x.shape[1]
    mu = np.zeros(x.shape[0])
    active = []
    # A real list is required: .remove()/.append() are used below and a
    # Python 3 range object supports neither.
    inactive = list(range(n_features))
    beta = np.zeros(n_features)
    steps = 0

    for _ in range(m):

        if not inactive:
            break

        # equation 2.8
        c = np.dot(x.T, y - mu)

        # equation 2.9
        ct_abs = np.abs(c)
        ct_abs[active] = 0.0  # avoid re-selections
        j = int(np.argmax(ct_abs))

        if np.any(np.isnan(ct_abs)):  # saturation
            break

        C = ct_abs[j]
        active.append(j)
        inactive.remove(j)

        # equation 2.10
        s = np.sign(c[active])

        # equation 2.4
        xa = x[:, active] * s

        # equation 2.5
        G = np.dot(xa.T, xa)
        try:
            Gi = np.linalg.inv(G)
        except np.linalg.LinAlgError:
            Gi = np.linalg.pinv(G)

        A = np.sum(Gi) ** (-0.5)

        # equation 2.6
        w = np.sum(A * Gi, axis=1)
        u = np.dot(xa, w)

        # equation 2.11
        a = np.dot(x.T, u)

        # equation 2.13
        g1 = (C - c[inactive]) / (A - a[inactive])
        g2 = (C + c[inactive]) / (A + a[inactive])

        g = np.concatenate((g1, g2))
        g = g[g > 0.0]

        if g.shape[0] == 0:
            gammahat = C / A  # equation 2.21
        else:
            gammahat = np.min(g)

        # Direction of the coefficient update in the ORIGINAL (unsigned)
        # coordinates.  Since u = x[:, active] . (s * w), adding
        # gammahat * d to beta keeps mu == x . beta after every step, so no
        # sign fix-up with a possibly stale / wrongly sized `s` is needed
        # once the loop is over.
        d = s * w

        dropped = []
        if method == "lasso":
            gt = -beta  # equation 3.4
            gt[active] /= d  # equation 3.4
            gp = gt[gt > 0.0]  # equation 3.5

            if gp.shape[0] != 0:
                gammatilde = np.min(gp)  # equation 3.5

                # equation 3.6
                if gammatilde < gammahat:
                    gammahat = gammatilde
                    dropped = [int(k) for k in np.where(gt == gammahat)[0]]

        beta[active] += gammahat * d
        mu = mu + (gammahat * u)  # equation 2.12 and 3.6 (lasso)

        # NOTE(review): a variable dropped here is eligible for selection
        # again at the very next step, where its absolute correlation is
        # still tied with the active set -- confirm this is the intended
        # LASSO modification.
        for k in dropped:
            beta[k] = 0.0
            active.remove(k)
            inactive.append(k)

        steps += 1

    return active, beta, steps
        

class Lar(object):
    """LAR.

    Wrapper around the module-level lars() function run with method "lar";
    it stores the selected regressors, the coefficients and the number of
    steps performed by the last call to learn().
    """

    def __init__(self, m=None):
        """Initialization.

        :Parameters:
          m : int (> 0)
            max number of steps (= number of features selected).
            If m=None -> m=x.shape[1] in .learn(x, y)
        """

        self.__m = m  # max number of steps
        self.__beta = None
        self.__selected = None
        self.__steps = None

    def learn(self, x, y):
        """Compute the regression coefficients.

        :Parameters:
          x : numpy 2d array (nxp)
            matrix of regressors
          y : numpy 1d array (n)
            response

        :Raises:
          ValueError
            if x is not a 2d ndarray, y is not a 1d ndarray, or their
            first dimensions differ
        """

        if not isinstance(x, np.ndarray):
            raise ValueError("x must be an numpy 2d array")

        if not isinstance(y, np.ndarray):
            raise ValueError("y must be an numpy 1d array")

        # x.shape[1] is read below, so anything but exactly 2 dimensions
        # has to be rejected here.
        if x.ndim != 2:
            raise ValueError("x must be an 2d array")

        if y.ndim != 1:
            raise ValueError("y must be an 1d array")

        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y are not aligned")

        # Test for None first: ordering None against an int is an error
        # on Python 3.
        n_features = x.shape[1]
        if self.__m is None or self.__m > n_features:
            m = n_features
        else:
            m = self.__m

        self.__selected, self.__beta, self.__steps = \
            lars(x, y, m, "lar")

    def pred(self, x):
        """Compute the predicted response.

        :Parameters:
          x : numpy 2d array (nxp)
            matrix of regressors

        :Returns:
          yp : 1d ndarray
             predicted response

        :Raises:
          ValueError
            if x is not a 2d ndarray, if learn() has not been called yet,
            or if x.shape[1] differs from the number of coefficients
        """

        if not isinstance(x, np.ndarray):
            raise ValueError("x must be an numpy 2d array")

        if x.ndim != 2:
            raise ValueError("x must be an 2d array")

        if self.__beta is None:
            raise ValueError("no model computed")

        if x.shape[1] != self.__beta.shape[0]:
            raise ValueError("x and beta are not aligned")

        return np.dot(x, self.__beta)

    def selected(self):
        """Returns the regressors ranking.
        """

        return self.__selected

    def beta(self):
        """Return b_1, ..., b_p.
        """

        return self.__beta

    def steps(self):
        """Return the number of steps really performed.
        """

        return self.__steps
     

class Lasso(object):
    """LASSO computed with LARS algorithm.

    Wrapper around the module-level lars() function run with method
    "lasso"; it stores the selected regressors, the coefficients and the
    number of steps performed by the last call to learn().
    """

    def __init__(self, m):
        """Initialization.

        :Parameters:
          m : int (> 0)
            max number of steps.
        """

        self.__m = m  # max number of steps
        self.__beta = None
        self.__selected = None
        self.__steps = None

    def learn(self, x, y):
        """Compute the regression coefficients.

        :Parameters:
          x : numpy 2d array (nxp)
            matrix of regressors
          y : numpy 1d array (n)
            response

        :Raises:
          ValueError
            if x is not a 2d ndarray, y is not a 1d ndarray, or their
            first dimensions differ
        """

        if not isinstance(x, np.ndarray):
            raise ValueError("x must be an numpy 2d array")

        if not isinstance(y, np.ndarray):
            raise ValueError("y must be an numpy 1d array")

        # lars() reads x.shape[1], so anything but exactly 2 dimensions
        # has to be rejected here.
        if x.ndim != 2:
            raise ValueError("x must be an 2d array")

        if y.ndim != 1:
            raise ValueError("y must be an 1d array")

        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y are not aligned")

        self.__selected, self.__beta, self.__steps = \
            lars(x, y, self.__m, "lasso")

    def pred(self, x):
        """Compute the predicted response.

        :Parameters:
          x : numpy 2d array (nxp)
            matrix of regressors

        :Returns:
          yp : 1d ndarray
             predicted response

        :Raises:
          ValueError
            if x is not a 2d ndarray, if learn() has not been called yet,
            or if x.shape[1] differs from the number of coefficients
        """

        if not isinstance(x, np.ndarray):
            raise ValueError("x must be an numpy 2d array")

        if x.ndim != 2:
            raise ValueError("x must be an 2d array")

        if self.__beta is None:
            raise ValueError("no model computed")

        if x.shape[1] != self.__beta.shape[0]:
            raise ValueError("x and beta are not aligned")

        return np.dot(x, self.__beta)

    def selected(self):
        """Returns the regressors ranking.
        """

        return self.__selected

    def beta(self):
        """Return b_1, ..., b_p.
        """

        return self.__beta

    def steps(self):
        """Return the number of steps really performed.
        """

        return self.__steps


class LarExt(object):
    """Repeated LAR selection.

    learn() runs Lar several times, each time on the columns that have not
    been selected yet and for at most x.shape[0] steps per run, until m
    steps have been requested in total. The selections of the successive
    runs are concatenated.
    """

    def __init__(self, m=None):
        """Initialization.

        :Parameters:
          m : int
            total max number of steps over all runs.
            If m=None (or m > number of columns) -> number of columns
            of x in .learn(x, y)
        """

        self.__m = m  # max number of steps
        self.__selected = None

    def learn(self, x, y):
        """Compute the ranking of the regressors.

        :Parameters:
          x : numpy 1d (n) or 2d (nxp) array
            regressors; a 1d array is treated as a single column
          y : numpy 1d array (n)
            response

        :Raises:
          ValueError
            if x has more than 2 dimensions or x and y are not aligned
        """

        if x.ndim > 2:
            raise ValueError("x must be an 1-D or 2-D array")

        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y are not aligned")

        xx = x.reshape(-1, 1) if x.ndim == 1 else x
        n_samples, n_features = xx.shape

        # Test for None first: ordering None against an int is an error
        # on Python 3.
        if self.__m is None or self.__m > n_features:
            m = n_features
        else:
            m = self.__m

        # compute number of LAR steps: as many full runs of n_samples
        # steps as fit into m, plus one shorter run for the rest.  Integer
        # division is required (a list cannot be repeated a float number
        # of times) and an empty trailing run is skipped.
        runs, rest = divmod(m, n_samples)
        ms = [n_samples] * runs
        if rest:
            ms.append(rest)

        active = []
        remaining = np.arange(n_features)

        for steps in ms:
            model = Lar(m=steps)
            model.learn(xx[:, remaining], y)
            sel = model.selected()
            # sel indexes the columns of the sub-matrix; map it back to
            # column indices of xx.
            chosen = remaining[sel]
            active.extend(chosen.tolist())
            remaining = np.setdiff1d(remaining, chosen)

        self.__selected = np.array(active, dtype=int)

    def selected(self):
        """Return the selected regressors, in order of selection
        (None before learn() is called).
        """

        return self.__selected


class LassoExt(object):
    """Repeated LASSO selection.

    learn() runs Lasso (with m steps) again and again, each time on the
    columns that have not been selected yet, and concatenates the
    selections of the successive runs.
    """

    def __init__(self, m):
        """Initialization.

        :Parameters:
          m : int (> 0)
            max number of steps of each Lasso run.
        """

        self.__m = m  # max number of steps
        self.__selected = None

    def learn(self, x, y):
        """Compute the ranking of the regressors.

        :Parameters:
          x : numpy 1d (n) or 2d (nxp) array
            regressors; a 1d array is treated as a single column
          y : numpy 1d array (n)
            response

        :Raises:
          ValueError
            if x has more than 2 dimensions or x and y are not aligned
        """

        if x.ndim > 2:
            raise ValueError("x must be an 1-D or 2-D array")

        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y are not aligned")

        xx = x.reshape(-1, 1) if x.ndim == 1 else x
        m = self.__m

        active = []
        remaining = np.arange(xx.shape[1])

        while len(remaining) != 0:
            lasso = Lasso(m=m)
            lasso.learn(xx[:, remaining], y)
            sel = lasso.selected()

            # A run that selects nothing would leave `remaining` unchanged
            # and the loop would never end; stop with what has been
            # selected so far.
            if len(sel) == 0:
                break

            # sel indexes the columns of the sub-matrix; map it back to
            # column indices of xx.
            chosen = remaining[sel]
            active.extend(chosen.tolist())
            remaining = np.setdiff1d(remaining, chosen)

        self.__selected = np.array(active, dtype=int)

    def selected(self):
        """Return the selected regressors, in order of selection
        (None before learn() is called).
        """

        return self.__selected
