Added all previous methods for regression

parent a3954226
@@ -5,7 +5,8 @@ from libc.math cimport exp
DTYPE = np.float
ctypedef np.float_t DTYPE_t
cdef void cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t, ndim=1] ydata, np.ndarray[DTYPE_t, ndim=1] points,
cdef void cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t, ndim=1] ydata,
np.ndarray[DTYPE_t, ndim=1] points,
np.ndarray[DTYPE_t, ndim=1] li2, np.ndarray[DTYPE_t, ndim=1] output):
cdef unsigned int nx = xdata.shape[0]
cdef unsigned int npts = points.shape[0]
@@ -43,15 +44,11 @@ cdef void cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t
li2[j] += li*li
Op[j] += li * yp[i]
def local_linear_1d(bw, xdata, ydata, points, output = None):
def local_linear_1d(bw, xdata, ydata, points, out):
bw = float(bw)
xdata = np.ascontiguousarray(xdata, dtype=np.float)
ydata = np.ascontiguousarray(ydata, dtype=np.float)
points = np.ascontiguousarray(points, dtype=np.float)
li2 = np.empty(points.shape, dtype=float)
if output is None:
output = np.empty(points.shape, dtype=float)
else:
output = np.ascontiguousarray(output, dtype=np.float)
cy_li(bw, xdata, ydata, points, li2, output)
return li2, output
cy_li(bw, xdata, ydata, points, li2, out)
return li2, out
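With the new signature the caller owns the output buffer instead of the function allocating it. A minimal sketch of a call (made-up data; the function is the one defined in this file):

import numpy as np

xs = np.linspace(0., 1., 200)
ys = np.sin(2 * np.pi * xs) + 0.1 * np.random.randn(200)
grid = np.linspace(0., 1., 50)

out = np.empty(grid.shape, dtype=float)  # pre-allocated by the caller
li2, out = local_linear_1d(0.1, xs, ys, grid, out)
# li2 accumulates the squared local-linear weights, used downstream
# for variance estimates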
@@ -32,12 +32,11 @@ class NonParamRegression(object):
self._bandwidth = None
self._bw_fct = None
self._method = None
self._fitted = False
self._kernel = None
self._lower = None
self._upper = None
self._kernel_type = None
self._method_obj = None
self._fitted_method = None
self._n = None
self._d = None
@@ -48,7 +47,7 @@ class NonParamRegression(object):
self.kernel_type = kernels.normal_kernel
if self._method is None:
self._method = npr_methods.default_method
self.method = npr_methods.default_method
if self._cov_fct is None and self._bw_fct is None and self._covariance is None and self._bandwidth is None:
self._cov_fct = kde_bandwidth.scotts_covariance
@@ -69,14 +68,14 @@ class NonParamRegression(object):
"""
Calling this function will mark the object as needing fitting.
"""
self._fitted = False
self._fitted_method = None
@property
def fitted(self):
"""
Check whether the object has been fitted.
"""
return self._fitted
return self._fitted_method is not None
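A sketch of the invalidation contract this property now expresses (the constructor arguments are assumed from the rest of the library):

import numpy as np
from pyqt_fit import nonparam_regression, npr_methods

xs = np.linspace(0., 1., 100)
ys = xs + 0.1 * np.random.randn(100)

reg = nonparam_regression.NonParamRegression(xs, ys)
reg.fit()
assert reg.fitted                          # a fitted method is attached
reg.method = npr_methods.SpatialAverage()  # the setter calls need_fit()
assert not reg.fitted                      # the fitted method was reset to None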
@property
def kernel(self):
@@ -223,7 +222,7 @@ class NonParamRegression(object):
@property
def method(self):
"""
Regression method itself. It should follow the template of
Regression method itself. It should be an instance of a class following the template of
:py:class:`pyqt_fit.npr_methods.RegressionKernelMethod`.
"""
return self._method
@@ -234,11 +233,13 @@ class NonParamRegression(object):
self.need_fit()
@property
def method_object(self):
def fitted_method(self):
"""
Instance of the method, fitted for the current class.
Method actually used after fitting.
The main method may substitute a more specialised method during fitting.
"""
return self._method_obj
return self._fitted_method
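For instance (a sketch, with the constructor arguments assumed as above), a local polynomial of order 1 is replaced by the dedicated linear implementation during fitting, so the two properties differ:

import numpy as np
from pyqt_fit import nonparam_regression, npr_methods

xs = np.linspace(0., 1., 100)
ys = xs ** 2 + 0.05 * np.random.randn(100)

reg = nonparam_regression.NonParamRegression(
    xs, ys, method=npr_methods.LocalPolynomialKernel(q=1))
reg.fit()
# reg.method is still the LocalPolynomialKernel; the object doing the
# work is the LocalLinearKernel1D returned by its fit()
print(type(reg.fitted_method).__name__)  # LocalLinearKernel1D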
@property
def N(self):
@@ -259,14 +260,12 @@ class NonParamRegression(object):
return self._kernel
return self._kernel_type(D)
def _create_method(self):
return self._method(self)
def compute_bandwidth(self):
def set_actual_bandwidth(self, bandwidth, covariance):
"""
Set the bandwidth and covariance actually used by the regression, as computed by the fitted method.
"""
self._bandwidth, self._covariance = npr_methods.compute_bandwidth(self)
self._bandwidth = bandwidth
self._covariance = covariance
def fit(self):
"""
@@ -281,8 +280,7 @@ class NonParamRegression(object):
self._upper = np.inf * np.ones((D,), dtype=float)
self._n = N
self._d = D
self._method_obj = self._create_method()
self._method_obj.fit(self)
self._fitted_method = self._method.fit(self)
def evaluate(self, points, out=None):
@@ -298,7 +296,7 @@ class NonParamRegression(object):
out = np.empty((points.shape[-1],), dtype=type(points.dtype.type() + 0.))
else:
out.shape = (points.shape[-1],)
self._method_obj.evaluate(self, points, out)
self._fitted_method.evaluate(self, points, out)
out.shape = real_shape[-1:]
return out
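Putting the pieces together, the expected calling sequence becomes (sketch, same assumptions on the constructor):

import numpy as np
from pyqt_fit import nonparam_regression

xs = np.sort(np.random.rand(200))
ys = np.cos(3 * xs) + 0.1 * np.random.randn(200)

reg = nonparam_regression.NonParamRegression(xs, ys)
reg.fit()                      # computes the bandwidth, builds fitted_method
grid = np.linspace(0., 1., 100)
est = reg.evaluate(grid)       # delegates to fitted_method.evaluate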
@@ -10,8 +10,8 @@ from scipy.linalg import sqrtm, solve
import scipy
import numpy as np
from .compat import irange
from .cyth import HAS_CYTHON
from . import kde
local_linear = None
@@ -58,8 +58,222 @@ class RegressionKernelMethod(object):
Base class for regression kernel methods
"""
def fit(self, reg):
reg.compute_bandwidth()
"""
Fit the method and return the fitted object that will be used for actual evaluation.
The object needs to call the :py:meth:`pyqt_fit.nonparam_regression.NonParamRegression.set_actual_bandwidth`
method with the computed bandwidth and covariance.
:Default: Compute the bandwidth based on the real data and set it in the regression object
"""
reg.set_actual_bandwidth(*compute_bandwidth(reg))
return self
def evaluate(self, reg, points, out):
    """
    Evaluate the regression on the provided points.

    :param NonParamRegression reg: regression object containing the data and bandwidth
    :param ndarray points: 2d-array of points to compute the regression on. Each column is a point.
    :param ndarray out: 1d-array in which to store the result
    :rtype: ndarray
    :return: The method must return the ``out`` array, updated with the regression values
    """
    raise NotImplementedError()
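A minimal, hypothetical subclass following this template; it illustrates the contract: fit() sets the actual bandwidth through the base class and returns the object used for evaluation.

import numpy as np

class ConstantMethod(RegressionKernelMethod):
    """Toy example: predict the global mean of ydata everywhere."""
    def fit(self, reg):
        self = super(ConstantMethod, self).fit(reg)  # sets the actual bandwidth
        self.mean = reg.ydata.mean()
        return self

    def evaluate(self, reg, points, out):
        out[...] = self.mean
        return out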
class SpatialAverage(RegressionKernelMethod):
r"""
Perform a Nadaraya-Watson regression on the data (also called
local-constant regression) using a gaussian kernel.
The Nadaraya-Watson estimate is given by:

.. math::

    f_n(x) \triangleq \frac{\sum_i K\left(\frac{x-X_i}{h}\right) Y_i}
                           {\sum_i K\left(\frac{x-X_i}{h}\right)}

Where :math:`K(x)` is the kernel; it must be a density with zero mean
(i.e. :math:`\int K(x)dx = 1` and :math:`\int xK(x)dx = 0`), and
:math:`h` is the bandwidth of the method.
The explaining variables, explained variables and kernel covariance are
taken from the :py:class:`~pyqt_fit.nonparam_regression.NonParamRegression`
object at fitting time.
"""
def __init__(self):
self.correction = 1.
def fit(self, reg):
    self = super(SpatialAverage, self).fit(reg)  # computes and sets the bandwidth
    self._inv_cov = np.linalg.inv(np.atleast_2d(reg.covariance))
    return self

def evaluate(self, reg, points, out):
    d, m = points.shape
    norm = np.zeros((m,), points.dtype)
    out[...] = 0  # the output buffer may be uninitialised
    # iterate on the data points, with their correction coefficient
    for i, ci in np.broadcast(irange(reg.N),
                              irange(self._correction.shape[0])):
        diff = np.dot(self._correction[ci],
                      reg.xdata[:, i, np.newaxis] - points)
        tdiff = np.dot(self._inv_cov, diff)
        energy = np.exp(-np.sum(diff * tdiff, axis=0) / 2.0)
        out += reg.ydata[i] * energy
        norm += energy
    out[norm > 1e-50] /= norm[norm > 1e-50]
    return out
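For intuition, in the 1D case with a scalar bandwidth and no correction, the loop above computes the same thing as this vectorised sketch:

import numpy as np

def nadaraya_watson_1d(h, xdata, ydata, points):
    u = (points[np.newaxis, :] - xdata[:, np.newaxis]) / h  # (n, m) scaled distances
    w = np.exp(-0.5 * u * u)                                # gaussian weights
    return (w * ydata[:, np.newaxis]).sum(axis=0) / w.sum(axis=0)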
@property
def correction(self):
"""
The correction coefficient allows changing the width of the kernel
depending on the point considered. It can be either a constant (to
globally correct the kernel width), or a 1D array of the same size as
the input.
return self._correction
@correction.setter
def correction(self, value):
value = np.atleast_1d(value)
assert len(value.shape) == 1, "Error, the correction must be a single value or a 1D array"
self._correction = value
def set_density_correction(self):
"""
Add a correction coefficient depending on the density of the input
"""
est = kde.KDE1D(self.xdata)
dens = est(self.xdata)
dm = dens.max()
dens[dens < 1e-50] = dm
self._correction = dm / dens
class LocalLinearKernel1D(RegressionKernelMethod):
r"""
Perform a local-linear regression using a gaussian kernel.
The local-linear regression is the function that minimises, for each
position:

.. math::

    f_n(x) \triangleq \argmin_{a_0,a_1\in\mathbb{R}}
        \sum_i K\left(\frac{x-X_i}{h}\right)
        \left(Y_i - a_0 - a_1(x-X_i)\right)^2

Where :math:`K(x)` is the kernel; it must be a density with zero mean
(i.e. :math:`\int K(x)dx = 1` and :math:`\int xK(x)dx = 0`), and
:math:`h` is the bandwidth of the method.
"""
def fit(self, reg):
return super(LocalLinearKernel1D, self).fit(reg)
def evaluate(self, reg, points, out):
"""
Evaluate the spatial averaging on a set of points
:param ndarray points: Points to evaluate the averaging on
:param ndarray result: If provided, the result will be put in this
array
"""
points = points[0]
xdata = reg.xdata[0]
# use the Cython implementation when available and the output buffer is
# contiguous; otherwise fall back to the pure-Python version
ll = py_local_linear.local_linear_1d
if HAS_CYTHON and out.flags['C_CONTIGUOUS']:
    ll = local_linear.local_linear_1d
li2, out = ll(reg.bandwidth, xdata, reg.ydata, points, out)
self.li2 = li2
return out
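For reference, the minimisation in the class docstring has a standard closed-form solution, which is what both the Cython and the pure-Python ``local_linear_1d`` evaluate:

.. math::

    f_n(x) = \frac{t_0(x) s_2(x) - t_1(x) s_1(x)}{s_0(x) s_2(x) - s_1(x)^2},
    \qquad
    s_k(x) = \sum_i K\left(\frac{x-X_i}{h}\right) (x-X_i)^k,
    \quad
    t_k(x) = \sum_i K\left(\frac{x-X_i}{h}\right) (x-X_i)^k Y_i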
class PolynomialDesignMatrix1D(object):
def __init__(self, degree):
self.degree = degree
powers = np.arange(0, degree + 1).reshape((1, degree + 1))
self.powers = powers
def __call__(self, dX, out=None):
return np.power(dX, self.powers, out)
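A quick sanity check of this helper (sketch): each column of the result is a power of the distances.

import numpy as np

dm = PolynomialDesignMatrix1D(2)
dX = np.array([[0.5], [2.0]])  # column of distances x - X_i
print(dm(dX))
# [[ 1.    0.5   0.25]
#  [ 1.    2.    4.  ]]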
class LocalPolynomialKernel1D(RegressionKernelMethod):
r"""
Perform a local-polynomial regression using a user-provided kernel
(Gaussian by default).
The local-polynomial regression is the function that minimises, for each
position:

.. math::

    f_n(x) \triangleq \argmin_{a_0,\ldots,a_q\in\mathbb{R}}
        \sum_i K\left(\frac{x-X_i}{h}\right)
        \left(Y_i - a_0 - a_1(x-X_i) - \ldots -
            a_q \frac{(x-X_i)^q}{q!}\right)^2

Where :math:`K(x)` is the kernel (a density with zero mean), :math:`q`
is the order of the fitted polynomial and :math:`h` is the bandwidth of
the method. It is also recommended to have :math:`\int_\mathbb{R} x^2K(x)dx
= 1` (i.e. a kernel of unit variance), or the effective bandwidth will be
scaled by the standard deviation of the kernel.
:param int q: Order of the polynomial to fit. **Default:** 3

The explaining variables, explained variables and bandwidth are taken
from the :py:class:`~pyqt_fit.nonparam_regression.NonParamRegression`
object at fitting time.
"""
def __init__(self, q=3):
self.q = q
def fit(self, reg):
assert reg.dim == 1, "This method can only be used with 1D data"
if self.q == 0:
obj = SpatialAverage()
return obj.fit(reg)
elif self.q == 1:
obj = LocalLinearKernel1D()
return obj.fit(reg)
self = super(LocalPolynomialKernel1D, self).fit(reg)
self.designMatrix = PolynomialDesignMatrix1D(self.q)
return self
def evaluate(self, reg, points, out):
"""
Evaluate the spatial averaging on a set of points
:param ndarray points: Points to evaluate the averaging on
:param ndarray result: If provided, the result will be put
in this array
"""
xdata = reg.xdata[0, :, np.newaxis] # make it a column vector
ydata = reg.ydata[:, np.newaxis] # make it a column vector
points = points[0]  # make it a row vector
q = self.q
bw = reg.bandwidth
kernel = reg.kernel
designMatrix = self.designMatrix
for i, p in enumerate(points):
dX = (xdata - p)
Wx = kernel(dX / bw)
Xx = designMatrix(dX)
WxXx = Wx * Xx
XWX = np.dot(Xx.T, WxXx)
Lx = solve(XWX, WxXx.T)[0]
out[i] = np.dot(Lx, ydata)
return out
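Each iteration of this loop solves a weighted least-squares problem: with :math:`W_x = \mathrm{diag}\, K\left(\frac{x-X_i}{h}\right)` and :math:`X_x` the design matrix of powers, ``Lx`` is the first row of the normal-equation solution, so that

.. math::

    f_n(x) = e_1^T \left(X_x^T W_x X_x\right)^{-1} X_x^T W_x Y,
    \qquad e_1 = (1, 0, \ldots, 0)^T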
class PolynomialDesignMatrix(object):
"""
@@ -67,6 +281,8 @@ class PolynomialDesignMatrix(object):
"""
def __init__(self, dim, deg):
    self.dim = dim
    self.deg = deg
    self._designMatrixSize()
@@ -205,12 +421,16 @@ class LocalPolynomialKernel(RegressionKernelMethod):
the variance.
**Default:** ``scotts_covariance``
"""
def __init__(self, reg, q=3):
def __init__(self, q=3):
self.q = q
def fit(self, reg):
super(LocalPolynomialKernel, self).fit(reg)
if reg.dim == 1:
obj = LocalPolynomialKernel1D(self.q)
return obj.fit(reg)
self = super(LocalPolynomialKernel, self).fit(reg)
self.designMatrix = PolynomialDesignMatrix(reg.dim, self.q)
return self
def evaluate(self, reg, points, out):
"""
@@ -239,5 +459,5 @@ class LocalPolynomialKernel(RegressionKernelMethod):
out[i] = np.dot(Lx, ydata)
return out
default_method = LocalPolynomialKernel
default_method = LocalPolynomialKernel(q=1)
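Note that ``default_method`` is now an instance rather than a class, so callers use it directly (sketch):

from pyqt_fit import npr_methods

method = npr_methods.default_method  # already instantiated, with q = 1
assert isinstance(method, npr_methods.LocalPolynomialKernel)
# on 1D data, its fit() will hand back a fitted LocalLinearKernel1D (see above)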
@@ -2,8 +2,7 @@ from __future__ import division, absolute_import, print_function
import numpy as np
def local_linear_1d(bw, xdata, ydata, points, out=None):
points = np.atleast_1d(points).astype(xdata.dtype)
def local_linear_1d(bw, xdata, ydata, points, out):
x0 = points - xdata[:, np.newaxis]
x02 = x0 * x0
wi = np.exp(-x02 / (2.0 * bw * bw))
......