Added missing parameters in nonparam regression.

Added Kernel to the 1D local linear fitting.
Added ytrans for the main object to transform the ydata before fitting
and after evaluation.
Removed reloadable Cython option as it causes trouble with IPython.
parent 9d351769
......@@ -36,6 +36,10 @@ Only extra methods will be described:
.. autoattribute:: q
This class uses the following function:
.. autofunction:: pyqt_fit.py_local_linear.local_linear_1d
.. autoclass:: LocalPolynomialKernel1D
.. autoattribute:: q
......
......@@ -9,7 +9,7 @@ import numpy as np
from numpy.random import randint
from scipy import optimize
from collections import namedtuple
from . import kernel_smoothing
from . import nonparam_regression
from . import sharedmem
import multiprocessing as mp
from . import bootstrap_workers
......@@ -56,7 +56,7 @@ def bootstrap_residuals(fct, xdata, ydata, repeats=3000, residuals=None,
:type add_residual: callable or None
:param add_residual: Function that add a residual to a value. The call
``add_residual(ydata, residual)`` should return the new ydata, with
``add_residual(yopt, residual)`` should return the new ydata, with
the residuals 'applied'. If None, it is considered the residuals should
simply be added.
......@@ -97,7 +97,9 @@ def bootstrap_residuals(fct, xdata, ydata, repeats=3000, residuals=None,
shuffled_res = res[shuffle]
if correct_bias:
kde = kernel_smoothing.LocalLinearKernel1D(xdata, res)
kde = nonparam_regression.NonParamRegression(xdata, res)
kde.method.q = 1
kde.fit()
bias = kde(xdata)
shuffled_res += bias
......
......@@ -7,9 +7,10 @@ DTYPE = np.float
ctypedef np.float64_t DTYPE_t
@cython.boundscheck(False)
cdef void cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t, ndim=1] ydata,
np.ndarray[DTYPE_t, ndim=1] points,
np.ndarray[DTYPE_t, ndim=1] li2, np.ndarray[DTYPE_t, ndim=1] out):
cdef cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t, ndim=1] ydata,
np.ndarray[DTYPE_t, ndim=1] points,
np.ndarray[DTYPE_t, ndim=1] li2,
np.ndarray[DTYPE_t, ndim=1] out):
cdef:
unsigned int nx = xdata.shape[0]
unsigned int npts = points.shape[0]
......@@ -38,13 +39,16 @@ cdef void cy_li(double bw, np.ndarray[DTYPE_t, ndim=1] xdata, np.ndarray[DTYPE_t
bi[i] = lbi
sbi += lbi
if sbi == 0:
raise ValueError("sbi == 0")
out[j] = 0
for i in range(nx):
li = bi[i] / sbi
li2[j] += li * li
out[j] += li * ydata[i]
def local_linear_1d(bw, xdata, ydata, points, out):
def local_linear_1d(bw, xdata, ydata, points, kernel, out):
bw = float(bw)
li2 = np.empty(points.shape, dtype=float)
cy_li(bw, xdata, ydata, points, li2, out)
......
......@@ -29,7 +29,7 @@ if HAS_CYTHON:
os.environ['PATH'] = r'C:\MinGW\bin'
mingw_setup_args = {'options': {'build_ext': {'compiler': 'mingw32'}}}
pyximport.install(setup_args=mingw_setup_args, reload_support=True)
pyximport.install(setup_args=mingw_setup_args)
elif os.name == 'posix':
extra_flags = '-I' + numpy.get_include()
......@@ -38,5 +38,5 @@ if HAS_CYTHON:
os.environ['CXXFLAGS'] = " ".join([os.environ.get('CXXFLAGS', ''),
extra_flags])
pyximport.install(reload_support=True)
pyximport.install()
......@@ -34,6 +34,8 @@ class NonParamRegression(object):
self._fitted_method = None
self._n = None
self._d = None
self._ytrans = None
self._fitted_ydata = None
for kw in kwords:
setattr(self, kw, kwords[kw])
......@@ -218,6 +220,33 @@ class NonParamRegression(object):
self._ydata = yd
self.need_fit()
@property
def fitted_ydata(self):
    """
    Y data that was actually used during the fit.

    When ``ytrans`` is set this is the transformed version of ``ydata``;
    otherwise it is ``ydata`` itself.
    """
    return self._fitted_ydata
@property
def ytrans(self):
    """
    Function used to transform the Y data before fitting.

    This must be a callable that also has an ``inv`` attribute returning
    the inverse function.

    :Note: The ``inv`` method must accept an ``out`` argument to store
        the output.
    """
    return self._ytrans

@ytrans.setter
def ytrans(self, tr):
    # Validate with a real exception: ``assert`` statements are stripped
    # when Python runs with -O, which would silently accept an invalid
    # transform here.
    if not (callable(tr) and hasattr(tr, 'inv')):
        raise TypeError("The transform must be a callable with an `inv` attribute")
    self._ytrans = tr
    # Changing the transform changes the data actually fitted, so the
    # current fit is stale -- mirror the ydata setter and mark a refit.
    self.need_fit()

@ytrans.deleter
def ytrans(self):
    # Removing the transform means ydata will be fitted untransformed,
    # so the current fit is stale as well.
    self._ytrans = None
    self.need_fit()
@property
def method(self):
"""
......@@ -272,6 +301,10 @@ class NonParamRegression(object):
"""
D, N = self._xdata.shape
assert self._ydata.shape[0] == N, "There must be as many points for X and Y"
if self.ytrans is not None:
self._fitted_ydata = self.ytrans(self.ydata)
else:
self._fitted_ydata = self.ydata
self._kernel = self._create_kernel(D)
self._n = N
self._d = D
......@@ -300,6 +333,8 @@ class NonParamRegression(object):
out.shape = (points.shape[-1],)
self._fitted_method.evaluate(self, points, out)
out.shape = real_shape[-1:]
if self.ytrans:
self.ytrans.inv(out, out=out)
return out
def __call__(self, points, out=None):
......
......@@ -12,6 +12,7 @@ from .compat import irange
from .cyth import HAS_CYTHON
from . import kde
from . import py_local_linear
from . import kernels
local_linear = None
......@@ -116,7 +117,7 @@ class SpatialAverage(RegressionKernelMethod):
d, m = points.shape
norm = np.zeros((m,), points.dtype)
xdata = reg.xdata[..., np.newaxis]
ydata = reg.ydata
ydata = reg.fitted_ydata
correction = self.correction
N = reg.N
inv_cov = self.inv_cov
......@@ -198,9 +199,9 @@ class LocalLinearKernel1D(RegressionKernelMethod):
points = points[0]
xdata = reg.xdata[0]
ll = local_linear.local_linear_1d
if not out.flags['C_CONTIGUOUS']:
if not isinstance(reg.kernel, kernels.normal_kernel1d):
ll = py_local_linear.local_linear_1d
li2, out = ll(reg.bandwidth, xdata, reg.ydata, points, out)
li2, out = ll(reg.bandwidth, xdata, reg.fitted_ydata, points, reg.kernel, out)
self.li2 = li2
return out
......@@ -289,7 +290,7 @@ class LocalPolynomialKernel1D(RegressionKernelMethod):
in this array
"""
xdata = reg.xdata[0, :, np.newaxis] # make it a column vector
ydata = reg.ydata[:, np.newaxis] # make it a column vector
ydata = reg.fitted_ydata[:, np.newaxis] # make it a column vector
points = points[0] # make it a line vector
bw = reg.bandwidth
kernel = reg.kernel
......@@ -480,7 +481,7 @@ class LocalPolynomialKernel(RegressionKernelMethod):
:param ndarray out: Pre-allocated array for the result
"""
xdata = reg.xdata
ydata = reg.ydata[:, np.newaxis] # make it a column vector
ydata = reg.fitted_ydata[:, np.newaxis] # make it a column vector
d, n = xdata.shape
designMatrix = self.designMatrix
dm_size = designMatrix.size
......
......@@ -5,7 +5,7 @@ This modules implement functions to test and plot parametric regression.
"""
from __future__ import division, print_function, absolute_import
from numpy import argsort, std, abs, sqrt, arange, pi, c_
from numpy import argsort, std, abs, sqrt, arange, pi, c_, asarray
from pylab import figure, title, legend, plot, xlabel, ylabel, subplot, ylim, hist, suptitle, gca
from .compat import izip
from itertools import chain
......@@ -309,6 +309,10 @@ def plot_residual_tests(xdata, yopts, res, fct_name, xname="X", yname='Y', res_n
except TypeError:
figure(fig.number)
xdata = asarray(xdata)
yopts = asarray(yopts)
res = asarray(res)
subplot(2, 2, 1)
# First subplot is the residuals
if len(xdata.shape) == 1 or xdata.shape[1] == 1:
......
......@@ -2,9 +2,33 @@ from __future__ import division, absolute_import, print_function
import numpy as np
def local_linear_1d(bw, xdata, ydata, points, out):
def local_linear_1d(bw, xdata, ydata, points, kernel, out):
r'''
    We are trying to find the fitted value at points :math:`x`, given a Gaussian kernel
Given the following definitions:
.. math::
x_0 &=& x-x_i
\begin{array}{rlc|rlc}
w_i &=& \mathcal{K}\left(\frac{x_0}{h}\right) & W &=& \sum_i w_i \\
        X &=& \sum_i w_i x_0 & X_2 &=& \sum_i w_i x_0^2 \\
Y &=& \sum_i w_i y_i & Y_2 &=& \sum_i w_i y_i x_0
\end{array}
The fitted value is given by:
.. math::
        f(x) = \frac{X_2 Y - X Y_2}{W X_2 - X^2}
'''
x0 = points - xdata[:, np.newaxis]
x02 = x0 * x0
# wi = kernel(x0 / bw)
wi = np.exp(-x02 / (2.0 * bw * bw))
X = np.sum(wi * x0, axis=0)
X2 = np.sum(wi * x02, axis=0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment