KDE fully tested and debuged!

parent 8c9a9966
......@@ -4,6 +4,8 @@ v1.2.5, ??
- Corrected all the kernels, both Python and Cython
- Added and restructure kernel tests
- Restructured KDE tests.
- Moved TransformKDE to the kde_methods
- Updated tutorial
v1.2.4, 30/7/2014
- Corrected KDE code to pass tests
- All tests pass for Python2 and Python3
......
......@@ -9,7 +9,7 @@ Kernel Density Estimation Methods
.. autoclass:: KDE1D
:members: bandwidth, closed, copy, covariance, evaluate, grid, kernel,
cdf, cdf_grid, sf, hazard, cumhazard, icdf, icdf_grid,
lambdas, lower, method, total_weights, update_bandwidth, upper,
lambdas, lower, method, total_weights, fit, upper,
weights, xdata, __call__
Bandwidth Estimation Methods
......
......@@ -3,7 +3,7 @@ Module :py:mod:`pyqt_fit.kde_methods`
.. automodule:: pyqt_fit.kde_methods
:module: pyqt_fit.kde_methods
.. py:currentmodule:: pyqt_fit.kde_methods
Univariate KDE estimation methods
---------------------------------
......@@ -12,6 +12,8 @@ The exact definition of such a method is found in :py:attr:`pyqt_fit.kde.KDE1D.m
.. autofunction:: generate_grid
.. autofunction:: compute_bandwidth
.. autoclass:: KDE1DMethod
The following methods are interface methods that should be overriden with ones specific to the implemented method.
......@@ -52,6 +54,7 @@ The exact definition of such a method is found in :py:attr:`pyqt_fit.kde.KDE1D.m
.. attribute:: name
:type: str
Specify a human-readable name for the method, for presentation purposes.
But the class also provide a number of utility methods to help implementing you own:
......
......@@ -145,18 +145,8 @@ class KDE1D(object):
def fit(self):
"""
Re-compute the bandwidth if it was specified as a function.
"""
if self._bw_fct:
_bw = float(self._bw_fct(self._xdata, model=self))
_cov = _bw * _bw
elif self._cov_fct:
_cov = float(self._cov_fct(self._xdata, model=self))
_bw = np.sqrt(_cov)
else:
return
self._covariance = _cov
self._bw = _bw
Compute the various parameters needed by the kde method
"""
if self._weights.shape:
assert self._weights.shape == self._xdata.shape, \
"There must be as many weights as data points"
......@@ -477,6 +467,13 @@ class KDE1D(object):
self.fit_if_needed()
return self.method.cumhazard(self, np.atleast_1d(points), out)
def cumhazard_grid(self, N=None, cut=None):
"""
Compute the cumulative hazard function evaluated on a grid.
"""
self.fit_if_needed()
return self.method.cumhazard_grid(self, N, cut)
@property
def method(self):
"""
......
This diff is collapsed.
......@@ -69,7 +69,40 @@ class Kernel1D(object):
- an average of 0 (i.e. centered);
- a finite variance. It is even recommanded that the variance is close to 1 to give a uniform meaning to the
bandwidth.
.. py:attribute:: cut
:type: float
Cutting point after which there is a negligeable part of the probability. More formally, if :math:`c` is the
cutting point:
.. math::
\int_{-c}^c p(x) dx \approx 1
.. py:attribute:: lower
:type: float
Lower bound of the support of the PDF. Formally, if :math:`l` is the lower bound:
.. math::
\int_{-\infty}^l p(x)dx = 0
.. py:attribute:: upper
:type: float
Upper bound of the support of the PDF. Formally, if :math:`u` is the upper bound:
.. math::
\int_u^\infty p(x)dx = 0
"""
cut = 3.
lower = -np.inf
upper = np.inf
......@@ -103,9 +136,15 @@ class Kernel1D(object):
except AttributeError:
def pdf(x):
return self.pdf(np.atleast_1d(x))
lower = self.lower
upper = self.upper
@make_ufunc()
def comp_pdf(x):
return integrate.quad(pdf, self.lower, x)[0]
if x < lower:
return 0
if x > upper:
x = upper
return integrate.quad(pdf, lower, x)[0]
self.__comp_cdf = comp_pdf
if out is None:
out = np.empty(z.shape, dtype=float)
......@@ -123,11 +162,17 @@ class Kernel1D(object):
try:
comp_pm1 = self.__comp_pm1
except AttributeError:
lower = self.lower
upper = self.upper
def pm1(x):
return x * self.pdf(np.atleast_1d(x))
@make_ufunc()
def comp_pm1(x):
return integrate.quad(pm1, -np.inf, x)[0]
if x <= lower:
return 0
if x > upper:
x = upper
return integrate.quad(pm1, lower, x)[0]
self.__comp_pm1 = comp_pm1
if out is None:
out = np.empty(z.shape, dtype=float)
......@@ -146,11 +191,17 @@ class Kernel1D(object):
try:
comp_pm2 = self.__comp_pm2
except AttributeError:
lower = self.lower
upper = self.upper
def pm2(x):
return x * x * self.pdf(np.atleast_1d(x))
@make_ufunc()
def comp_pm2(x):
return integrate.quad(pm2, -np.inf, x)[0]
if x <= lower:
return 0
if x > upper:
x = upper
return integrate.quad(pm2, lower, x)[0]
self.__comp_pm2 = comp_pm2
if out is None:
out = np.empty(z.shape, dtype=float)
......@@ -370,8 +421,9 @@ class tricube(Kernel1D):
__call__ = pdf
lower = -_kernels_py.tricube_width
upper = _kernels_py.tricube_width
upper = 1. / _kernels_py.tricube_width
lower = -upper
cut = upper
def cdf(self, z, out=None):
r"""
......@@ -444,8 +496,9 @@ class Epanechnikov(Kernel1D):
return kernels_imp.epanechnikov_pdf(xs, out)
__call__ = pdf
lower = -_kernels_py.epanechnikov_width
upper = _kernels_py.epanechnikov_width
upper = 1./_kernels_py.epanechnikov_width
lower = -upper
cut = upper
def cdf(self, xs, out=None):
r"""
......@@ -502,6 +555,11 @@ class Epanechnikov_order4(Kernel1D):
where :math:`K` is the non-normalized Epanechnikov kernel.
"""
upper = 1
lower = -upper
cut = upper
def pdf(self, xs, out=None):
return kernels_imp.epanechnikov_o4_pdf(xs, out)
__call__ = pdf
......@@ -527,6 +585,11 @@ class normal_order4(Kernel1D):
where :math:`\phi` is the normal kernel.
"""
lower = -np.inf
upper = np.inf
cut = 3.
def pdf(self, xs, out=None):
return kernels_imp.normal_o4_pdf(xs, out)
__call__ = pdf
......@@ -543,67 +606,3 @@ class normal_order4(Kernel1D):
kernels1D = [normal_kernel1d, tricube, Epanechnikov, Epanechnikov_order4, normal_order4]
kernelsnD = [normal_kernel]
"""
.. py:class:: Kernel
.. py:method:: pdf(self, z, out=None)
Returns the density of the kernel on the points `z`. This is the funtion :math:`K(z)` itself.
:param ndarray z: Array of points to evaluate the function on. The method should accept any shape of array.
:param ndarray out: If provided, it will be of the same shape as `z` and the result should be stored in it.
Ideally, it should be used for as many intermediate computation as possible.
.. note::
This is the only function that really needs to be implemented. Although the others will be required for an
acceptable execution time.
.. py:method:: __call__(self, z, out=None)
Alias for the :py:meth:`pdf` method.
.. py:method:: cdf(self, z, out=None)
Returns the cumulative density function on the points `z`, i.e.:
.. math::
K_0(z) = \int_{-\infty}^z K(t) dt
.. py:method:: pm1(self, z, out=None)
Returns the first moment of the density function, i.e.:
.. math::
K_1(z) = \int_{-\infty}^z z K(t) dt
.. py:method:: pm2(self, z, out=None)
Returns the second moment of the density function, i.e.:
.. math::
K_2(z) = \int_{-\infty}^z z^2 K(t) dt
.. py:method:: fft(self, z, out=None)
FFT of the kernel on the points of ``z``. The points will always be provided as a grid with :math:`2^n` points,
representing the whole frequency range to be explored. For convenience, the second half of the points will be
provided as negative values.
.. note::
This method is optional. If note provided, the FFT will be computed using ``fftpack``.
.. py:method:: dct(self, z, out=None)
DCT of the kernel on the points of ``z``. The points will always be provided as a grid with :math:`2^n` points,
representing the whole frequency range to be explored.
.. note::
This method is optional. If note provided, the DCT will be computed using ``fftpack``.
"""
......@@ -78,7 +78,7 @@ class KDETester(object):
return k
def test_methods(self):
for m in methods:
for m in self.methods:
for i in irange(len(self.sizes)):
k = self.createKDE(self.vs[i], m)
yield self.method_works, k, m, '{0}_{1}'.format(m.instance, i)
......
......@@ -114,6 +114,7 @@ class LogTestKDE1D(TestKDE1D):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_lognorm(cls)
cls.methods = kde_utils.methods_log
class TestSF(kde_utils.KDETester):
@classmethod
......@@ -144,6 +145,7 @@ class TestLogSF(TestSF):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_lognorm(cls)
cls.methods = kde_utils.methods_log
del cls.sizes[1:]
class TestISF(kde_utils.KDETester):
......@@ -154,7 +156,6 @@ class TestISF(kde_utils.KDETester):
del cls.sizes[1:]
def method_works(self, k, method, name):
k.fit()
sf = np.linspace(0, 1, 64)
sf_xs = k.isf(sf)
cdf_xs = k.icdf(1-sf)
......@@ -177,4 +178,112 @@ class TestLogISF(TestISF):
kde_utils.setupClass_lognorm(cls)
del cls.sizes[1:]
class TestICDF(kde_utils.KDETester):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_norm(cls)
cls.methods = kde_utils.methods
del cls.sizes[1:]
def method_works(self, k, method, name):
quant = np.linspace(0, 1, 64)
xs = k.icdf(quant)
cdf_quant = k.cdf(xs)
np.testing.assert_allclose(cdf_quant, quant, method.accuracy, method.accuracy)
def grid_method_works(self, k, method, name):
comp_cdf, xs = k.icdf_grid()
ref_cdf = k.cdf(xs)
np.testing.assert_allclose(comp_cdf, ref_cdf, method.grid_accuracy, method.grid_accuracy)
def kernel_works(self, ker, name):
pass
def grid_kernel_works(self, ker, name):
pass
class TestLogICDF(TestICDF):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_lognorm(cls)
cls.methods = kde_utils.methods_log
del cls.sizes[1:]
class TestHazard(kde_utils.KDETester):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_norm(cls)
cls.methods = kde_utils.methods
del cls.sizes[1:]
def method_works(self, k, method, name):
k.fit()
xs = kde_methods.generate_grid(k)
h_comp = k.hazard(xs)
sf = k.sf(xs)
h_ref = k.pdf(xs)
h_ref /= k.sf(xs)
sel = sf > np.sqrt(method.accuracy)
np.testing.assert_allclose(h_comp[sel], h_ref[sel], method.accuracy, method.accuracy)
def grid_method_works(self, k, method, name):
xs, h_comp = k.hazard_grid()
xs, sf = k.sf_grid()
h_ref = k.grid()[1]
h_ref /= sf
sel = sf > np.sqrt(method.accuracy)
# Only tests for sf big enough or error is too large
np.testing.assert_allclose(h_comp[sel], h_ref[sel], method.accuracy, method.accuracy)
def kernel_works(self, ker, name):
pass
def grid_kernel_works(self, ker, name):
pass
class TestLogHazard(TestHazard):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_lognorm(cls)
del cls.sizes[1:]
cls.methods = kde_utils.methods_log
class TestCumHazard(kde_utils.KDETester):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_norm(cls)
cls.methods = kde_utils.methods
del cls.sizes[1:]
def method_works(self, k, method, name):
k.fit()
xs = kde_methods.generate_grid(k)
h_comp = k.cumhazard(xs)
sf = k.sf(xs)
h_ref = -np.log(sf)
sel = sf > np.sqrt(method.accuracy)
np.testing.assert_allclose(h_comp[sel], h_ref[sel], method.accuracy, method.accuracy)
def grid_method_works(self, k, method, name):
xs, h_comp = k.cumhazard_grid()
xs, sf = k.sf_grid()
h_ref = -np.log(sf)
sel = sf > np.sqrt(method.accuracy)
# Only tests for sf big enough or error is too large
np.testing.assert_allclose(h_comp[sel], h_ref[sel], method.accuracy, method.accuracy)
def kernel_works(self, ker, name):
pass
def grid_kernel_works(self, ker, name):
pass
class TestLogCumHazard(TestCumHazard):
@classmethod
def setUpClass(cls):
kde_utils.setupClass_lognorm(cls)
del cls.sizes[1:]
cls.methods = kde_utils.methods_log
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment