mirror of https://github.com/python/cpython
gh-115532: Minor tweaks to kde() (gh-117897)
This commit is contained in:
parent
10f1a2687a
commit
0823f43618
|
@ -1163,7 +1163,7 @@ accurately approximated inverse cumulative distribution function.
|
||||||
.. testcode::
|
.. testcode::
|
||||||
|
|
||||||
from random import choice, random, seed
|
from random import choice, random, seed
|
||||||
from math import sqrt, log, pi, tan, asin
|
from math import sqrt, log, pi, tan, asin, cos, acos
|
||||||
from statistics import NormalDist
|
from statistics import NormalDist
|
||||||
|
|
||||||
kernel_invcdfs = {
|
kernel_invcdfs = {
|
||||||
|
@ -1172,6 +1172,7 @@ accurately approximated inverse cumulative distribution function.
|
||||||
'sigmoid': lambda p: log(tan(p * pi/2)),
|
'sigmoid': lambda p: log(tan(p * pi/2)),
|
||||||
'rectangular': lambda p: 2*p - 1,
|
'rectangular': lambda p: 2*p - 1,
|
||||||
'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
|
'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
|
||||||
|
'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3),
|
||||||
'cosine': lambda p: 2*asin(2*p - 1)/pi,
|
'cosine': lambda p: 2*asin(2*p - 1)/pi,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -919,13 +919,13 @@ def kde(data, h, kernel='normal', *, cumulative=False):
|
||||||
sqrt2pi = sqrt(2 * pi)
|
sqrt2pi = sqrt(2 * pi)
|
||||||
sqrt2 = sqrt(2)
|
sqrt2 = sqrt(2)
|
||||||
K = lambda t: exp(-1/2 * t * t) / sqrt2pi
|
K = lambda t: exp(-1/2 * t * t) / sqrt2pi
|
||||||
I = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
|
W = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
|
||||||
support = None
|
support = None
|
||||||
|
|
||||||
case 'logistic':
|
case 'logistic':
|
||||||
# 1.0 / (exp(t) + 2.0 + exp(-t))
|
# 1.0 / (exp(t) + 2.0 + exp(-t))
|
||||||
K = lambda t: 1/2 / (1.0 + cosh(t))
|
K = lambda t: 1/2 / (1.0 + cosh(t))
|
||||||
I = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
|
W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
|
||||||
support = None
|
support = None
|
||||||
|
|
||||||
case 'sigmoid':
|
case 'sigmoid':
|
||||||
|
@ -933,39 +933,39 @@ def kde(data, h, kernel='normal', *, cumulative=False):
|
||||||
c1 = 1 / pi
|
c1 = 1 / pi
|
||||||
c2 = 2 / pi
|
c2 = 2 / pi
|
||||||
K = lambda t: c1 / cosh(t)
|
K = lambda t: c1 / cosh(t)
|
||||||
I = lambda t: c2 * atan(exp(t))
|
W = lambda t: c2 * atan(exp(t))
|
||||||
support = None
|
support = None
|
||||||
|
|
||||||
case 'rectangular' | 'uniform':
|
case 'rectangular' | 'uniform':
|
||||||
K = lambda t: 1/2
|
K = lambda t: 1/2
|
||||||
I = lambda t: 1/2 * t + 1/2
|
W = lambda t: 1/2 * t + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case 'triangular':
|
case 'triangular':
|
||||||
K = lambda t: 1.0 - abs(t)
|
K = lambda t: 1.0 - abs(t)
|
||||||
I = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
|
W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case 'parabolic' | 'epanechnikov':
|
case 'parabolic' | 'epanechnikov':
|
||||||
K = lambda t: 3/4 * (1.0 - t * t)
|
K = lambda t: 3/4 * (1.0 - t * t)
|
||||||
I = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
|
W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case 'quartic' | 'biweight':
|
case 'quartic' | 'biweight':
|
||||||
K = lambda t: 15/16 * (1.0 - t * t) ** 2
|
K = lambda t: 15/16 * (1.0 - t * t) ** 2
|
||||||
I = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
|
W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case 'triweight':
|
case 'triweight':
|
||||||
K = lambda t: 35/32 * (1.0 - t * t) ** 3
|
K = lambda t: 35/32 * (1.0 - t * t) ** 3
|
||||||
I = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
|
W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case 'cosine':
|
case 'cosine':
|
||||||
c1 = pi / 4
|
c1 = pi / 4
|
||||||
c2 = pi / 2
|
c2 = pi / 2
|
||||||
K = lambda t: c1 * cos(c2 * t)
|
K = lambda t: c1 * cos(c2 * t)
|
||||||
I = lambda t: 1/2 * sin(c2 * t) + 1/2
|
W = lambda t: 1/2 * sin(c2 * t) + 1/2
|
||||||
support = 1.0
|
support = 1.0
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
|
@ -974,10 +974,14 @@ def kde(data, h, kernel='normal', *, cumulative=False):
|
||||||
if support is None:
|
if support is None:
|
||||||
|
|
||||||
def pdf(x):
|
def pdf(x):
|
||||||
|
n = len(data)
|
||||||
return sum(K((x - x_i) / h) for x_i in data) / (n * h)
|
return sum(K((x - x_i) / h) for x_i in data) / (n * h)
|
||||||
|
|
||||||
def cdf(x):
|
def cdf(x):
|
||||||
return sum(I((x - x_i) / h) for x_i in data) / n
|
|
||||||
|
n = len(data)
|
||||||
|
return sum(W((x - x_i) / h) for x_i in data) / n
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
@ -985,16 +989,24 @@ def kde(data, h, kernel='normal', *, cumulative=False):
|
||||||
bandwidth = h * support
|
bandwidth = h * support
|
||||||
|
|
||||||
def pdf(x):
|
def pdf(x):
|
||||||
|
nonlocal n, sample
|
||||||
|
if len(data) != n:
|
||||||
|
sample = sorted(data)
|
||||||
|
n = len(data)
|
||||||
i = bisect_left(sample, x - bandwidth)
|
i = bisect_left(sample, x - bandwidth)
|
||||||
j = bisect_right(sample, x + bandwidth)
|
j = bisect_right(sample, x + bandwidth)
|
||||||
supported = sample[i : j]
|
supported = sample[i : j]
|
||||||
return sum(K((x - x_i) / h) for x_i in supported) / (n * h)
|
return sum(K((x - x_i) / h) for x_i in supported) / (n * h)
|
||||||
|
|
||||||
def cdf(x):
|
def cdf(x):
|
||||||
|
nonlocal n, sample
|
||||||
|
if len(data) != n:
|
||||||
|
sample = sorted(data)
|
||||||
|
n = len(data)
|
||||||
i = bisect_left(sample, x - bandwidth)
|
i = bisect_left(sample, x - bandwidth)
|
||||||
j = bisect_right(sample, x + bandwidth)
|
j = bisect_right(sample, x + bandwidth)
|
||||||
supported = sample[i : j]
|
supported = sample[i : j]
|
||||||
return sum((I((x - x_i) / h) for x_i in supported), i) / n
|
return sum((W((x - x_i) / h) for x_i in supported), i) / n
|
||||||
|
|
||||||
if cumulative:
|
if cumulative:
|
||||||
cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}'
|
cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}'
|
||||||
|
|
Loading…
Reference in New Issue