Re: Nonparametric kernel density estimators
- To: mathgroup at smc.vnet.net
- Subject: [mg38478] Re: [mg38465] Nonparametric kernel density estimators
- From: "Johannes Ludsteck" <johannes.ludsteck at wiwi.uni-regensburg.de>
- Date: Fri, 20 Dec 2002 04:24:16 -0500 (EST)
- Organization: Universitaet Regensburg
- Sender: owner-wri-mathgroup at wolfram.com
Dear Mark,
I have writen a small quick and dirty
implementation. But it was only for own
explorative use and not tested exensively. Thus
there is absolute no warranty...(!)
Since the code is very short, I append it here.
Probably the code could be optimized in several
ways.
Best regards,
Johannes Ludsteck
BeginPackage["Statistics`KernelDensitySmoothing`",
{"Statistics`DescriptiveStatistics`",
"Statistics`ContinuousDistributions`"}]
Epanechnikov::usage = "Epanechnikov[x] is the
Epanechnikov kernel.
See help for Kernel"
Quartic::usage = "Quartic is the Quartic kernel.
See help for \
Kernel."
Triangular::usage = "Triangular is the Triangular
kernel. See help \
for Kernel."
Rectangular::usage = "Rectangular is the
Rectangular kernel. See help \
for Kernel."
Kernel::usage = "Kernel->aKernel is an option for
Smooth, defining which \
kernel to use for kernel smoothing. Available
kernels are \
Epanechnikov,Quartic,Triangular and Rectangular;
default is \
Kernel->Epanechnikov."
Smooth::usage = "Smooth[data,gridSpec] computes a
density \
estimate for the one-dimensional list data. \
gridSpec may be a list of points where the density
has to be computed, \
or an iterator specification {min,max,step}, or a
positive real number. \
If gridSpec is a number, Smooth interprets it as
step length and \
generates a grid list
{Min[data],Max[data],gridSpec}. \
Options are Kernel->aKernel and SRatio->aNumber. \
Kernel defines the kernel to use (see help for
Kernel), \
and SRatio defines the kernel bandwidth as ratio
to Silverman's optimal bandwidth \
rule."
SRatio::usage = "SRatio->aRealNumber defines the
kernel bandwidth (smoothing \
parameter) as ratio to Silverman's optimal
bandwidth rule. Default is \
SRatio->1.0"
OptBandWidth::usage =
"OptBandWidth[data_List,Kernel_] returns the
optimal band width \
according to Silverman's rule of thumb."
CIPLot::usage = "CIPlot[dens, bandWidth, alpha,
kernelType] \
creates a confidence interval plot of density dens
(dens must be a list \
of pairs {x,y}). You have to provide the band
width, which was used to \
smooth the density, the confidence level alpha and
the kernel Type (see \
help for Kernel). Allowed options are all options
for ListPlot except \
DisplayFunction.";
Unprotect[Variance,Smooth,BandWidth,CIPlot,Kernel,
SRatio];
Options[Smooth] = {Kernel -> Epanechnikov, SRatio
-> 1.0};
Options[Variance] = {Kernel -> Epanechnikov};
Smooth::opttype = "Kernel specification `1` is not
an valid Kernel type.";
Smooth::gridspec = "Grid specification `1` is
incorrect. See help for Smooth.";
Smooth::sratio = "Smooting parameter `1` in option
SRatio-> must be a real number > 0.";
Unprotect[Smooth, Triangular, Epanechnikov,
Quartic, Rectangular, OptBandWidth];
Begin["`Private`"]
epanechnikov = Compile[{{x, _Real, 1}},
Map[If[Abs[#] <= 1, (3/4)(1 - #^2), 0] &, x]];
quartic = Compile[{{x, _Real, 1}},
Map[If[Abs[#] <= 1, (15/16) (1 - #^2)^2, 0] &,
x]];
triangular = Compile[{{x, _Real, 1}},
Map[If[Abs[#] <= 1, 1 - Abs[#], 0] &, x]];
rectangular = Compile[{{x, _Real, 1}},
Map[If[Abs[#] <= 1, 0.5, 0] &, x]];
Smooth[data_/;VectorQ[data,NumberQ], gridSpec_,
opts___] :=
Module[{d = sort[data], n = Length[data],
bw, min, max, kernType, kernFun, gridList},
{min, max} = {Min[d], Max[d]};
{kernType, sr} = {Kernel, SRatio} /. {opts} /.
Options[Smooth];
If[!optsOK[kernType,sr], Return[]];
If[!gridOK[gridSpec,min,max],
Message[Smooth::gridspec, gridSpec];
Return[]];
gridList = setGridList[gridSpec,min,max];
kernFun = chooseKernel[kernType];
bw = sr OptBandWidth[data,kernType];
d = Flatten[{min - 2 bw, d, max + 2 bw}];
MapThread[{#1, If[First[#2]==0, 0.0, 1/(n bw)
kernelSum[d, #1, #2, kernFun, bw]]} &,
{gridList, binListIndices[d, gridList,
bw]}]];
OptBandWidth[data_List,kType_] :=
1.364 (nu[2][kType] / mu[2][kType]^2)^0.2 *
Sqrt[Variance[data]] / Length[data]^0.2;
binListIndices = Compile[{{d, _Real, 1}, {grid,
_Real, 1}, {absBW, _Real}},
Module[{i, j, lim, result, n =
Length[grid]},
result = Table[{0, 0}, {n}];
For[i = 1; j = 2, i <= n, i++,
lim = grid[[i]] + absBW; While[d[[j]] <=
lim, j++];
If[d[[j - 1]] >= grid[[i]] - absBW ,
result[[i, 2]] = j - 1]];
For[i = n; j = Length[d] - 1, i > 0, i--,
lim = grid[[i]] - absBW; While[d[[j]] >=
lim, j--];
If[d[[j + 1]] <= grid[[i]] + absBW,
result[[i, 1]] = j + 1]];
result]];
kernelSum[d_, x_, limits_, kernFun_, bw_] :=
Tr[kernFun[Map[(x - #)/bw &, Take[d,
limits]]]];
optsOK[kernType_, sr_]:=
(If[!MemberQ[kernelTypes,kernType],
Message[Smooth::opttype, kernType];
Return[False]];
If[!NumberQ[sr], Message[Smooth::sratio, sr];
Return[False]];
If[sr <= 0, Message[Smooth::sratio, sr];
Return[False]];
(* ELSE: *)
True);
gridOK[step_?NumberQ,dataMin_,dataMax_]:=
If[step > dataMax - dataMin, False, True];
gridOK[{gridMin_,gridMax_,step_},dataMin_,dataMax_
]:=
(If[gridMin > gridMax, Return[False]];
If[gridMin < dataMin || gridMax >
dataMax,Return[False]];
If[step > gridMax - gridMin, Return[False]];
(* ELSE *) True);
gridOK[grid_,dataMin_,dataMax_]:=
(If[!VectorQ[grid,NumberQ], Return[False]];
If[Min[grid] < dataMin || Max[grid] > dataMax,
False, True]);
setGridList[grid_,__]:= grid;
setGridList[{gridMin_,gridMax_,step_},__]:=
Range[gridMin,gridMax,step];
setGridList[step_?NumberQ,dataMin_,dataMax_]:=
Range[dataMin,dataMax,step];
chooseKernel[kernelType_]:=
kernelFunctions[[Position[kernelTypes,kernelType][
[1,1]]]];
kernelTypes = {Epanechnikov, Triangular, Quartic,
Rectangular};
kernelFunctions = {epanechnikov, triangular,
quartic, rectangular};
(* Definition of kernel characteristics (used in
band width computataion) *)
mu2Values = {1/5, 1/6, 1/5, 1/3};
nu2Values = {3/5, 2/3, 5/7, 1/2};
MapThread[(mu[2][#1] = #2) &, {kernelTypes,
mu2Values}];
MapThread[(nu[2][#1] = #2) &, {kernelTypes,
nu2Values}];
sort = Compile[{{list,_Real,1}}, Sort[list,
Less]];
Variance[dens_Real, bandWidth_Real, nObs_Integer,
kernelType_]:=
dens nu[2][kernelType] / (nObs bandWidth);
Variance[dens:{{_,_}..}, bandWidth_Real,
kernelType_]:=
Transpose[{First[#],
Variance[Last[#], bandWidth, kernelType]}
]&[Transpose[dens]];
Variance[dens_/;VectorQ[dens,NumberQ],
bandWidth_Real, kernelType_] :=
dens nu[2][kernelType] / (Length[dens]
bandWidth);
CIPlot[densXY:{{_,_}..}, bandWidth_, confLevel_,
kernelType_, showOpts___]:=
Module[{s, q, x, y},
q = Sqrt[2] InverseErf[2 confLevel - 1];
{x, y} =
{First[#],Last[#]}&[Transpose[densXY]];
s = Sqrt[Variance[y, bandWidth,
kernelType]];
Show[Map[ListPlot[#, DisplayFunction-
>Identity] &,
Map[Transpose[{x,#}] &, {y - s q, y , y + s
q}]],
showOpts, DisplayFunction-
>$DisplayFunction];]
End[];
SetAttributes[Smooth, ReadProtected];
SetAttributes[Variance, ReadProtected];
SetAttributes[Triangular, ReadProtected];
SetAttributes[Epanechnikov, ReadProtected];
SetAttributes[Quartic, ReadProtected];
SetAttributes[Rectangular, ReadProtected];
SetAttributes[OptBandWidth, ReadProtected];
SetAttributes[CIPlot, ReadProtected];
Protect[Smooth, Variance, OptBandWidth, CIPlot,
Triangular, Epanechnikov, Quartic,
Rectangular];
EndPackage[]
On 18 Dec 2002 at 1:53, Mark Coleman wrote:
>
> Greetings,
>
> Is anyone aware of Mathematica code for calculation nonparametric kernel density
> estimates?
>
> Happy Holidays,
>
> Mark
>
>
<><><><><><><><><><><><><><><><><><>
Johannes Ludsteck
Institut fuer Volkswirtschaftslehre
Lehrstuhl Prof. Dr. Moeller
Universitaet Regensburg
Universitaetsstrasse 31
93053 Regensburg
Tel +49/0941/943-2741