Re: Nonparametric kernel density estimators
- To: mathgroup at smc.vnet.net
 - Subject: [mg38478] Re: [mg38465] Nonparametric kernel density estimators
 - From: "Johannes Ludsteck" <johannes.ludsteck at wiwi.uni-regensburg.de>
 - Date: Fri, 20 Dec 2002 04:24:16 -0500 (EST)
 - Organization: Universitaet Regensburg
 - Sender: owner-wri-mathgroup at wolfram.com
 
Dear Mark,
I have writen a small quick and dirty 
implementation. But it was only for own 
explorative use and not tested exensively. Thus 
there is absolute no warranty...(!)
Since the code is very short, I append it here.
Probably the code could be optimized in several 
ways. 
Best regards,
	Johannes Ludsteck
BeginPackage["Statistics`KernelDensitySmoothing`",
    {"Statistics`DescriptiveStatistics`",
     "Statistics`ContinuousDistributions`"}]
Epanechnikov::usage = "Epanechnikov[x] is the 
Epanechnikov kernel.
See help for Kernel"
Quartic::usage = "Quartic is the Quartic kernel. 
See help for \
Kernel."
Triangular::usage = "Triangular is the Triangular 
kernel. See help \
for Kernel."
Rectangular::usage = "Rectangular is the 
Rectangular kernel. See help \
for Kernel."
Kernel::usage = "Kernel->aKernel is an option for 
Smooth, defining which \
kernel to use for kernel smoothing. Available 
kernels are \
Epanechnikov,Quartic,Triangular and Rectangular; 
default is \
Kernel->Epanechnikov."
Smooth::usage = "Smooth[data,gridSpec] computes a 
density \
estimate for the one-dimensional list data. \
gridSpec may be a list of points where the density 
has to be computed, \
or an iterator specification {min,max,step}, or a 
positive real number. \
If gridSpec is a number, Smooth interprets it as 
step length and \
generates a grid list 
{Min[data],Max[data],gridSpec}. \
Options are Kernel->aKernel and SRatio->aNumber. \
Kernel defines the kernel to use (see help for 
Kernel), \
and SRatio defines the kernel bandwidth as ratio 
to Silverman's optimal bandwidth \
rule."
SRatio::usage = "SRatio->aRealNumber defines the 
kernel bandwidth (smoothing \
parameter) as ratio to Silverman's optimal 
bandwidth rule. Default is \
SRatio->1.0"
OptBandWidth::usage = 
"OptBandWidth[data_List,Kernel_] returns the 
optimal band width \
according to Silverman's rule of thumb."
CIPLot::usage = "CIPlot[dens, bandWidth, alpha, 
kernelType] \
creates a confidence interval plot of density dens 
(dens must be a list \
of pairs {x,y}). You have to provide the band 
width, which was used to \
smooth the density, the confidence level alpha and 
the kernel Type (see \
help for Kernel). Allowed options are all options 
for ListPlot except \
DisplayFunction.";
Unprotect[Variance,Smooth,BandWidth,CIPlot,Kernel,
SRatio];
Options[Smooth] = {Kernel -> Epanechnikov, SRatio 
-> 1.0};
Options[Variance] = {Kernel -> Epanechnikov};
Smooth::opttype = "Kernel specification `1` is not 
an valid Kernel type.";
Smooth::gridspec = "Grid specification `1` is 
incorrect. See help for Smooth.";
Smooth::sratio = "Smooting parameter `1` in option 
SRatio-> must be a real number > 0.";
Unprotect[Smooth, Triangular, Epanechnikov, 
Quartic, Rectangular, OptBandWidth];
Begin["`Private`"]
epanechnikov = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, (3/4)(1 - #^2), 0] &, x]];
quartic = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, (15/16) (1 - #^2)^2, 0] &, 
x]];
triangular = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, 1 - Abs[#], 0] &, x]];
rectangular = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, 0.5, 0] &, x]];
Smooth[data_/;VectorQ[data,NumberQ], gridSpec_, 
opts___] :=
   Module[{d = sort[data], n = Length[data],
      bw, min, max, kernType, kernFun, gridList},
   {min, max} = {Min[d], Max[d]};
   {kernType, sr} = {Kernel, SRatio} /. {opts} /. 
Options[Smooth];
   If[!optsOK[kernType,sr], Return[]];
   If[!gridOK[gridSpec,min,max],
      Message[Smooth::gridspec, gridSpec]; 
Return[]];
   gridList = setGridList[gridSpec,min,max];
   kernFun = chooseKernel[kernType];
   bw = sr OptBandWidth[data,kernType];
   d = Flatten[{min - 2 bw, d, max + 2 bw}];
   MapThread[{#1, If[First[#2]==0, 0.0, 1/(n bw) 
kernelSum[d, #1, #2, kernFun, bw]]} &,
      {gridList, binListIndices[d, gridList, 
bw]}]];
OptBandWidth[data_List,kType_] :=
    1.364 (nu[2][kType] / mu[2][kType]^2)^0.2 * 
Sqrt[Variance[data]] / Length[data]^0.2;
binListIndices = Compile[{{d, _Real, 1}, {grid, 
_Real, 1}, {absBW, _Real}},
      Module[{i, j, lim, result, n = 
Length[grid]},
        result = Table[{0, 0}, {n}];
        For[i = 1; j = 2, i <= n, i++,
          lim = grid[[i]] + absBW; While[d[[j]] <= 
lim, j++];
          If[d[[j - 1]] >= grid[[i]] - absBW , 
result[[i, 2]] = j - 1]];
        For[i = n; j = Length[d] - 1, i > 0, i--,
          lim = grid[[i]] - absBW; While[d[[j]] >= 
lim, j--];
          If[d[[j + 1]] <= grid[[i]] + absBW, 
result[[i, 1]] = j + 1]];
        result]];
kernelSum[d_, x_, limits_, kernFun_, bw_] :=
   Tr[kernFun[Map[(x - #)/bw &, Take[d, 
limits]]]];
optsOK[kernType_, sr_]:=
   (If[!MemberQ[kernelTypes,kernType],
      Message[Smooth::opttype, kernType];        
Return[False]];
   If[!NumberQ[sr], Message[Smooth::sratio, sr]; 
Return[False]];
   If[sr <= 0, Message[Smooth::sratio, sr];      
Return[False]];
   (* ELSE: *)                                   
True);
gridOK[step_?NumberQ,dataMin_,dataMax_]:=
   If[step > dataMax - dataMin, False, True];
gridOK[{gridMin_,gridMax_,step_},dataMin_,dataMax_
]:=
   (If[gridMin > gridMax, Return[False]];
    If[gridMin < dataMin || gridMax > 
dataMax,Return[False]];
    If[step > gridMax - gridMin, Return[False]];
    (* ELSE *) True);
gridOK[grid_,dataMin_,dataMax_]:=
   (If[!VectorQ[grid,NumberQ], Return[False]];
    If[Min[grid] < dataMin || Max[grid] > dataMax, 
False, True]);
setGridList[grid_,__]:= grid;
setGridList[{gridMin_,gridMax_,step_},__]:= 
Range[gridMin,gridMax,step];
setGridList[step_?NumberQ,dataMin_,dataMax_]:= 
Range[dataMin,dataMax,step];
chooseKernel[kernelType_]:=
   
kernelFunctions[[Position[kernelTypes,kernelType][
[1,1]]]];
kernelTypes = {Epanechnikov, Triangular, Quartic, 
Rectangular};
kernelFunctions = {epanechnikov, triangular, 
quartic, rectangular};
(* Definition of kernel characteristics (used in 
band width computataion) *)
mu2Values = {1/5, 1/6, 1/5, 1/3};
nu2Values = {3/5, 2/3, 5/7, 1/2};
MapThread[(mu[2][#1] = #2) &, {kernelTypes, 
mu2Values}];
MapThread[(nu[2][#1] = #2) &, {kernelTypes, 
nu2Values}];
sort = Compile[{{list,_Real,1}}, Sort[list, 
Less]];
Variance[dens_Real, bandWidth_Real, nObs_Integer, 
kernelType_]:=
      dens nu[2][kernelType] / (nObs bandWidth);
Variance[dens:{{_,_}..}, bandWidth_Real, 
kernelType_]:=
  Transpose[{First[#],
        Variance[Last[#], bandWidth, kernelType]}
           ]&[Transpose[dens]];
Variance[dens_/;VectorQ[dens,NumberQ], 
bandWidth_Real, kernelType_] :=
    dens nu[2][kernelType] / (Length[dens] 
bandWidth);
CIPlot[densXY:{{_,_}..}, bandWidth_, confLevel_, 
kernelType_, showOpts___]:=
   Module[{s, q, x, y},
      q = Sqrt[2] InverseErf[2 confLevel - 1];
      {x, y} = 
{First[#],Last[#]}&[Transpose[densXY]];
      s = Sqrt[Variance[y, bandWidth, 
kernelType]];
      Show[Map[ListPlot[#, DisplayFunction-
>Identity] &,
       Map[Transpose[{x,#}] &, {y - s q, y , y + s 
q}]],     
        showOpts, DisplayFunction-
>$DisplayFunction];]
      
End[];
SetAttributes[Smooth, ReadProtected];
SetAttributes[Variance, ReadProtected];
SetAttributes[Triangular, ReadProtected];
SetAttributes[Epanechnikov, ReadProtected];
SetAttributes[Quartic, ReadProtected];
SetAttributes[Rectangular, ReadProtected];
SetAttributes[OptBandWidth, ReadProtected];
SetAttributes[CIPlot, ReadProtected];
Protect[Smooth, Variance, OptBandWidth, CIPlot,
    Triangular, Epanechnikov, Quartic, 
Rectangular];
EndPackage[]
On 18 Dec 2002 at 1:53, Mark Coleman wrote:
> 
> Greetings,
> 
> Is anyone aware of Mathematica code for calculation nonparametric kernel density 
> estimates?
> 
> Happy Holidays,
> 
> Mark
> 
> 
<><><><><><><><><><><><><><><><><><>
Johannes Ludsteck
Institut fuer Volkswirtschaftslehre
Lehrstuhl Prof. Dr. Moeller
Universitaet Regensburg
Universitaetsstrasse 31
93053 Regensburg
Tel +49/0941/943-2741