MathGroup Archive 2002

[Date Index] [Thread Index] [Author Index]

Search the Archive

Re: Nonparametric kernel density estimators

  • To: mathgroup at smc.vnet.net
  • Subject: [mg38478] Re: [mg38465] Nonparametric kernel density estimators
  • From: "Johannes Ludsteck" <johannes.ludsteck at wiwi.uni-regensburg.de>
  • Date: Fri, 20 Dec 2002 04:24:16 -0500 (EST)
  • Organization: Universitaet Regensburg
  • Sender: owner-wri-mathgroup at wolfram.com

Dear Mark,
I have writen a small quick and dirty 
implementation. But it was only for own 
explorative use and not tested exensively. Thus 
there is absolute no warranty...(!)

Since the code is very short, I append it here.
Probably the code could be optimized in several 
ways. 

Best regards,
	Johannes Ludsteck

BeginPackage["Statistics`KernelDensitySmoothing`",
    {"Statistics`DescriptiveStatistics`",
     "Statistics`ContinuousDistributions`"}]

Epanechnikov::usage = "Epanechnikov[x] is the 
Epanechnikov kernel.
See help for Kernel"

Quartic::usage = "Quartic is the Quartic kernel. 
See help for \
Kernel."

Triangular::usage = "Triangular is the Triangular 
kernel. See help \
for Kernel."

Rectangular::usage = "Rectangular is the 
Rectangular kernel. See help \
for Kernel."

Kernel::usage = "Kernel->aKernel is an option for 
Smooth, defining which \
kernel to use for kernel smoothing. Available 
kernels are \
Epanechnikov,Quartic,Triangular and Rectangular; 
default is \
Kernel->Epanechnikov."

Smooth::usage = "Smooth[data,gridSpec] computes a 
density \
estimate for the one-dimensional list data. \
gridSpec may be a list of points where the density 
has to be computed, \
or an iterator specification {min,max,step}, or a 
positive real number. \
If gridSpec is a number, Smooth interprets it as 
step length and \
generates a grid list 
{Min[data],Max[data],gridSpec}. \
Options are Kernel->aKernel and SRatio->aNumber. \
Kernel defines the kernel to use (see help for 
Kernel), \
and SRatio defines the kernel bandwidth as ratio 
to Silverman's optimal bandwidth \
rule."


SRatio::usage = "SRatio->aRealNumber defines the 
kernel bandwidth (smoothing \
parameter) as ratio to Silverman's optimal 
bandwidth rule. Default is \
SRatio->1.0"

OptBandWidth::usage = 
"OptBandWidth[data_List,Kernel_] returns the 
optimal band width \
according to Silverman's rule of thumb."

CIPLot::usage = "CIPlot[dens, bandWidth, alpha, 
kernelType] \
creates a confidence interval plot of density dens 
(dens must be a list \
of pairs {x,y}). You have to provide the band 
width, which was used to \
smooth the density, the confidence level alpha and 
the kernel Type (see \
help for Kernel). Allowed options are all options 
for ListPlot except \
DisplayFunction.";

Unprotect[Variance,Smooth,BandWidth,CIPlot,Kernel,
SRatio];

Options[Smooth] = {Kernel -> Epanechnikov, SRatio 
-> 1.0};
Options[Variance] = {Kernel -> Epanechnikov};

Smooth::opttype = "Kernel specification `1` is not 
an valid Kernel type.";

Smooth::gridspec = "Grid specification `1` is 
incorrect. See help for Smooth.";

Smooth::sratio = "Smooting parameter `1` in option 
SRatio-> must be a real number > 0.";

Unprotect[Smooth, Triangular, Epanechnikov, 
Quartic, Rectangular, OptBandWidth];

Begin["`Private`"]

epanechnikov = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, (3/4)(1 - #^2), 0] &, x]];

quartic = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, (15/16) (1 - #^2)^2, 0] &, 
x]];

triangular = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, 1 - Abs[#], 0] &, x]];

rectangular = Compile[{{x, _Real, 1}},
    Map[If[Abs[#] <= 1, 0.5, 0] &, x]];

Smooth[data_/;VectorQ[data,NumberQ], gridSpec_, 
opts___] :=
   Module[{d = sort[data], n = Length[data],
      bw, min, max, kernType, kernFun, gridList},
   {min, max} = {Min[d], Max[d]};
   {kernType, sr} = {Kernel, SRatio} /. {opts} /. 
Options[Smooth];
   If[!optsOK[kernType,sr], Return[]];
   If[!gridOK[gridSpec,min,max],
      Message[Smooth::gridspec, gridSpec]; 
Return[]];
   gridList = setGridList[gridSpec,min,max];
   kernFun = chooseKernel[kernType];
   bw = sr OptBandWidth[data,kernType];
   d = Flatten[{min - 2 bw, d, max + 2 bw}];
   MapThread[{#1, If[First[#2]==0, 0.0, 1/(n bw) 
kernelSum[d, #1, #2, kernFun, bw]]} &,
      {gridList, binListIndices[d, gridList, 
bw]}]];

OptBandWidth[data_List,kType_] :=
    1.364 (nu[2][kType] / mu[2][kType]^2)^0.2 * 
Sqrt[Variance[data]] / Length[data]^0.2;

binListIndices = Compile[{{d, _Real, 1}, {grid, 
_Real, 1}, {absBW, _Real}},
      Module[{i, j, lim, result, n = 
Length[grid]},
        result = Table[{0, 0}, {n}];
        For[i = 1; j = 2, i <= n, i++,
          lim = grid[[i]] + absBW; While[d[[j]] <= 
lim, j++];
          If[d[[j - 1]] >= grid[[i]] - absBW , 
result[[i, 2]] = j - 1]];
        For[i = n; j = Length[d] - 1, i > 0, i--,
          lim = grid[[i]] - absBW; While[d[[j]] >= 
lim, j--];
          If[d[[j + 1]] <= grid[[i]] + absBW, 
result[[i, 1]] = j + 1]];
        result]];

kernelSum[d_, x_, limits_, kernFun_, bw_] :=
   Tr[kernFun[Map[(x - #)/bw &, Take[d, 
limits]]]];

optsOK[kernType_, sr_]:=
   (If[!MemberQ[kernelTypes,kernType],
      Message[Smooth::opttype, kernType];        
Return[False]];
   If[!NumberQ[sr], Message[Smooth::sratio, sr]; 
Return[False]];
   If[sr <= 0, Message[Smooth::sratio, sr];      
Return[False]];
   (* ELSE: *)                                   
True);

gridOK[step_?NumberQ,dataMin_,dataMax_]:=
   If[step > dataMax - dataMin, False, True];

gridOK[{gridMin_,gridMax_,step_},dataMin_,dataMax_
]:=
   (If[gridMin > gridMax, Return[False]];
    If[gridMin < dataMin || gridMax > 
dataMax,Return[False]];
    If[step > gridMax - gridMin, Return[False]];
    (* ELSE *) True);

gridOK[grid_,dataMin_,dataMax_]:=
   (If[!VectorQ[grid,NumberQ], Return[False]];
    If[Min[grid] < dataMin || Max[grid] > dataMax, 
False, True]);

setGridList[grid_,__]:= grid;

setGridList[{gridMin_,gridMax_,step_},__]:= 
Range[gridMin,gridMax,step];

setGridList[step_?NumberQ,dataMin_,dataMax_]:= 
Range[dataMin,dataMax,step];


chooseKernel[kernelType_]:=
   
kernelFunctions[[Position[kernelTypes,kernelType][
[1,1]]]];

kernelTypes = {Epanechnikov, Triangular, Quartic, 
Rectangular};
kernelFunctions = {epanechnikov, triangular, 
quartic, rectangular};

(* Definition of kernel characteristics (used in 
band width computataion) *)

mu2Values = {1/5, 1/6, 1/5, 1/3};
nu2Values = {3/5, 2/3, 5/7, 1/2};

MapThread[(mu[2][#1] = #2) &, {kernelTypes, 
mu2Values}];
MapThread[(nu[2][#1] = #2) &, {kernelTypes, 
nu2Values}];


sort = Compile[{{list,_Real,1}}, Sort[list, 
Less]];

Variance[dens_Real, bandWidth_Real, nObs_Integer, 
kernelType_]:=
      dens nu[2][kernelType] / (nObs bandWidth);

Variance[dens:{{_,_}..}, bandWidth_Real, 
kernelType_]:=
  Transpose[{First[#],
        Variance[Last[#], bandWidth, kernelType]}
           ]&[Transpose[dens]];

Variance[dens_/;VectorQ[dens,NumberQ], 
bandWidth_Real, kernelType_] :=
    dens nu[2][kernelType] / (Length[dens] 
bandWidth);

CIPlot[densXY:{{_,_}..}, bandWidth_, confLevel_, 
kernelType_, showOpts___]:=
   Module[{s, q, x, y},
      q = Sqrt[2] InverseErf[2 confLevel - 1];
      {x, y} = 
{First[#],Last[#]}&[Transpose[densXY]];
      s = Sqrt[Variance[y, bandWidth, 
kernelType]];
      Show[Map[ListPlot[#, DisplayFunction-
>Identity] &,
       Map[Transpose[{x,#}] &, {y - s q, y , y + s 
q}]],     
        showOpts, DisplayFunction-
>$DisplayFunction];]
      
End[];

SetAttributes[Smooth, ReadProtected];
SetAttributes[Variance, ReadProtected];
SetAttributes[Triangular, ReadProtected];
SetAttributes[Epanechnikov, ReadProtected];
SetAttributes[Quartic, ReadProtected];
SetAttributes[Rectangular, ReadProtected];
SetAttributes[OptBandWidth, ReadProtected];
SetAttributes[CIPlot, ReadProtected];

Protect[Smooth, Variance, OptBandWidth, CIPlot,
    Triangular, Epanechnikov, Quartic, 
Rectangular];

EndPackage[]

On 18 Dec 2002 at 1:53, Mark Coleman wrote:

> 
> Greetings,
> 
> Is anyone aware of Mathematica code for calculation nonparametric kernel density 
> estimates?
> 
> Happy Holidays,
> 
> Mark
> 
> 


<><><><><><><><><><><><><><><><><><>
Johannes Ludsteck
Institut fuer Volkswirtschaftslehre
Lehrstuhl Prof. Dr. Moeller
Universitaet Regensburg
Universitaetsstrasse 31
93053 Regensburg
Tel +49/0941/943-2741



  • Prev by Date: RE: Gridlines in contour Plots
  • Next by Date: RE: Re: Nonparametric kernel density estimators
  • Previous by thread: Re: Nonparametric kernel density estimators
  • Next by thread: RE: Re: Nonparametric kernel density estimators