Bimetric 3+1 toolkit for spherical symmetry
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Lowess< Container, T > Class Template Reference

Templated class for LOWESS (locally weighted scatterplot smoothing). More...

#include <lowess.h>

Public Member Functions

int lowess (const Container &x, const Container &y, double frac, int nsteps, T delta, Container &ys, Container &resid_weights, Container &weights)

Private Member Functions

pow2 (T x)
pow3 (T x)
template<class RandAccessIter >
median (RandAccessIter begin, RandAccessIter end)
 Return the median of a sequence of numbers defined by the random access iterators begin and end.
bool calculate_weights (const Container &x, const size_t n, const T current_x, const bool use_resid_weights, const size_t nleft, const Container &resid_weights, Container &weights, size_t &nrt, const T h)
 Calculate weights for weighted regression.
void calculate_y_fit (const Container &x, const Container &y, const T current_x, const size_t n, const size_t nleft, const size_t nrt, const T h, T &ys, Container &weights)
 Calculate smoothed/fitted y-value by weighted regression.
bool lowest (const Container &x, const Container &y, size_t n, T current_x, T &ys, size_t nleft, size_t nright, Container &weights, bool use_resid_weights, const Container &resid_weights)
void update_neighborhood (const Container &x, const size_t n, const size_t i, size_t &nleft, size_t &nright)
 Find the indices bounding the k-nearest-neighbors of the current point.
void update_indices (const Container &x, const size_t n, const T delta, size_t &i, size_t &last, Container &ys)
 Update the counters of the local regression.
void interpolate_skipped_fits (const Container &x, const size_t i, const size_t last, Container &ys)
 Calculate smoothed/fitted y by linear interpolation between the current and previous y fitted by weighted regression.
void calculate_residual_weights (const size_t n, const Container &weights, Container &resid_weights)
 Calculate residual weights for the next robustifying iteration.

Detailed Description

template<typename Container, typename T>
class Lowess< Container, T >

Templated class for LOWESS (locally weighted scatterplot smoothing).

The lowess code is translated from RATFOR lowess code written by W. S. Cleveland as obtained from NETLIB. It is based on two functions written in ratfor: lowest and lowess. The code has since been refactored and commented further.

Authors:
W.S. Cleveland, Hannes Roest

Definition at line 20 of file lowess.h.


Member Function Documentation

template<typename Container , typename T >
void Lowess< Container, T >::calculate_residual_weights ( const size_t  n,
const Container &  weights,
Container &  resid_weights 
) [inline, private]

Calculate residual weights for the next robustifying iteration.

Definition at line 308 of file lowess.h.

    {
      T r;

      for (size_t i = 0; i < n; i++)
      {
        resid_weights[i] = std::abs(weights[i]);
      }

      // ***********************************
      // Compute pseudo-median (take average even if we have an odd number of
      // elements), following the original implementation. We could also use a
      // true median calculation here:
      // T cmad = 6.0 * median(resid_weights.begin(), resid_weights.end());
      // ***********************************

      size_t m1 = n / 2; // FORTRAN starts with one, CPP with zero
      // size_t m1 = 1 + n / 2; // original FORTRAN code
      // size_t m2 = n - m1 + 1; // see below, we don't explicitly sort but use max_element

      // Use nth element to find element m1, which produces a partially sorted
      // vector. This means we can get element m2 by looking for the maximum in the
      // remainder.
      typename Container::iterator it_m1 = resid_weights.begin() + m1;
      std::nth_element(resid_weights.begin(), it_m1, resid_weights.end());
      typename Container::iterator it_m2 = std::max_element(
        resid_weights.begin(), it_m1);
      T cmad = 3.0 * (*it_m1 + *it_m2);
      T c9 = .999 * cmad;
      T c1 = .001 * cmad;

      for (size_t i = 0; i < n; i++)
      {
        r = std::abs(weights[i]);
        if (r <= c1)
        {
          // near 0, avoid underflow
          resid_weights[i] = 1.0;
        }
        else if (r > c9)
        {
          // near 1, avoid underflow
          resid_weights[i] = 0.0;
        }
        else
        {
          resid_weights[i] = pow2(1.0 - pow2(r / cmad));
        }
      }
    }
template<typename Container , typename T >
bool Lowess< Container, T >::calculate_weights ( const Container &  x,
const size_t  n,
const T  current_x,
const bool  use_resid_weights,
const size_t  nleft,
const Container &  resid_weights,
Container &  weights,
size_t &  nrt,
const T  h 
) [inline, private]

Calculate weights for weighted regression.

Definition at line 55 of file lowess.h.

    {
      T r;
      size_t j;

      T h9 = .999 * h;
      T h1 = .001 * h;
      T a = 0.0; // sum of weights

      // compute weights (pick up all ties on right)
      for (j = nleft; j < n; j++)
      {
        // Compute the distance measure, then apply the tricube
        // function on the distance to get the weight.
        // use_resid_weights will be False on the first iteration, then True
        // on the subsequent ones, after some residuals have been calculated.
        weights[j] = 0.0;
        r = std::abs(x[j] - current_x);
        if (r <= h9)
        {
          if (r > h1)
          {
            // small enough for non-zero weight
            // compute tricube function: ( 1 - (r/h)^3 )^3
            weights[j] = pow3(1.0 - pow3(r / h));
          }
          else
          {
            weights[j] = 1.0;
          }

          if (use_resid_weights)
          {
            weights[j] = resid_weights[j] * weights[j];
          }

          a += weights[j];
        }
        else if (x[j] > current_x)
        {
          // get out at first zero wt on right
          break;
        }
      }

      // rightmost pt (may be greater than nright because of ties)
      nrt = j - 1;
      if (a <= 0.0)
      {
        return false;
      }
      else
      {
        // normalize weights (make sum of w[j] == 1)
        for (j = nleft; j <= nrt; j++)
        {
          weights[j] = weights[j] / a;
        }

        return true;
      }
    }
template<typename Container , typename T >
void Lowess< Container, T >::calculate_y_fit ( const Container &  x,
const Container &  y,
const T  current_x,
const size_t  n,
const size_t  nleft,
const size_t  nrt,
const T  h,
T &  ys,
Container &  weights 
) [inline, private]

Calculate smoothed/fitted y-value by weighted regression.

Definition at line 127 of file lowess.h.

    {
      T range = x[n - 1] - x[0];

      if (h > 0.0)
      {
        // use linear fit

        // No regression function (e.g. lstsq) is called. Instead a "projection
        // vector" p_i_j is calculated, and y_fit[i] = sum(p_i_j * y[j]) = y_fit[i]
        // for j s.t. x[j] is in the neighborhood of x[i]. p_i_j is a function of
        // the weights, x[i], and its neighbors.
        // To save space, p_i_j is computed in place using the weight vector.

        // find weighted center of x values
        T sum_weighted_x = 0.0; // originally variable a
        for (size_t j = nleft; j <= nrt; j++)
        {
          sum_weighted_x += weights[j] * x[j];
        }

        T b = current_x - sum_weighted_x; // originally variable b
        T weighted_sqdev = 0.0; // originally variable c
        for (size_t j = nleft; j <= nrt; j++)
        {
          weighted_sqdev += weights[j] *
                            (x[j] - sum_weighted_x) * (x[j] - sum_weighted_x);
        }

        if (sqrt(weighted_sqdev) > .001 * range)
        {
          // points are spread out enough to compute slope
          b = b / weighted_sqdev;
          for (size_t j = nleft; j <= nrt; j++)
          {
            // Compute p_i_j in place
            weights[j] = weights[j] * (1.0 + b * (x[j] - sum_weighted_x));
          }
        }
      }

      ys = 0.0;
      for (size_t j = nleft; j <= nrt; j++)
      {
        ys += weights[j] * y[j];
      }
    }
template<typename Container , typename T >
void Lowess< Container, T >::interpolate_skipped_fits ( const Container &  x,
const size_t  i,
const size_t  last,
Container &  ys 
) [inline, private]

Calculate smoothed/fitted y by linear interpolation between the current and previous y fitted by weighted regression.

Definition at line 292 of file lowess.h.

    {
      // skipped points -- interpolate
      T alpha;
      T denom = x[i] - x[last]; // non-zero - proof?
      for (size_t j = last + 1; j < i; j = j + 1)
      {
        alpha = (x[j] - x[last]) / denom;
        ys[j] = alpha * ys[i] + (1.0 - alpha) * ys[last];
      }
    }
template<typename Container , typename T >
int Lowess< Container, T >::lowess ( const Container &  x,
const Container &  y,
double  frac,
int  nsteps,
delta,
Container &  ys,
Container &  resid_weights,
Container &  weights 
) [inline]

Definition at line 362 of file lowess.h.

    {
      bool fit_ok;
      size_t i, last, nleft, nright, ns;

      size_t n = x.size();
      if (n < 2)
      {
        ys[0] = y[0];
        return 1;
      }

      // how many points around estimation point should be used for regression:
      // at least two, at most n points
      size_t tmp = (size_t)(frac * (double)n);
      ns = std::max(std::min(tmp, n), (size_t)2);

      // robustness iterations
      for (int iter = 1; iter <= nsteps + 1; iter++)
      {
        // start of array in C++ at 0 / in FORTRAN at 1
        nleft = 0;
        nright = ns - 1;
        last = -1;          // index of prev estimated point
        i = 0;              // index of current point

        // Fit all data points y[i] until the end of the array
        do
        {
          // Identify the neighborhood around the current x[i]
          // -> get the nearest ns points
          update_neighborhood(x, n, i, nleft, nright);

          // Calculate weights and apply fit (original lowest function)
          fit_ok = lowest(x, y, n, x[i], ys[i], nleft, nright,
                          weights, (iter > 1), resid_weights);

          // if something went wrong during the fit, use y[i] as the
          // fitted value at x[i]
          if (!fit_ok) ys[i] = y[i];

          // If we skipped some points (because of how delta was set), go back
          // and fit them by linear interpolation.
          if (last < i - 1)
          {
            interpolate_skipped_fits(x, i, last, ys);
          }

          // Update the last fit counter to indicate we've now fit this point.
          // Find the next i for which we'll run a regression.
          update_indices(x, n, delta, i, last, ys);
        }
        while (last < n - 1);

        // compute current residuals
        for (i = 0; i < n; i++)
        {
          weights[i] = y[i] - ys[i];
        }

        // compute robustness weights except last time
        if (iter > nsteps) break;

        calculate_residual_weights(n, weights, resid_weights);
      }
      return 0;
    }
template<typename Container , typename T >
bool Lowess< Container, T >::lowest ( const Container &  x,
const Container &  y,
size_t  n,
current_x,
T &  ys,
size_t  nleft,
size_t  nright,
Container &  weights,
bool  use_resid_weights,
const Container &  resid_weights 
) [inline, private]

Definition at line 183 of file lowess.h.

    {
      T h;
      size_t nrt; // rightmost pt (may be greater than nright because of ties)

      h = std::max(current_x - x[nleft], x[nright] - current_x);

      // Calculate the weights for the regression in this neighborhood.
      // Determine if at least some weights are positive, so a regression
      // is ok.
      bool fit_ok = calculate_weights(x, n, current_x, use_resid_weights,
                                      nleft, resid_weights,
                                      weights, nrt, h);
      if (!fit_ok)
      {
        return fit_ok;
      }

      // If it is ok to fit, run the weighted least squares regression
      calculate_y_fit(x, y, current_x, n, nleft, nrt, h, ys, weights);

      return fit_ok;
    }
template<typename Container , typename T >
template<class RandAccessIter >
T Lowess< Container, T >::median ( RandAccessIter  begin,
RandAccessIter  end 
) [inline, private]

Return the median of a sequence of numbers defined by the random access iterators begin and end.

The sequence must not be empty (median is undefined for an empty set).

The numbers must be convertible to double.

Definition at line 32 of file lowess.h.

    {
      std::size_t size = end - begin;
      std::size_t middleIdx = size / 2;
      RandAccessIter target = begin + middleIdx;
      std::nth_element(begin, target, end);

      if (size % 2 != 0)
      {
        //Odd number of elements
        return *target;
      }
      else
      {
        //Even number of elements
        double a = *target;
        RandAccessIter targetNeighbor = target - 1;
        targetNeighbor = std::max_element(begin, target);
        return (a + *targetNeighbor) / 2.0;
      }
    }
template<typename Container , typename T >
T Lowess< Container, T >::pow2 ( x) [inline, private]

Definition at line 22 of file lowess.h.

{ return x * x;  }
template<typename Container , typename T >
T Lowess< Container, T >::pow3 ( x) [inline, private]

Definition at line 23 of file lowess.h.

{ return x * x * x;  }
template<typename Container , typename T >
void Lowess< Container, T >::update_indices ( const Container &  x,
const size_t  n,
const T  delta,
size_t &  i,
size_t &  last,
Container &  ys 
) [inline, private]

Update the counters of the local regression.

Definition at line 247 of file lowess.h.

    {
      // For most points within delta of the current point, we skip the
      // weighted linear regression (which save much computation of
      // weights and fitted points). Instead, we'll jump to the last
      // point within delta, fit the weighted regression at that point,
      // and linearly interpolate in between.

      // the last point actually estimated
      last = i;

      // This loop increments until we fall just outside of delta distance,
      // copying the results for any repeated x's along the way.
      T cut = x[last] + delta;
      for (i = last + 1; i < n; i++)
      {
        // find close points
        if (x[i] > cut) break;

        // i one beyond last pt within cut
        if (x[i] == x[last])
        {
          // exact match in x
          // if tied with previous x-value, just use the already
          // fitted y, and update the last-fit counter.
          ys[i] = ys[last];
          last = i;
        }
      }


      // the next point to fit the regression at is either one prior to i (since
      // i should be the first point outside of delta) or it is "last + 1" in the
      // case that i never got incremented. This insures we always step forward.
      // -> back 1 point so interpolation within delta, but always go forward
      i = std::max(last + 1, i - 1);
    }
template<typename Container , typename T >
void Lowess< Container, T >::update_neighborhood ( const Container &  x,
const size_t  n,
const size_t  i,
size_t &  nleft,
size_t &  nright 
) [inline, private]

Find the indices bounding the k-nearest-neighbors of the current point.

Definition at line 217 of file lowess.h.

    {
      T d1, d2;
      // A subtle loop. Start from the current neighborhood range:
      // [nleft, nright). Shift both ends rightwards by one
      // (so that the neighborhood still contains ns points), until
      // the current point is in the center (or just to the left of
      // the center) of the neighborhood. This neighborhood will
      // contain the ns-nearest neighbors of x[i].
      //
      // Once the right end hits the end of the data, hold the
      // neighborhood the same for the remaining x[i]s.
      while (nright < n - 1)
      {
        // move nleft, nright to right if radius decreases
        d1 = x[i] - x[nleft];
        d2 = x[nright + 1] - x[i];
        // if d1 <= d2 with x[nright+1] == x[nright], lowest fixes
        if (d1 <= d2) break;
        // radius will not decrease by move right
        nleft++;
        nright++;
      }
    }

The documentation for this class was generated from the following file: