![]() |
Bimetric 3+1 toolkit for spherical symmetry
|
00001 #ifndef _LOWESS_H_INCLUDED 00002 #define _LOWESS_H_INCLUDED 00003 00004 #include <cstdlib> 00005 #include <cmath> 00006 #include <algorithm> // std::min, std::max 00007 00008 /** Templated class for LOWESS (locally weighted scatterplot smoothing). 00009 * 00010 * The lowess code is translated from `RATFOR` lowess code written by W. S. 00011 * Cleveland as obtained from `NETLIB`. 00012 * It is based on two functions written in ratfor: `lowest` 00013 * and `lowess`. The code has since been refactored and commented further. 00014 * 00015 * @authors W.S. Cleveland, Hannes Roest 00016 * @copyright Copyright (c) 2015. All rights reserved. 00017 * This software is released under a three-clause BSD license. 00018 */ 00019 template <typename Container, typename T> 00020 class Lowess 00021 { 00022 inline T pow2(T x) { return x * x; } 00023 inline T pow3(T x) { return x * x * x; } 00024 00025 /// Return the median of a sequence of numbers defined by the random 00026 /// access iterators begin and end. The sequence must not be empty 00027 /// (median is undefined for an empty set). 00028 /// 00029 /// The numbers must be convertible to double. 00030 /// 00031 template <class RandAccessIter> 00032 T median(RandAccessIter begin, RandAccessIter end) 00033 { 00034 std::size_t size = end - begin; 00035 std::size_t middleIdx = size / 2; 00036 RandAccessIter target = begin + middleIdx; 00037 std::nth_element(begin, target, end); 00038 00039 if (size % 2 != 0) 00040 { 00041 //Odd number of elements 00042 return *target; 00043 } 00044 else 00045 { 00046 //Even number of elements 00047 double a = *target; 00048 RandAccessIter targetNeighbor = target - 1; 00049 targetNeighbor = std::max_element(begin, target); 00050 return (a + *targetNeighbor) / 2.0; 00051 } 00052 } 00053 00054 /// Calculate weights for weighted regression. 00055 bool calculate_weights(const Container& x, 00056 const size_t n, 00057 const T current_x, 00058 const bool use_resid_weights, 00059 const size_t nleft, 00060 const Container& resid_weights, 00061 Container& weights, 00062 size_t& nrt, 00063 const T h) 00064 { 00065 T r; 00066 size_t j; 00067 00068 T h9 = .999 * h; 00069 T h1 = .001 * h; 00070 T a = 0.0; // sum of weights 00071 00072 // compute weights (pick up all ties on right) 00073 for (j = nleft; j < n; j++) 00074 { 00075 // Compute the distance measure, then apply the tricube 00076 // function on the distance to get the weight. 00077 // use_resid_weights will be False on the first iteration, then True 00078 // on the subsequent ones, after some residuals have been calculated. 00079 weights[j] = 0.0; 00080 r = std::abs(x[j] - current_x); 00081 if (r <= h9) 00082 { 00083 if (r > h1) 00084 { 00085 // small enough for non-zero weight 00086 // compute tricube function: ( 1 - (r/h)^3 )^3 00087 weights[j] = pow3(1.0 - pow3(r / h)); 00088 } 00089 else 00090 { 00091 weights[j] = 1.0; 00092 } 00093 00094 if (use_resid_weights) 00095 { 00096 weights[j] = resid_weights[j] * weights[j]; 00097 } 00098 00099 a += weights[j]; 00100 } 00101 else if (x[j] > current_x) 00102 { 00103 // get out at first zero wt on right 00104 break; 00105 } 00106 } 00107 00108 // rightmost pt (may be greater than nright because of ties) 00109 nrt = j - 1; 00110 if (a <= 0.0) 00111 { 00112 return false; 00113 } 00114 else 00115 { 00116 // normalize weights (make sum of w[j] == 1) 00117 for (j = nleft; j <= nrt; j++) 00118 { 00119 weights[j] = weights[j] / a; 00120 } 00121 00122 return true; 00123 } 00124 } 00125 00126 /// Calculate smoothed/fitted y-value by weighted regression. 00127 void calculate_y_fit(const Container& x, 00128 const Container& y, 00129 const T current_x, 00130 const size_t n, 00131 const size_t nleft, 00132 const size_t nrt, 00133 const T h, 00134 T& ys, 00135 Container& weights) 00136 { 00137 T range = x[n - 1] - x[0]; 00138 00139 if (h > 0.0) 00140 { 00141 // use linear fit 00142 00143 // No regression function (e.g. lstsq) is called. Instead a "projection 00144 // vector" p_i_j is calculated, and y_fit[i] = sum(p_i_j * y[j]) = y_fit[i] 00145 // for j s.t. x[j] is in the neighborhood of x[i]. p_i_j is a function of 00146 // the weights, x[i], and its neighbors. 00147 // To save space, p_i_j is computed in place using the weight vector. 00148 00149 // find weighted center of x values 00150 T sum_weighted_x = 0.0; // originally variable a 00151 for (size_t j = nleft; j <= nrt; j++) 00152 { 00153 sum_weighted_x += weights[j] * x[j]; 00154 } 00155 00156 T b = current_x - sum_weighted_x; // originally variable b 00157 T weighted_sqdev = 0.0; // originally variable c 00158 for (size_t j = nleft; j <= nrt; j++) 00159 { 00160 weighted_sqdev += weights[j] * 00161 (x[j] - sum_weighted_x) * (x[j] - sum_weighted_x); 00162 } 00163 00164 if (sqrt(weighted_sqdev) > .001 * range) 00165 { 00166 // points are spread out enough to compute slope 00167 b = b / weighted_sqdev; 00168 for (size_t j = nleft; j <= nrt; j++) 00169 { 00170 // Compute p_i_j in place 00171 weights[j] = weights[j] * (1.0 + b * (x[j] - sum_weighted_x)); 00172 } 00173 } 00174 } 00175 00176 ys = 0.0; 00177 for (size_t j = nleft; j <= nrt; j++) 00178 { 00179 ys += weights[j] * y[j]; 00180 } 00181 } 00182 00183 bool lowest(const Container& x, 00184 const Container& y, 00185 size_t n, 00186 T current_x, //xs 00187 T& ys, 00188 size_t nleft, 00189 size_t nright, 00190 Container& weights, // vector w 00191 bool use_resid_weights, // userw 00192 const Container& resid_weights) 00193 { 00194 T h; 00195 size_t nrt; // rightmost pt (may be greater than nright because of ties) 00196 00197 h = std::max(current_x - x[nleft], x[nright] - current_x); 00198 00199 // Calculate the weights for the regression in this neighborhood. 00200 // Determine if at least some weights are positive, so a regression 00201 // is ok. 00202 bool fit_ok = calculate_weights(x, n, current_x, use_resid_weights, 00203 nleft, resid_weights, 00204 weights, nrt, h); 00205 if (!fit_ok) 00206 { 00207 return fit_ok; 00208 } 00209 00210 // If it is ok to fit, run the weighted least squares regression 00211 calculate_y_fit(x, y, current_x, n, nleft, nrt, h, ys, weights); 00212 00213 return fit_ok; 00214 } 00215 00216 /// Find the indices bounding the k-nearest-neighbors of the current point. 00217 void update_neighborhood(const Container& x, 00218 const size_t n, 00219 const size_t i, 00220 size_t& nleft, 00221 size_t& nright) 00222 { 00223 T d1, d2; 00224 // A subtle loop. Start from the current neighborhood range: 00225 // [nleft, nright). Shift both ends rightwards by one 00226 // (so that the neighborhood still contains ns points), until 00227 // the current point is in the center (or just to the left of 00228 // the center) of the neighborhood. This neighborhood will 00229 // contain the ns-nearest neighbors of x[i]. 00230 // 00231 // Once the right end hits the end of the data, hold the 00232 // neighborhood the same for the remaining x[i]s. 00233 while (nright < n - 1) 00234 { 00235 // move nleft, nright to right if radius decreases 00236 d1 = x[i] - x[nleft]; 00237 d2 = x[nright + 1] - x[i]; 00238 // if d1 <= d2 with x[nright+1] == x[nright], lowest fixes 00239 if (d1 <= d2) break; 00240 // radius will not decrease by move right 00241 nleft++; 00242 nright++; 00243 } 00244 } 00245 00246 /// Update the counters of the local regression. 00247 void update_indices(const Container& x, 00248 const size_t n, 00249 const T delta, 00250 size_t& i, 00251 size_t& last, 00252 Container& ys) 00253 { 00254 // For most points within delta of the current point, we skip the 00255 // weighted linear regression (which save much computation of 00256 // weights and fitted points). Instead, we'll jump to the last 00257 // point within delta, fit the weighted regression at that point, 00258 // and linearly interpolate in between. 00259 00260 // the last point actually estimated 00261 last = i; 00262 00263 // This loop increments until we fall just outside of delta distance, 00264 // copying the results for any repeated x's along the way. 00265 T cut = x[last] + delta; 00266 for (i = last + 1; i < n; i++) 00267 { 00268 // find close points 00269 if (x[i] > cut) break; 00270 00271 // i one beyond last pt within cut 00272 if (x[i] == x[last]) 00273 { 00274 // exact match in x 00275 // if tied with previous x-value, just use the already 00276 // fitted y, and update the last-fit counter. 00277 ys[i] = ys[last]; 00278 last = i; 00279 } 00280 } 00281 00282 00283 // the next point to fit the regression at is either one prior to i (since 00284 // i should be the first point outside of delta) or it is "last + 1" in the 00285 // case that i never got incremented. This insures we always step forward. 00286 // -> back 1 point so interpolation within delta, but always go forward 00287 i = std::max(last + 1, i - 1); 00288 } 00289 00290 /// Calculate smoothed/fitted y by linear interpolation between the current 00291 /// and previous y fitted by weighted regression. 00292 void interpolate_skipped_fits(const Container& x, 00293 const size_t i, 00294 const size_t last, 00295 Container& ys) 00296 { 00297 // skipped points -- interpolate 00298 T alpha; 00299 T denom = x[i] - x[last]; // non-zero - proof? 00300 for (size_t j = last + 1; j < i; j = j + 1) 00301 { 00302 alpha = (x[j] - x[last]) / denom; 00303 ys[j] = alpha * ys[i] + (1.0 - alpha) * ys[last]; 00304 } 00305 } 00306 00307 /// Calculate residual weights for the next `robustifying` iteration. 00308 void calculate_residual_weights(const size_t n, 00309 const Container& weights, 00310 Container& resid_weights) 00311 { 00312 T r; 00313 00314 for (size_t i = 0; i < n; i++) 00315 { 00316 resid_weights[i] = std::abs(weights[i]); 00317 } 00318 00319 // *********************************** 00320 // Compute pseudo-median (take average even if we have an odd number of 00321 // elements), following the original implementation. We could also use a 00322 // true median calculation here: 00323 // T cmad = 6.0 * median(resid_weights.begin(), resid_weights.end()); 00324 // *********************************** 00325 00326 size_t m1 = n / 2; // FORTRAN starts with one, CPP with zero 00327 // size_t m1 = 1 + n / 2; // original FORTRAN code 00328 // size_t m2 = n - m1 + 1; // see below, we don't explicitly sort but use max_element 00329 00330 // Use nth element to find element m1, which produces a partially sorted 00331 // vector. This means we can get element m2 by looking for the maximum in the 00332 // remainder. 00333 typename Container::iterator it_m1 = resid_weights.begin() + m1; 00334 std::nth_element(resid_weights.begin(), it_m1, resid_weights.end()); 00335 typename Container::iterator it_m2 = std::max_element( 00336 resid_weights.begin(), it_m1); 00337 T cmad = 3.0 * (*it_m1 + *it_m2); 00338 T c9 = .999 * cmad; 00339 T c1 = .001 * cmad; 00340 00341 for (size_t i = 0; i < n; i++) 00342 { 00343 r = std::abs(weights[i]); 00344 if (r <= c1) 00345 { 00346 // near 0, avoid underflow 00347 resid_weights[i] = 1.0; 00348 } 00349 else if (r > c9) 00350 { 00351 // near 1, avoid underflow 00352 resid_weights[i] = 0.0; 00353 } 00354 else 00355 { 00356 resid_weights[i] = pow2(1.0 - pow2(r / cmad)); 00357 } 00358 } 00359 } 00360 00361 public: 00362 int lowess(const Container& x, 00363 const Container& y, 00364 double frac, // parameter f 00365 int nsteps, 00366 T delta, 00367 Container& ys, 00368 Container& resid_weights, // vector rw 00369 Container& weights // vector res 00370 ) 00371 { 00372 bool fit_ok; 00373 size_t i, last, nleft, nright, ns; 00374 00375 size_t n = x.size(); 00376 if (n < 2) 00377 { 00378 ys[0] = y[0]; 00379 return 1; 00380 } 00381 00382 // how many points around estimation point should be used for regression: 00383 // at least two, at most n points 00384 size_t tmp = (size_t)(frac * (double)n); 00385 ns = std::max(std::min(tmp, n), (size_t)2); 00386 00387 // robustness iterations 00388 for (int iter = 1; iter <= nsteps + 1; iter++) 00389 { 00390 // start of array in C++ at 0 / in FORTRAN at 1 00391 nleft = 0; 00392 nright = ns - 1; 00393 last = -1; // index of prev estimated point 00394 i = 0; // index of current point 00395 00396 // Fit all data points y[i] until the end of the array 00397 do 00398 { 00399 // Identify the neighborhood around the current x[i] 00400 // -> get the nearest ns points 00401 update_neighborhood(x, n, i, nleft, nright); 00402 00403 // Calculate weights and apply fit (original lowest function) 00404 fit_ok = lowest(x, y, n, x[i], ys[i], nleft, nright, 00405 weights, (iter > 1), resid_weights); 00406 00407 // if something went wrong during the fit, use y[i] as the 00408 // fitted value at x[i] 00409 if (!fit_ok) ys[i] = y[i]; 00410 00411 // If we skipped some points (because of how delta was set), go back 00412 // and fit them by linear interpolation. 00413 if (last < i - 1) 00414 { 00415 interpolate_skipped_fits(x, i, last, ys); 00416 } 00417 00418 // Update the last fit counter to indicate we've now fit this point. 00419 // Find the next i for which we'll run a regression. 00420 update_indices(x, n, delta, i, last, ys); 00421 } 00422 while (last < n - 1); 00423 00424 // compute current residuals 00425 for (i = 0; i < n; i++) 00426 { 00427 weights[i] = y[i] - ys[i]; 00428 } 00429 00430 // compute robustness weights except last time 00431 if (iter > nsteps) break; 00432 00433 calculate_residual_weights(n, weights, resid_weights); 00434 } 00435 return 0; 00436 } 00437 }; 00438 00439 #endif // _LOWESS_H_INCLUDED