User Tools

Site Tools


blog:least_squares_regression_of_parabola

Least squares regression of parabola

For a project I needed a really fast least squares regression of a second order polynomial ($f(x)=ax^2+bx+c$).

Using GSL

Gnu Scientific library contains solutions for a lot of numerical problems. Here is a solution using gsl

https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points

However, this uses general polynomial regression, so I was a bit worried about performance.

Direct calculation

A C++ implementation was found on the cplusplus forum. I took this, built it into the gsl implementation (see above) to run both approaches side by side and tuned it for more performance.

polifitgsl.cpp
/*
 * =====================================================================================
 *
 *       Filename:  polyfit.cpp
 *
 *    Description:  Least squares fit of second order polynomials 
 *                  Test program using gsl and direct calculation
 *        Version:  1.0
 *        Created:  2017-07-17 09:32:55
 *       Compiler:  gcc
 *
 *         Author:  Bernhard Brunner, brb_blog@epr.ch
 *     References:  This code was merged, adapted and optimized from these two sources:
 *                  http://www.cplusplus.com/forum/general/181580/
 *                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
 *          Build:  compile and link using 
 *                  g++    -c -o polifitgsl.o polifitgsl.cpp
 *                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
 * 
 *      Profiling:
 *                  valgrind --tool=callgrind ./polifitgsl
 *                  kcachegrind
 *                  
 *                  polynomialfit        takes 5.22s for 1000 calls
 *                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
 * =====================================================================================
 */
 
#include <stdio.h>
 
#include "polifitgsl.h"
 
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };
 
#define NP (sizeof(x)/sizeof(double)) // 20
 
#define DEGREE 3
double coeff[DEGREE];
 
bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;
 
        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;
 
        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;
 
        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }
 
    double S20squared = S20*S20;
 
    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;
 
    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);
 
    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;
 
    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;
 
    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;
 
    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);
 
    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);
 
    return true;
}
 
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;
 
  int i, j;
 
  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);
 
  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }
 
  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);
 
  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }
 
  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}
 
void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients using gsl:\n\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');
 
    printf (" computed values using direct calculation:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}
 
int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++)
        polynomialfit (NP, DEGREE, x, y, coeff);
    testcoeff(coeff);
 
    double sx; 
    for (int i=0; i< ITER; i++)
        findQuadCoefficients(x, y, coeff, sx, NP);
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

References:

Discussion

Enter your comment. Wiki syntax is allowed:
  ____    __    __ __  _   __   ___ 
 / __ \  / /   / //_/ | | / /  / _ \
/ /_/ / / /__ / ,<    | |/ /  / // /
\____/ /____//_/|_|   |___/  /____/
 
blog/least_squares_regression_of_parabola.txt · Last modified: 2017-07-17 14:18 by brb