This blog is my notebook for computer and technology related information, copies of useful webcontent (with refs of course), troubleshooting, general tips and setups that work well for me.

Useful tags:

Blog

For a project I needed a really fast least squares regression of a second order polynomial ($f(x)=ax^2+bx+c$).

Using GSL

Gnu Scientific library contains solutions for a lot of numerical problems. Here is a solution using gsl

https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points

However, this uses general polynomial regression, so I was a bit worried about performance.

Direct calculation

A C++ implementation was found on the cplusplus forum. I took this, built it into the gsl implementation (see above) to run both approaches side by side and tuned it for more performance.

polifitgsl.cpp
/*
 * =====================================================================================
 *
 *       Filename:  polyfit.cpp
 *
 *    Description:  Least squares fit of second order polynomials 
 *                  Test program using gsl and direct calculation
 *        Version:  1.0
 *        Created:  2017-07-17 09:32:55
 *       Compiler:  gcc
 *
 *         Author:  Bernhard Brunner, brb_blog@epr.ch
 *     References:  This code was merged, adapted and optimized from these two sources:
 *                  http://www.cplusplus.com/forum/general/181580/
 *                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
 *          Build:  compile and link using 
 *                  g++    -c -o polifitgsl.o polifitgsl.cpp
 *                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
 * 
 *      Profiling:
 *                  valgrind --tool=callgrind ./polifitgsl
 *                  kcachegrind
 *                  
 *                  polynomialfit        takes 5.22s for 1000 calls
 *                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
 * =====================================================================================
 */
 
#include <stdio.h>
 
#include "polifitgsl.h"
 
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };
 
#define NP (sizeof(x)/sizeof(double)) // 20
 
#define DEGREE 3
double coeff[DEGREE];
 
bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;
 
        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;
 
        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;
 
        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }
 
    double S20squared = S20*S20;
 
    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;
 
    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);
 
    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;
 
    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;
 
    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;
 
    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);
 
    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);
 
    return true;
}
 
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;
 
  int i, j;
 
  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);
 
  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }
 
  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);
 
  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }
 
  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}
 
void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients using gsl:\n\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');
 
    printf (" computed values using direct calculation:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}
 
int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++)
        polynomialfit (NP, DEGREE, x, y, coeff);
    testcoeff(coeff);
 
    double sx; 
    for (int i=0; i< ITER; i++)
        findQuadCoefficients(x, y, coeff, sx, NP);
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

References:

2017-07-17 14:17 · brb · 0 Comments

Archive

Blog History

2009-05: 18 entries 2009-06: 12 entries 2009-07: 9 entries 2009-08: 5 entries 2009-09: 2 entries 2009-10: 2 entries 2009-11: 5 entries 2009-12: 4 entries 2010-01: 1 entry 2010-02: 3 entries 2010-03: 2 entries 2010-04: 2 entries 2010-05: 3 entries 2010-06: 10 entries 2010-07: 8 entries 2010-08: 5 entries 2010-09: 4 entries 2010-10: 6 entries 2010-11: 2 entries 2010-12: 6 entries 2011-01: 5 entries 2011-02: 3 entries 2011-03: 5 entries 2011-04: 2 entries 2011-05: 4 entries 2011-06: 5 entries 2011-07: 11 entries 2011-08: 12 entries 2011-09: 4 entries 2011-10: 4 entries 2011-11: 2 entries 2011-12: 1 entry 2012-01: 5 entries 2012-02: 2 entries 2012-04: 3 entries 2012-06: 1 entry 2012-09: 4 entries 2012-10: 1 entry 2013-04: 1 entry 2014-04: 10 entries 2014-08: 2 entries 2015-04: 1 entry 2015-09: 3 entries 2016-04: 1 entry 2016-05: 1 entry 2016-06: 1 entry 2016-07: 2 entries 2016-10: 1 entry 2017-01: 1 entry 2017-02: 2 entries 2017-04: 1 entry 2017-05: 1 entry 2017-07: 2 entries

July

May

April

February

January

October

July

June

May

April

September

April

August

April

April

October

September

June

April

February

January

December

November

October

September

August

July

June

May

April

March

February

January

December

November

October

September

August

July

June

May

April

March

February

January

December

November

October

September

August

July

June

May