User Tools

Site Tools


start

This blog is my notebook for computer and technology related information, copies of useful webcontent (with refs of course), troubleshooting, general tips and setups that work well for me.

Useful tags:

Blog

Least squares regression of parabola

For a project I needed a really fast least squares regression of a second order polynomial ($f(x)=ax^2+bx+c$).

Using GSL

Gnu Scientific library contains solutions for a lot of numerical problems. Here is a solution using gsl

https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points

However, this uses general polynomial regression, so I was a bit worried about performance.

Direct calculation

A C++ implementation was found on the cplusplus forum. I took this, built it into the gsl implementation (see above) to run both approaches side by side and tuned it for more performance.

polifitgsl.cpp
/*
 * =====================================================================================
 *
 *       Filename:  polyfit.cpp
 *
 *    Description:  Least squares fit of second order polynomials 
 *                  Test program using gsl and direct calculation
 *        Version:  1.0
 *        Created:  2017-07-17 09:32:55
 *       Compiler:  gcc
 *
 *         Author:  Bernhard Brunner, brb_blog@epr.ch
 *     References:  This code was merged, adapted and optimized from these two sources:
 *                  http://www.cplusplus.com/forum/general/181580/
 *                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
 *          Build:  compile and link using 
 *                  g++    -c -o polifitgsl.o polifitgsl.cpp
 *                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
 * 
 *      Profiling:
 *                  valgrind --tool=callgrind ./polifitgsl
 *                  kcachegrind
 *                  
 *                  polynomialfit        takes 5.22s for 1000 calls
 *                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
 * =====================================================================================
 */
 
#include <stdio.h>
 
#include "polifitgsl.h"
 
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };
 
#define NP (sizeof(x)/sizeof(double)) // 20
 
#define DEGREE 3
double coeff[DEGREE];
 
bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;
 
        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;
 
        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;
 
        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }
 
    double S20squared = S20*S20;
 
    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;
 
    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);
 
    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;
 
    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;
 
    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;
 
    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);
 
    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);
 
    return true;
}
 
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;
 
  int i, j;
 
  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);
 
  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }
 
  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);
 
  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }
 
  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}
 
void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients using gsl:\n\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');
 
    printf (" computed values using direct calculation:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}
 
int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++)
        polynomialfit (NP, DEGREE, x, y, coeff);
    testcoeff(coeff);
 
    double sx; 
    for (int i=0; i< ITER; i++)
        findQuadCoefficients(x, y, coeff, sx, NP);
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

References:

2017-07-17 14:17 · brb · 0 Comments

Archive

Blog History

2009-05: 18 entries 2009-06: 12 entries 2009-07: 9 entries 2009-08: 5 entries 2009-09: 2 entries 2009-10: 2 entries 2009-11: 5 entries 2009-12: 4 entries 2010-01: 1 entry 2010-02: 3 entries 2010-03: 2 entries 2010-04: 2 entries 2010-05: 3 entries 2010-06: 10 entries 2010-07: 8 entries 2010-08: 5 entries 2010-09: 4 entries 2010-10: 6 entries 2010-11: 2 entries 2010-12: 6 entries 2011-01: 5 entries 2011-02: 3 entries 2011-03: 5 entries 2011-04: 2 entries 2011-05: 4 entries 2011-06: 5 entries 2011-07: 11 entries 2011-08: 12 entries 2011-09: 4 entries 2011-10: 4 entries 2011-11: 2 entries 2011-12: 1 entry 2012-01: 5 entries 2012-02: 2 entries 2012-04: 3 entries 2012-06: 1 entry 2012-09: 4 entries 2012-10: 1 entry 2013-04: 1 entry 2014-04: 10 entries 2014-08: 2 entries 2015-04: 1 entry 2015-09: 3 entries 2016-04: 1 entry 2016-05: 1 entry 2016-06: 1 entry 2016-07: 2 entries 2016-10: 1 entry 2017-01: 1 entry 2017-02: 2 entries 2017-04: 1 entry 2017-05: 1 entry 2017-07: 2 entries 2017-08: 1 entry 2017-10: 1 entry 2017-11: 1 entry 2017-12: 2 entries 2018-10: 1 entry 2019-02: 1 entry 2019-05: 1 entry 2019-09: 1 entry 2019-10: 2 entries 2020-08: 1 entry 2020-01: 1 entry 2021-01: 1 entry 2021-02: 1 entry 2021-07: 1 entry 2022-05: 2 entries 2022-06: 1 entry 2022-11: 1 entry 2023-05: 1 entry 2023-09: 2 entries 2023-12: 2 entries 2024-09: 1 entry

2024

September

2023

December

September

May

2022

November

June

May

2021

July

February

January

2020

January

August

2019

October

September

May

February

2018

October

2017

December

November

October

August

July

May

April

February

January

2016

October

July

June

May

April

2015

September

April

2014

August

April

2013

April

2012

October

September

June

April

February

January

2011

December

November

October

September

August

July

June

May

April

March

February

January

2010

December

November

October

September

August

July

June

May

April

March

February

January

2009

December

November

October

September

August

July

June

May

start.txt · Last modified: 2022-11-29 08:28 by brb