blog:least_squares_regression_of_parabola
Table of Contents
Least squares regression of parabola
For a project I needed a really fast least squares regression of a second order polynomial ($f(x)=ax^2+bx+c$).
Using GSL
Gnu Scientific library contains solutions for a lot of numerical problems.
Here is a solution using gsl
However, this uses general polynomial regression, so I was a bit worried about performance.
Direct calculation
A C++ implementation was found on the cplusplus forum. I took this, built it into the gsl implementation (see above) to run both approaches side by side and tuned it for more performance.
- polifitgsl.cpp
/* * ===================================================================================== * * Filename: polyfit.cpp * * Description: Least squares fit of second order polynomials * Test program using gsl and direct calculation * Version: 1.0 * Created: 2017-07-17 09:32:55 * Compiler: gcc * * Author: Bernhard Brunner, brb_blog@epr.ch * References: This code was merged, adapted and optimized from these two sources: * http://www.cplusplus.com/forum/general/181580/ * https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points * Build: compile and link using * g++ -c -o polifitgsl.o polifitgsl.cpp * gcc polifitgsl.o -lgsl -lm -lblas -o polifitgsl * * Profiling: * valgrind --tool=callgrind ./polifitgsl * kcachegrind * * polynomialfit takes 5.22s for 1000 calls * findQuadCoefficients takes 0.08s for 1000 calls * 65x faster * ===================================================================================== */ #include <stdio.h> #include "polifitgsl.h" double x[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9 }; #define NP (sizeof(x)/sizeof(double)) // 20 #define DEGREE 3 double coeff[DEGREE]; bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){ const double S00=PointsNum;//points number double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0; // const double MINvalue = valueArray[0]; // const double MINtime = timeArray[0]; for (int i=0; i<PointsNum; i++ ){ double value = valueArray[i]; // - MINvalue); //normalizing // cout << "i=" << i << " index=" << index << " value=" << value << endl; int index = timeArray[i]; // - MINtime; int index2 = index * index; int index3 = index2 * index; int index4 = index3 * index; S40+= index4; S30+= index3; S20+= index2; S10+= index; S01 += value; S11 += value*index; S21 += value*index2; } double S20squared = S20*S20; //minors M_ij=M_ji double M11 = S20*S00 - S10*S10; double M21 = S30*S00 - S20*S10; double M22 = S40*S00 - S20squared; double M31 = S30*S10 - S20squared; double M32 = S40*S10 - S20*S30; // double M33 = S40*S20 - pow(S30,2); double discriminant = S40*M11 - S30*M21 + S20*M31; // printf("discriminant :%lf\n", discriminant); if (abs(discriminant) < .00000000001) return false; double Da = S21*M11 -S11*M21 +S01*M31; coef[2] = Da/discriminant; // cout << "discriminant=" << discriminant; // cout << " Da=" << Da; double Db = -S21*M21 +S11*M22 -S01*M32; coef[1] = Db/discriminant; // cout << " Db=" << Db << endl; double Dc = S40*(S20*S01 - S10*S11) - S30*(S30*S01 - S10*S21) + S20*(S30*S11 - S20*S21); coef[0] = Dc/discriminant; // printf("c=%lf\n", c); critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da); return true; } bool polynomialfit(int obs, int degree, double *dx, double *dy, double *store) /* n, p */ { gsl_multifit_linear_workspace *ws; gsl_matrix *cov, *X; gsl_vector *y, *c; double chisq; int i, j; X = gsl_matrix_alloc(obs, degree); y = gsl_vector_alloc(obs); c = gsl_vector_alloc(degree); cov = gsl_matrix_alloc(degree, degree); for(i=0; i < obs; i++) { for(j=0; j < degree; j++) { gsl_matrix_set(X, i, j, pow(dx[i], j)); } gsl_vector_set(y, i, dy[i]); } ws = gsl_multifit_linear_alloc(obs, degree); gsl_multifit_linear(X, y, c, cov, &chisq, ws); /* store result ... */ for(i=0; i < degree; i++) { store[i] = gsl_vector_get(c, i); } gsl_multifit_linear_free(ws); gsl_matrix_free(X); gsl_matrix_free(cov); gsl_vector_free(y); gsl_vector_free(c); return true; /* we do not "analyse" the result (cov matrix mainly) to know if the fit is "good" */ } void testcoeff(double *coeff) { printf ("\n polynomial coefficients using gsl:\n\n"); for (int i = 0; i < DEGREE; i++) { printf (" coeff[%d] : %11.7lf\n", i, coeff[i]); } putchar ('\n'); printf (" computed values using direct calculation:\n\n x, yi, yip\n"); for (unsigned i = 0; i < NP; i++) { printf ("%2u,%.7lf,%.7lf\n", i, y[i], i*i*coeff[2] + i*coeff[1] + coeff[0]); } putchar ('\n'); } int main (void) { #define ITER 1000 for (int i=0; i< ITER; i++) polynomialfit (NP, DEGREE, x, y, coeff); testcoeff(coeff); double sx; for (int i=0; i< ITER; i++) findQuadCoefficients(x, y, coeff, sx, NP); printf("critical point %lf\n", sx); testcoeff(coeff); return 0; }
References:
blog/least_squares_regression_of_parabola.txt · Last modified: 2017-07-17 14:18 by brb
Discussion