#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Text;

namespace HeuristicLab.DataAnalysis {
  /// <summary>
  /// Static helpers for simple linear statistics: a least-squares line fit over
  /// index-spaced samples, the correlation coefficient of two series, and the
  /// (adjusted) coefficient of determination.
  /// </summary>
  public class LinearStatistics {
    /// <summary>
    /// Calculates linear regression for the given data. The result is given as regression values, which are returned,
    /// as well as the characteristic coefficients <paramref name="a"/> and <paramref name="b"/>.
    /// </summary>
    /// <remarks>
    /// The x value of each sample is its array index (0, 1, ..., n - 1); the fitted line is y = a + b * x.
    /// For a single sample the slope is 0 and the intercept is that sample. For empty input the division
    /// 0 / 0 makes both coefficients NaN and an empty array is returned (provided <c>Statistics.Mean</c>
    /// does not throw for an empty array; that helper is defined outside this file).
    /// </remarks>
    /// <param name="data">The data that is given; linear regression is calculated for these data samples.</param>
    /// <param name="a">Regression coefficient 'a' (output parameter): the intercept of the fitted line.</param>
    /// <param name="b">Regression coefficient 'b' (output parameter): the slope of the fitted line.</param>
    /// <returns>Calculated linear regression values, one per input sample.</returns>
    public static double[] LinearRegression(double[] data, out double a, out double b) {
      int n = data.Length;
      // The x values are the indices 0..n-1, so their mean is (n - 1) / 2 (not n / 2).
      double xMean = (n - 1) / 2.0;
      double yMean = Statistics.Mean(data);

      double ssxx = 0;  // sum of squared x deviations
      double ssxy = 0;  // sum of x deviation times y deviation
      for(int i = 0; i < n; i++) {
        double xDeviation = i - xMean;
        ssxx += xDeviation * xDeviation;
        ssxy += xDeviation * (data[i] - yMean);
      }

      // A single sample has no spread in x (ssxx == 0); fit the constant line through it
      // instead of dividing 0 by 0.
      if(n == 1) {
        b = 0.0;
      } else {
        b = ssxy / ssxx;
      }
      a = yMean - b * xMean;

      double[] result = new double[n];
      for(int x = 0; x < n; x++) {
        result[x] = a + x * b;
      }
      return result;
    }

    /// <summary>
    /// Calculates linear regression for the given data and returns only the regression values;
    /// the coefficients of the fitted line are computed but discarded.
    /// </summary>
    /// <param name="data">The data that is given; linear regression is calculated for these data samples.</param>
    /// <returns>Calculated linear regression values, one per input sample.</returns>
    public static double[] LinearRegression(double[] data) {
      double a, b;
      return LinearRegression(data, out a, out b);
    }

    /// <summary>
    /// Calculates the correlation coefficient of two equally long series as
    /// Sxy / (sqrt(Sxx) * sqrt(Syy)), where Sxx, Syy and Sxy are the sums of squared
    /// deviations and of deviation products around the respective means.
    /// </summary>
    /// <remarks>
    /// No 1/n normalization is applied because such a factor cancels between numerator and denominator.
    /// If either series has no spread the denominator is 0 and the result follows IEEE division (NaN).
    /// </remarks>
    /// <param name="xValues">Samples of the first variable.</param>
    /// <param name="yValues">Samples of the second variable; must have the same length as <paramref name="xValues"/>.</param>
    /// <returns>The correlation coefficient of the two series.</returns>
    /// <exception cref="ArgumentException">The two arrays differ in length.</exception>
    /// <exception cref="NotFiniteNumberException">Either array contains a NaN value.</exception>
    public static double CorrelationCoefficient(double[] xValues, double[] yValues) {
      if(xValues.Length != yValues.Length) {
        throw new ArgumentException("ERROR in CorrelationCoefficient: The given variables have to be equally long!");
      }
      int n = xValues.Length;

      // Reject NaN up front, before any statistics are computed.
      for(int i = 0; i < n; i++) {
        if(double.IsNaN(xValues[i]) || double.IsNaN(yValues[i])) {
          throw new NotFiniteNumberException();
        }
      }

      double xMean = Statistics.Mean(xValues);
      double yMean = Statistics.Mean(yValues);

      double xDeviationSquaredSum = 0.0;
      double yDeviationSquaredSum = 0.0;
      double deviationProductSum = 0.0;
      for(int i = 0; i < n; i++) {
        double xDeviation = xValues[i] - xMean;
        double yDeviation = yValues[i] - yMean;
        xDeviationSquaredSum += xDeviation * xDeviation;
        yDeviationSquaredSum += yDeviation * yDeviation;
        deviationProductSum += xDeviation * yDeviation;
      }

      return deviationProductSum / (Math.Sqrt(xDeviationSquaredSum) * Math.Sqrt(yDeviationSquaredSum));
    }

    #region Coefficient of Determination (R-squared)
    /// <summary>
    /// In statistics, the coefficient of determination (R-squared) is the proportion of a sample variance
    /// of a response variable that is "explained" by the predictor (explanatory) variables when regression is done.
    /// </summary>
    /// <remarks>
    /// Computed as 1 - (sum of squared residuals) / (sum of squared deviations of the original values
    /// from their mean). The lengths of the two arrays are not compared. If the original values have
    /// no spread, the denominator is 0 and the result follows IEEE division.
    /// </remarks>
    /// <param name="originalValues">The original values for which a model shall be created.</param>
    /// <param name="residuals">The errors between original and predicted values.</param>
    /// <returns>The coefficient of determination.</returns>
    public static double CoefficientOfDetermination(double[] originalValues, double[] residuals) {
      double originalValuesMean = Statistics.Mean(originalValues);
      double[] originalValuesMinusMeanSquared = Array.ConvertAll<double, double>(originalValues, delegate(double v) {
        double t = v - originalValuesMean;
        return t * t;
      });
      double totalSumOfSquares = Statistics.Sum(originalValuesMinusMeanSquared);

      double[] residualsSquared = Array.ConvertAll<double, double>(residuals, delegate(double r) {
        return r * r;
      });
      double sumOfSquaredResiduals = Statistics.Sum(residualsSquared);

      return (1.0 - sumOfSquaredResiduals / totalSumOfSquares);
    }
    #endregion

    #region Adjusted Coefficient of Determination (Adjusted R-squared)
    /// <summary>
    /// Calculates the adjusted coefficient of determination,
    /// 1 - (1 - R^2) * (n - 1) / (n - p - 1), where R^2 is the result of
    /// <see cref="CoefficientOfDetermination"/>, n is the number of original values and
    /// p is <paramref name="numberOfExplanatoryTerms"/>.
    /// </summary>
    /// <param name="originalValues">The original values for which a model shall be created.</param>
    /// <param name="residuals">The errors between original and predicted values.</param>
    /// <param name="numberOfExplanatoryTerms">The number of explanatory terms (p) used to penalize R^2.</param>
    /// <returns>
    /// The adjusted coefficient of determination. When n - p - 1 is 0 the result follows IEEE division.
    /// </returns>
    public static double AdjustedCoefficientOfDetermination(double[] originalValues, double[] residuals, int numberOfExplanatoryTerms) {
      double rSquared = CoefficientOfDetermination(originalValues, residuals);
      // n is held as a double so the quotient below is evaluated in floating point.
      double n = originalValues.Length;
      return (1 - (1 - rSquared) * (n - 1) / (n - numberOfExplanatoryTerms - 1));
    }
    #endregion
  }
}