001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-present, by David Gilbert and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Regression.java 029 * --------------- 030 * (C) Copyright 2002-present, by David Gilbert. 031 * 032 * Original Author: David Gilbert; 033 * Contributor(s): Peter Kolb (patch 2795746); 034 * 035 */ 036 037package org.jfree.data.statistics; 038 039import org.jfree.chart.util.Args; 040import org.jfree.data.xy.XYDataset; 041 042/** 043 * A utility class for fitting regression curves to data. 044 */ 045public abstract class Regression { 046 047 /** 048 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 049 * the data using ordinary least squares regression. The result is 050 * returned as a double[], where result[0] --> a, and result[1] --> b. 051 * 052 * @param data the data. 053 * 054 * @return The parameters. 055 */ 056 public static double[] getOLSRegression(double[][] data) { 057 058 int n = data.length; 059 if (n < 2) { 060 throw new IllegalArgumentException("Not enough data."); 061 } 062 063 double sumX = 0; 064 double sumY = 0; 065 double sumXX = 0; 066 double sumXY = 0; 067 for (int i = 0; i < n; i++) { 068 double x = data[i][0]; 069 double y = data[i][1]; 070 sumX += x; 071 sumY += y; 072 double xx = x * x; 073 sumXX += xx; 074 double xy = x * y; 075 sumXY += xy; 076 } 077 double sxx = sumXX - (sumX * sumX) / n; 078 double sxy = sumXY - (sumX * sumY) / n; 079 double xbar = sumX / n; 080 double ybar = sumY / n; 081 082 double[] result = new double[2]; 083 result[1] = sxy / sxx; 084 result[0] = ybar - result[1] * xbar; 085 086 return result; 087 088 } 089 090 /** 091 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 092 * the data using ordinary least squares regression. The result is returned 093 * as a double[], where result[0] --> a, and result[1] --> b. 094 * 095 * @param data the data. 096 * @param series the series (zero-based index). 097 * 098 * @return The parameters. 099 */ 100 public static double[] getOLSRegression(XYDataset data, int series) { 101 102 int n = data.getItemCount(series); 103 if (n < 2) { 104 throw new IllegalArgumentException("Not enough data."); 105 } 106 107 double sumX = 0; 108 double sumY = 0; 109 double sumXX = 0; 110 double sumXY = 0; 111 for (int i = 0; i < n; i++) { 112 double x = data.getXValue(series, i); 113 double y = data.getYValue(series, i); 114 sumX += x; 115 sumY += y; 116 double xx = x * x; 117 sumXX += xx; 118 double xy = x * y; 119 sumXY += xy; 120 } 121 double sxx = sumXX - (sumX * sumX) / n; 122 double sxy = sumXY - (sumX * sumY) / n; 123 double xbar = sumX / n; 124 double ybar = sumY / n; 125 126 double[] result = new double[2]; 127 result[1] = sxy / sxx; 128 result[0] = ybar - result[1] * xbar; 129 130 return result; 131 132 } 133 134 /** 135 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 136 * the data using a power regression equation. The result is returned as 137 * an array, where double[0] --> a, and double[1] --> b. 138 * 139 * @param data the data. 140 * 141 * @return The parameters. 142 */ 143 public static double[] getPowerRegression(double[][] data) { 144 145 int n = data.length; 146 if (n < 2) { 147 throw new IllegalArgumentException("Not enough data."); 148 } 149 150 double sumX = 0; 151 double sumY = 0; 152 double sumXX = 0; 153 double sumXY = 0; 154 for (int i = 0; i < n; i++) { 155 double x = Math.log(data[i][0]); 156 double y = Math.log(data[i][1]); 157 sumX += x; 158 sumY += y; 159 double xx = x * x; 160 sumXX += xx; 161 double xy = x * y; 162 sumXY += xy; 163 } 164 double sxx = sumXX - (sumX * sumX) / n; 165 double sxy = sumXY - (sumX * sumY) / n; 166 double xbar = sumX / n; 167 double ybar = sumY / n; 168 169 double[] result = new double[2]; 170 result[1] = sxy / sxx; 171 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 172 173 return result; 174 175 } 176 177 /** 178 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 179 * the data using a power regression equation. The result is returned as 180 * an array, where double[0] --> a, and double[1] --> b. 181 * 182 * @param data the data. 183 * @param series the series to fit the regression line against. 184 * 185 * @return The parameters. 186 */ 187 public static double[] getPowerRegression(XYDataset data, int series) { 188 189 int n = data.getItemCount(series); 190 if (n < 2) { 191 throw new IllegalArgumentException("Not enough data."); 192 } 193 194 double sumX = 0; 195 double sumY = 0; 196 double sumXX = 0; 197 double sumXY = 0; 198 for (int i = 0; i < n; i++) { 199 double x = Math.log(data.getXValue(series, i)); 200 double y = Math.log(data.getYValue(series, i)); 201 sumX += x; 202 sumY += y; 203 double xx = x * x; 204 sumXX += xx; 205 double xy = x * y; 206 sumXY += xy; 207 } 208 double sxx = sumXX - (sumX * sumX) / n; 209 double sxy = sumXY - (sumX * sumY) / n; 210 double xbar = sumX / n; 211 double ybar = sumY / n; 212 213 double[] result = new double[2]; 214 result[1] = sxy / sxx; 215 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 216 217 return result; 218 219 } 220 221 /** 222 * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 223 * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n, 224 * fitted to the data using a polynomial regression equation. 225 * The result is returned as an array with a length of n + 2, 226 * where double[0] --> a0, double[1] --> a1, .., double[n] --> an. 227 * and double[n + 1] is the correlation coefficient R2 228 * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german 229 * edition), pp. 243ff and 327ff. 230 * 231 * @param dataset the dataset ({@code null} not permitted). 232 * @param series the series to fit the regression line against (the series 233 * must have at least order + 1 non-NaN items). 234 * @param order the order of the function (> 0). 235 * 236 * @return The parameters. 237 */ 238 public static double[] getPolynomialRegression(XYDataset dataset, 239 int series, int order) { 240 Args.nullNotPermitted(dataset, "dataset"); 241 int itemCount = dataset.getItemCount(series); 242 if (itemCount < order + 1) { 243 throw new IllegalArgumentException("Not enough data."); 244 } 245 int validItems = 0; 246 double[][] data = new double[2][itemCount]; 247 for(int item = 0; item < itemCount; item++){ 248 double x = dataset.getXValue(series, item); 249 double y = dataset.getYValue(series, item); 250 if (!Double.isNaN(x) && !Double.isNaN(y)){ 251 data[0][validItems] = x; 252 data[1][validItems] = y; 253 validItems++; 254 } 255 } 256 if (validItems < order + 1) { 257 throw new IllegalArgumentException("Not enough data."); 258 } 259 int equations = order + 1; 260 int coefficients = order + 2; 261 double[] result = new double[equations + 1]; 262 double[][] matrix = new double[equations][coefficients]; 263 double sumX = 0.0; 264 double sumY = 0.0; 265 266 for(int item = 0; item < validItems; item++){ 267 sumX += data[0][item]; 268 sumY += data[1][item]; 269 for(int eq = 0; eq < equations; eq++){ 270 for(int coe = 0; coe < coefficients - 1; coe++){ 271 matrix[eq][coe] += Math.pow(data[0][item],eq + coe); 272 } 273 matrix[eq][coefficients - 1] += data[1][item] 274 * Math.pow(data[0][item],eq); 275 } 276 } 277 double[][] subMatrix = calculateSubMatrix(matrix); 278 for (int eq = 1; eq < equations; eq++) { 279 matrix[eq][0] = 0; 280 if (coefficients - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, matrix[eq], 1, coefficients - 1); 281 } 282 for (int eq = equations - 1; eq > -1; eq--) { 283 double value = matrix[eq][coefficients - 1]; 284 for (int coe = eq; coe < coefficients -1; coe++) { 285 value -= matrix[eq][coe] * result[coe]; 286 } 287 result[eq] = value / matrix[eq][eq]; 288 } 289 double meanY = sumY / validItems; 290 double yObsSquare = 0.0; 291 double yRegSquare = 0.0; 292 for (int item = 0; item < validItems; item++) { 293 double yCalc = 0; 294 for (int eq = 0; eq < equations; eq++) { 295 yCalc += result[eq] * Math.pow(data[0][item],eq); 296 } 297 yRegSquare += Math.pow(yCalc - meanY, 2); 298 yObsSquare += Math.pow(data[1][item] - meanY, 2); 299 } 300 double rSquare = yRegSquare / yObsSquare; 301 result[equations] = rSquare; 302 return result; 303 } 304 305 /** 306 * Returns a matrix with the following features: (1) the number of rows 307 * and columns is 1 less than that of the original matrix; (2)the matrix 308 * is triangular, i.e. all elements a (row, column) with column > row are 309 * zero. This method is used for calculating a polynomial regression. 310 * 311 * @param matrix the start matrix. 312 * 313 * @return The new matrix. 314 */ 315 private static double[][] calculateSubMatrix(double[][] matrix){ 316 int equations = matrix.length; 317 int coefficients = matrix[0].length; 318 double[][] result = new double[equations - 1][coefficients - 1]; 319 for (int eq = 1; eq < equations; eq++) { 320 double factor = matrix[0][0] / matrix[eq][0]; 321 for (int coe = 1; coe < coefficients; coe++) { 322 result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe] 323 * factor; 324 } 325 } 326 if (equations == 1) { 327 return result; 328 } 329 // check for zero pivot element 330 if (result[0][0] == 0) { 331 boolean found = false; 332 for (int i = 0; i < result.length; i ++) { 333 if (result[i][0] != 0) { 334 found = true; 335 double[] temp = result[0]; 336 System.arraycopy(result[i], 0, result[0], 0, 337 result[i].length); 338 System.arraycopy(temp, 0, result[i], 0, temp.length); 339 break; 340 } 341 } 342 if (!found) { 343 //System.out.println("Equation has no solution!"); 344 return new double[equations - 1][coefficients - 1]; 345 } 346 } 347 double[][] subMatrix = calculateSubMatrix(result); 348 for (int eq = 1; eq < equations - 1; eq++) { 349 result[eq][0] = 0; 350 if (coefficients - 1 - 1 >= 0) System.arraycopy(subMatrix[eq - 1], 0, result[eq], 1, coefficients - 1 - 1); 351 } 352 return result; 353 } 354 355}