001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-present, by David Gilbert and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-present, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert; 034 * 035 */ 036 037package org.jfree.data.statistics; 038 039import java.util.ArrayList; 040import java.util.Collection; 041import java.util.Collections; 042import java.util.Iterator; 043import java.util.List; 044import org.jfree.chart.util.Args; 045 046/** 047 * A utility class that provides some common statistical functions. 048 */ 049public abstract class Statistics { 050 051 /** 052 * Returns the mean of an array of numbers. This is equivalent to calling 053 * {@code calculateMean(values, true)}. 054 * 055 * @param values the values ({@code null} not permitted). 056 * 057 * @return The mean. 058 */ 059 public static double calculateMean(Number[] values) { 060 return calculateMean(values, true); 061 } 062 063 /** 064 * Returns the mean of an array of numbers. 065 * 066 * @param values the values ({@code null} not permitted). 067 * @param includeNullAndNaN a flag that controls whether or not 068 * {@code null} and {@code Double.NaN} values are included 069 * in the calculation (if either is present in the array, the result is 070 * {@link Double#NaN}). 071 * 072 * @return The mean. 073 */ 074 public static double calculateMean(Number[] values, 075 boolean includeNullAndNaN) { 076 077 Args.nullNotPermitted(values, "values"); 078 double sum = 0.0; 079 double current; 080 int counter = 0; 081 for (int i = 0; i < values.length; i++) { 082 // treat nulls the same as NaNs 083 if (values[i] != null) { 084 current = values[i].doubleValue(); 085 } 086 else { 087 current = Double.NaN; 088 } 089 // calculate the sum and count 090 if (includeNullAndNaN || !Double.isNaN(current)) { 091 sum = sum + current; 092 counter++; 093 } 094 } 095 double result = (sum / counter); 096 return result; 097 } 098 099 /** 100 * Returns the mean of a collection of {@code Number} objects. 101 * 102 * @param values the values ({@code null} not permitted). 103 * 104 * @return The mean. 105 */ 106 public static double calculateMean(Collection values) { 107 return calculateMean(values, true); 108 } 109 110 /** 111 * Returns the mean of a collection of {@code Number} objects. 112 * 113 * @param values the values ({@code null} not permitted). 114 * @param includeNullAndNaN a flag that controls whether or not 115 * {@code null} and {@code Double.NaN} values are included 116 * in the calculation (if either is present in the array, the result is 117 * {@link Double#NaN}). 118 * 119 * @return The mean. 120 */ 121 public static double calculateMean(Collection values, 122 boolean includeNullAndNaN) { 123 124 Args.nullNotPermitted(values, "values"); 125 int count = 0; 126 double total = 0.0; 127 Iterator iterator = values.iterator(); 128 while (iterator.hasNext()) { 129 Object object = iterator.next(); 130 if (object == null) { 131 if (includeNullAndNaN) { 132 return Double.NaN; 133 } 134 } 135 else { 136 if (object instanceof Number) { 137 Number number = (Number) object; 138 double value = number.doubleValue(); 139 if (Double.isNaN(value)) { 140 if (includeNullAndNaN) { 141 return Double.NaN; 142 } 143 } 144 else { 145 total = total + number.doubleValue(); 146 count = count + 1; 147 } 148 } 149 } 150 } 151 return total / count; 152 } 153 154 /** 155 * Calculates the median for a list of values ({@code Number} objects). 156 * The list of values will be copied, and the copy sorted, before 157 * calculating the median. To avoid this step (if your list of values 158 * is already sorted), use the {@link #calculateMedian(List, boolean)} 159 * method. 160 * 161 * @param values the values ({@code null} permitted). 162 * 163 * @return The median. 164 */ 165 public static double calculateMedian(List values) { 166 return calculateMedian(values, true); 167 } 168 169 /** 170 * Calculates the median for a list of values ({@code Number} objects). 171 * If {@code copyAndSort} is {@code false}, the list is assumed 172 * to be presorted in ascending order by value. 173 * 174 * @param values the values ({@code null} permitted). 175 * @param copyAndSort a flag that controls whether the list of values is 176 * copied and sorted. 177 * 178 * @return The median. 179 */ 180 public static double calculateMedian(List values, boolean copyAndSort) { 181 182 double result = Double.NaN; 183 if (values != null) { 184 if (copyAndSort) { 185 int itemCount = values.size(); 186 List copy = new ArrayList(itemCount); 187 for (int i = 0; i < itemCount; i++) { 188 copy.add(i, values.get(i)); 189 } 190 Collections.sort(copy); 191 values = copy; 192 } 193 int count = values.size(); 194 if (count > 0) { 195 if (count % 2 == 1) { 196 if (count > 1) { 197 Number value = (Number) values.get((count - 1) / 2); 198 result = value.doubleValue(); 199 } 200 else { 201 Number value = (Number) values.get(0); 202 result = value.doubleValue(); 203 } 204 } 205 else { 206 Number value1 = (Number) values.get(count / 2 - 1); 207 Number value2 = (Number) values.get(count / 2); 208 result = (value1.doubleValue() + value2.doubleValue()) 209 / 2.0; 210 } 211 } 212 } 213 return result; 214 } 215 216 /** 217 * Calculates the median for a sublist within a list of values 218 * ({@code Number} objects). 219 * 220 * @param values the values, in any order ({@code null} not permitted). 221 * @param start the start index. 222 * @param end the end index. 223 * 224 * @return The median. 225 */ 226 public static double calculateMedian(List values, int start, int end) { 227 return calculateMedian(values, start, end, true); 228 } 229 230 /** 231 * Calculates the median for a sublist within a list of values 232 * ({@code Number} objects). The entire list will be sorted if the 233 * {@code ascending} argument is {@code false}. 234 * 235 * @param values the values ({@code null} not permitted). 236 * @param start the start index. 237 * @param end the end index. 238 * @param copyAndSort a flag that that controls whether the list of values 239 * is copied and sorted. 240 * 241 * @return The median. 242 */ 243 public static double calculateMedian(List values, int start, int end, 244 boolean copyAndSort) { 245 246 double result = Double.NaN; 247 if (copyAndSort) { 248 List working = new ArrayList(end - start + 1); 249 for (int i = start; i <= end; i++) { 250 working.add(values.get(i)); 251 } 252 Collections.sort(working); 253 result = calculateMedian(working, false); 254 } 255 else { 256 int count = end - start + 1; 257 if (count > 0) { 258 if (count % 2 == 1) { 259 if (count > 1) { 260 Number value 261 = (Number) values.get(start + (count - 1) / 2); 262 result = value.doubleValue(); 263 } 264 else { 265 Number value = (Number) values.get(start); 266 result = value.doubleValue(); 267 } 268 } 269 else { 270 Number value1 = (Number) values.get(start + count / 2 - 1); 271 Number value2 = (Number) values.get(start + count / 2); 272 result 273 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 274 } 275 } 276 } 277 return result; 278 279 } 280 281 /** 282 * Returns the standard deviation of a set of numbers. 283 * 284 * @param data the data ({@code null} or zero length array not 285 * permitted). 286 * 287 * @return The standard deviation of a set of numbers. 288 */ 289 public static double getStdDev(Number[] data) { 290 Args.nullNotPermitted(data, "data"); 291 if (data.length == 0) { 292 throw new IllegalArgumentException("Zero length 'data' array."); 293 } 294 double avg = calculateMean(data); 295 double sum = 0.0; 296 297 for (int counter = 0; counter < data.length; counter++) { 298 double diff = data[counter].doubleValue() - avg; 299 sum = sum + diff * diff; 300 } 301 return Math.sqrt(sum / (data.length - 1)); 302 } 303 304 /** 305 * Fits a straight line to a set of (x, y) data, returning the slope and 306 * intercept. 307 * 308 * @param xData the x-data ({@code null} not permitted). 309 * @param yData the y-data ({@code null} not permitted). 310 * 311 * @return A double array with the intercept in [0] and the slope in [1]. 312 */ 313 public static double[] getLinearFit(Number[] xData, Number[] yData) { 314 315 Args.nullNotPermitted(xData, "xData"); 316 Args.nullNotPermitted(yData, "yData"); 317 if (xData.length != yData.length) { 318 throw new IllegalArgumentException( 319 "Statistics.getLinearFit(): array lengths must be equal."); 320 } 321 322 double[] result = new double[2]; 323 // slope 324 result[1] = getSlope(xData, yData); 325 // intercept 326 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 327 328 return result; 329 330 } 331 332 /** 333 * Finds the slope of a regression line using least squares. 334 * 335 * @param xData the x-values ({@code null} not permitted). 336 * @param yData the y-values ({@code null} not permitted). 337 * 338 * @return The slope. 339 */ 340 public static double getSlope(Number[] xData, Number[] yData) { 341 Args.nullNotPermitted(xData, "xData"); 342 Args.nullNotPermitted(yData, "yData"); 343 if (xData.length != yData.length) { 344 throw new IllegalArgumentException("Array lengths must be equal."); 345 } 346 347 // ********* stat function for linear slope ******** 348 // y = a + bx 349 // a = ybar - b * xbar 350 // sum(x * y) - (sum (x) * sum(y)) / n 351 // b = ------------------------------------ 352 // sum (x^2) - (sum(x)^2 / n 353 // ************************************************* 354 355 // sum of x, x^2, x * y, y 356 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 357 int counter; 358 for (counter = 0; counter < xData.length; counter++) { 359 sx = sx + xData[counter].doubleValue(); 360 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 361 sxy = sxy + yData[counter].doubleValue() 362 * xData[counter].doubleValue(); 363 sy = sy + yData[counter].doubleValue(); 364 } 365 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 366 367 } 368 369 /** 370 * Calculates the correlation between two datasets. Both arrays should 371 * contain the same number of items. Null values are treated as zero. 372 * <P> 373 * Information about the correlation calculation was obtained from: 374 * 375 * http://trochim.human.cornell.edu/kb/statcorr.htm 376 * 377 * @param data1 the first dataset. 378 * @param data2 the second dataset. 379 * 380 * @return The correlation. 381 */ 382 public static double getCorrelation(Number[] data1, Number[] data2) { 383 Args.nullNotPermitted(data1, "data1"); 384 Args.nullNotPermitted(data2, "data2"); 385 if (data1.length != data2.length) { 386 throw new IllegalArgumentException( 387 "'data1' and 'data2' arrays must have same length." 388 ); 389 } 390 int n = data1.length; 391 double sumX = 0.0; 392 double sumY = 0.0; 393 double sumX2 = 0.0; 394 double sumY2 = 0.0; 395 double sumXY = 0.0; 396 for (int i = 0; i < n; i++) { 397 double x = 0.0; 398 if (data1[i] != null) { 399 x = data1[i].doubleValue(); 400 } 401 double y = 0.0; 402 if (data2[i] != null) { 403 y = data2[i].doubleValue(); 404 } 405 sumX = sumX + x; 406 sumY = sumY + y; 407 sumXY = sumXY + (x * y); 408 sumX2 = sumX2 + (x * x); 409 sumY2 = sumY2 + (y * y); 410 } 411 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 412 * (n * sumY2 - sumY * sumY), 0.5); 413 } 414 415 /** 416 * Returns a data set for a moving average on the data set passed in. 417 * 418 * @param xData an array of the x data. 419 * @param yData an array of the y data. 420 * @param period the number of data points to average 421 * 422 * @return A double[][] the length of the data set in the first dimension, 423 * with two doubles for x and y in the second dimension 424 */ 425 public static double[][] getMovingAverage(Number[] xData, Number[] yData, 426 int period) { 427 428 // check arguments... 429 if (xData.length != yData.length) { 430 throw new IllegalArgumentException("Array lengths must be equal."); 431 } 432 433 if (period > xData.length) { 434 throw new IllegalArgumentException( 435 "Period can't be longer than dataset."); 436 } 437 438 double[][] result = new double[xData.length - period][2]; 439 for (int i = 0; i < result.length; i++) { 440 result[i][0] = xData[i + period].doubleValue(); 441 // holds the moving average sum 442 double sum = 0.0; 443 for (int j = 0; j < period; j++) { 444 sum += yData[i + j].doubleValue(); 445 } 446 sum = sum / period; 447 result[i][1] = sum; 448 } 449 return result; 450 451 } 452 453}