001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-present, by David Gilbert and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-present, by Matthew Wright and Contributors.
031 *
032 * Original Author:  Matthew Wright;
033 * Contributor(s):   David Gilbert;
034 *
035 */
036
037package org.jfree.data.statistics;
038
039import java.util.ArrayList;
040import java.util.Collection;
041import java.util.Collections;
042import java.util.Iterator;
043import java.util.List;
044import org.jfree.chart.util.Args;
045
046/**
047 * A utility class that provides some common statistical functions.
048 */
049public abstract class Statistics {
050
051    /**
052     * Returns the mean of an array of numbers.  This is equivalent to calling
053     * {@code calculateMean(values, true)}.
054     *
055     * @param values  the values ({@code null} not permitted).
056     *
057     * @return The mean.
058     */
059    public static double calculateMean(Number[] values) {
060        return calculateMean(values, true);
061    }
062
063    /**
064     * Returns the mean of an array of numbers.
065     *
066     * @param values  the values ({@code null} not permitted).
067     * @param includeNullAndNaN  a flag that controls whether or not
068     *     {@code null} and {@code Double.NaN} values are included
069     *     in the calculation (if either is present in the array, the result is
070     *     {@link Double#NaN}).
071     *
072     * @return The mean.
073     */
074    public static double calculateMean(Number[] values,
075            boolean includeNullAndNaN) {
076
077        Args.nullNotPermitted(values, "values");
078        double sum = 0.0;
079        double current;
080        int counter = 0;
081        for (int i = 0; i < values.length; i++) {
082            // treat nulls the same as NaNs
083            if (values[i] != null) {
084                current = values[i].doubleValue();
085            }
086            else {
087                current = Double.NaN;
088            }
089            // calculate the sum and count
090            if (includeNullAndNaN || !Double.isNaN(current)) {
091                sum = sum + current;
092                counter++;
093            }
094        }
095        double result = (sum / counter);
096        return result;
097    }
098
099    /**
100     * Returns the mean of a collection of {@code Number} objects.
101     *
102     * @param values  the values ({@code null} not permitted).
103     *
104     * @return The mean.
105     */
106    public static double calculateMean(Collection values) {
107        return calculateMean(values, true);
108    }
109
110    /**
111     * Returns the mean of a collection of {@code Number} objects.
112     *
113     * @param values  the values ({@code null} not permitted).
114     * @param includeNullAndNaN  a flag that controls whether or not
115     *     {@code null} and {@code Double.NaN} values are included
116     *     in the calculation (if either is present in the array, the result is
117     *     {@link Double#NaN}).
118     *
119     * @return The mean.
120     */
121    public static double calculateMean(Collection values,
122            boolean includeNullAndNaN) {
123
124        Args.nullNotPermitted(values, "values");
125        int count = 0;
126        double total = 0.0;
127        Iterator iterator = values.iterator();
128        while (iterator.hasNext()) {
129            Object object = iterator.next();
130            if (object == null) {
131                if (includeNullAndNaN) {
132                    return Double.NaN;
133                }
134            }
135            else {
136                if (object instanceof Number) {
137                    Number number = (Number) object;
138                    double value = number.doubleValue();
139                    if (Double.isNaN(value)) {
140                        if (includeNullAndNaN) {
141                            return Double.NaN;
142                        }
143                    }
144                    else {
145                        total = total + number.doubleValue();
146                        count = count + 1;
147                    }
148                }
149            }
150        }
151        return total / count;
152    }
153
154    /**
155     * Calculates the median for a list of values ({@code Number} objects).
156     * The list of values will be copied, and the copy sorted, before
157     * calculating the median.  To avoid this step (if your list of values
158     * is already sorted), use the {@link #calculateMedian(List, boolean)}
159     * method.
160     *
161     * @param values  the values ({@code null} permitted).
162     *
163     * @return The median.
164     */
165    public static double calculateMedian(List values) {
166        return calculateMedian(values, true);
167    }
168
169    /**
170     * Calculates the median for a list of values ({@code Number} objects).
171     * If {@code copyAndSort} is {@code false}, the list is assumed
172     * to be presorted in ascending order by value.
173     *
174     * @param values  the values ({@code null} permitted).
175     * @param copyAndSort  a flag that controls whether the list of values is
176     *                     copied and sorted.
177     *
178     * @return The median.
179     */
180    public static double calculateMedian(List values, boolean copyAndSort) {
181
182        double result = Double.NaN;
183        if (values != null) {
184            if (copyAndSort) {
185                int itemCount = values.size();
186                List copy = new ArrayList(itemCount);
187                for (int i = 0; i < itemCount; i++) {
188                    copy.add(i, values.get(i));
189                }
190                Collections.sort(copy);
191                values = copy;
192            }
193            int count = values.size();
194            if (count > 0) {
195                if (count % 2 == 1) {
196                    if (count > 1) {
197                        Number value = (Number) values.get((count - 1) / 2);
198                        result = value.doubleValue();
199                    }
200                    else {
201                        Number value = (Number) values.get(0);
202                        result = value.doubleValue();
203                    }
204                }
205                else {
206                    Number value1 = (Number) values.get(count / 2 - 1);
207                    Number value2 = (Number) values.get(count / 2);
208                    result = (value1.doubleValue() + value2.doubleValue())
209                             / 2.0;
210                }
211            }
212        }
213        return result;
214    }
215
216    /**
217     * Calculates the median for a sublist within a list of values
218     * ({@code Number} objects).
219     *
220     * @param values  the values, in any order ({@code null} not permitted).
221     * @param start  the start index.
222     * @param end  the end index.
223     *
224     * @return The median.
225     */
226    public static double calculateMedian(List values, int start, int end) {
227        return calculateMedian(values, start, end, true);
228    }
229
230    /**
231     * Calculates the median for a sublist within a list of values
232     * ({@code Number} objects).  The entire list will be sorted if the
233     * {@code ascending} argument is {@code false}.
234     *
235     * @param values  the values ({@code null} not permitted).
236     * @param start  the start index.
237     * @param end  the end index.
238     * @param copyAndSort  a flag that that controls whether the list of values
239     *                     is copied and sorted.
240     *
241     * @return The median.
242     */
243    public static double calculateMedian(List values, int start, int end,
244                                         boolean copyAndSort) {
245
246        double result = Double.NaN;
247        if (copyAndSort) {
248            List working = new ArrayList(end - start + 1);
249            for (int i = start; i <= end; i++) {
250                working.add(values.get(i));
251            }
252            Collections.sort(working);
253            result = calculateMedian(working, false);
254        }
255        else {
256            int count = end - start + 1;
257            if (count > 0) {
258                if (count % 2 == 1) {
259                    if (count > 1) {
260                        Number value
261                            = (Number) values.get(start + (count - 1) / 2);
262                        result = value.doubleValue();
263                    }
264                    else {
265                        Number value = (Number) values.get(start);
266                        result = value.doubleValue();
267                    }
268                }
269                else {
270                    Number value1 = (Number) values.get(start + count / 2 - 1);
271                    Number value2 = (Number) values.get(start + count / 2);
272                    result
273                        = (value1.doubleValue() + value2.doubleValue()) / 2.0;
274                }
275            }
276        }
277        return result;
278
279    }
280
281    /**
282     * Returns the standard deviation of a set of numbers.
283     *
284     * @param data  the data ({@code null} or zero length array not
285     *     permitted).
286     *
287     * @return The standard deviation of a set of numbers.
288     */
289    public static double getStdDev(Number[] data) {
290        Args.nullNotPermitted(data, "data");
291        if (data.length == 0) {
292            throw new IllegalArgumentException("Zero length 'data' array.");
293        }
294        double avg = calculateMean(data);
295        double sum = 0.0;
296
297        for (int counter = 0; counter < data.length; counter++) {
298            double diff = data[counter].doubleValue() - avg;
299            sum = sum + diff * diff;
300        }
301        return Math.sqrt(sum / (data.length - 1));
302    }
303
304    /**
305     * Fits a straight line to a set of (x, y) data, returning the slope and
306     * intercept.
307     *
308     * @param xData  the x-data ({@code null} not permitted).
309     * @param yData  the y-data ({@code null} not permitted).
310     *
311     * @return A double array with the intercept in [0] and the slope in [1].
312     */
313    public static double[] getLinearFit(Number[] xData, Number[] yData) {
314
315        Args.nullNotPermitted(xData, "xData");
316        Args.nullNotPermitted(yData, "yData");
317        if (xData.length != yData.length) {
318            throw new IllegalArgumentException(
319                "Statistics.getLinearFit(): array lengths must be equal.");
320        }
321
322        double[] result = new double[2];
323        // slope
324        result[1] = getSlope(xData, yData);
325        // intercept
326        result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
327
328        return result;
329
330    }
331
332    /**
333     * Finds the slope of a regression line using least squares.
334     *
335     * @param xData  the x-values ({@code null} not permitted).
336     * @param yData  the y-values ({@code null} not permitted).
337     *
338     * @return The slope.
339     */
340    public static double getSlope(Number[] xData, Number[] yData) {
341        Args.nullNotPermitted(xData, "xData");
342        Args.nullNotPermitted(yData, "yData");
343        if (xData.length != yData.length) {
344            throw new IllegalArgumentException("Array lengths must be equal.");
345        }
346
347        // ********* stat function for linear slope ********
348        // y = a + bx
349        // a = ybar - b * xbar
350        //     sum(x * y) - (sum (x) * sum(y)) / n
351        // b = ------------------------------------
352        //     sum (x^2) - (sum(x)^2 / n
353        // *************************************************
354
355        // sum of x, x^2, x * y, y
356        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
357        int counter;
358        for (counter = 0; counter < xData.length; counter++) {
359            sx = sx + xData[counter].doubleValue();
360            sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
361            sxy = sxy + yData[counter].doubleValue()
362                      * xData[counter].doubleValue();
363            sy = sy + yData[counter].doubleValue();
364        }
365        return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
366
367    }
368
369    /**
370     * Calculates the correlation between two datasets.  Both arrays should
371     * contain the same number of items.  Null values are treated as zero.
372     * <P>
373     * Information about the correlation calculation was obtained from:
374     *
375     * http://trochim.human.cornell.edu/kb/statcorr.htm
376     *
377     * @param data1  the first dataset.
378     * @param data2  the second dataset.
379     *
380     * @return The correlation.
381     */
382    public static double getCorrelation(Number[] data1, Number[] data2) {
383        Args.nullNotPermitted(data1, "data1");
384        Args.nullNotPermitted(data2, "data2");
385        if (data1.length != data2.length) {
386            throw new IllegalArgumentException(
387                "'data1' and 'data2' arrays must have same length."
388            );
389        }
390        int n = data1.length;
391        double sumX = 0.0;
392        double sumY = 0.0;
393        double sumX2 = 0.0;
394        double sumY2 = 0.0;
395        double sumXY = 0.0;
396        for (int i = 0; i < n; i++) {
397            double x = 0.0;
398            if (data1[i] != null) {
399                x = data1[i].doubleValue();
400            }
401            double y = 0.0;
402            if (data2[i] != null) {
403                y = data2[i].doubleValue();
404            }
405            sumX = sumX + x;
406            sumY = sumY + y;
407            sumXY = sumXY + (x * y);
408            sumX2 = sumX2 + (x * x);
409            sumY2 = sumY2 + (y * y);
410        }
411        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
412                * (n * sumY2 - sumY * sumY), 0.5);
413    }
414
415    /**
416     * Returns a data set for a moving average on the data set passed in.
417     *
418     * @param xData  an array of the x data.
419     * @param yData  an array of the y data.
420     * @param period  the number of data points to average
421     *
422     * @return A double[][] the length of the data set in the first dimension,
423     *         with two doubles for x and y in the second dimension
424     */
425    public static double[][] getMovingAverage(Number[] xData, Number[] yData,
426            int period) {
427
428        // check arguments...
429        if (xData.length != yData.length) {
430            throw new IllegalArgumentException("Array lengths must be equal.");
431        }
432
433        if (period > xData.length) {
434            throw new IllegalArgumentException(
435                "Period can't be longer than dataset.");
436        }
437
438        double[][] result = new double[xData.length - period][2];
439        for (int i = 0; i < result.length; i++) {
440            result[i][0] = xData[i + period].doubleValue();
441            // holds the moving average sum
442            double sum = 0.0;
443            for (int j = 0; j < period; j++) {
444                sum += yData[i + j].doubleValue();
445            }
446            sum = sum / period;
447            result[i][1] = sum;
448        }
449        return result;
450
451    }
452
453}