001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.random;
019    
020    import java.io.IOException;
021    import java.io.File;
022    import java.net.URL;
023    import java.util.List;
024    
025    import org.apache.commons.math.stat.descriptive.StatisticalSummary;
026    import org.apache.commons.math.stat.descriptive.SummaryStatistics;
027    
028    /**
029     * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html">
030     * empirical probability distribution</a> -- a probability distribution derived
031     * from observed data without making any assumptions about the functional form
032     * of the population distribution that the data come from.
033     * <p>Implementations of this interface maintain data structures, called
034     * <i>distribution digests</i>, that describe empirical distributions and
035     * support the following operations:</p> <ul>
036     * <li>loading the distribution from an array, file or URL of observed values</li>
037     * <li>dividing the input data into "bin ranges" and reporting bin frequency
038     *     counts (data for histogram)</li>
039     * <li>reporting univariate statistics describing the full set of data values
040     *     as well as the observations within each bin</li>
041     * <li>generating random values from the distribution</li>
042     * </ul>
043     * <p>Applications can use <code>EmpiricalDistribution</code> implementations to
044     * build grouped frequency histograms representing the input data or to
045     * generate random values "like" those in the input data -- i.e., the values
046     * generated will follow the distribution of the values that were loaded.</p>
047     *
048     * @version $Revision: 817128 $ $Date: 2009-09-21 03:30:53 +0200 (lun. 21 sept. 2009) $
049     */
050    public interface EmpiricalDistribution {
051    
052        /**
053         * Computes the empirical distribution from the provided
054         * array of numbers.
055         *
056         * @param dataArray the array of observed data values
057         */
058        void load(double[] dataArray);
059    
060        /**
061         * Computes the empirical distribution using data read from a file.
062         *
063         * @param file the input file
064         * @throws IOException if an IO error occurs
065         */
066        void load(File file) throws IOException;
067    
068        /**
069         * Computes the empirical distribution using data read from a URL.
070         *
071         * @param url url of the input file
072         * @throws IOException if an IO error occurs
073         */
074        void load(URL url) throws IOException;
075    
076        /**
077         * Generates a random value from this distribution.
078         * <p><strong>Preconditions:</strong></p><ul>
079         * <li>the distribution must be loaded before invoking this method</li></ul>
080         *
081         * @return the random value
082         * @throws IllegalStateException if the distribution has not been loaded
083         */
084        double getNextValue() throws IllegalStateException;
085    
086    
087        /**
088         * Returns a
089         * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary}
090         * describing this distribution.
091         * <p><strong>Preconditions:</strong></p><ul>
092         * <li>the distribution must be loaded before invoking this method</li>
093         * </ul>
094         *
095         * @return the sample statistics
096         * @throws IllegalStateException if the distribution has not been loaded
097         */
098        StatisticalSummary getSampleStats() throws IllegalStateException;
099    
100        /**
101         * Indicates whether or not the distribution has been loaded.
102         *
103         * @return true if the distribution has been loaded
104         */
105        boolean isLoaded();
106    
107         /**
108         * Returns the number of bins into which the input data are divided.
109         *
110         * @return the number of bins
111         */
112        int getBinCount();
113    
114        /**
115         * Returns a list of
116         * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics}
117         * containing statistics describing the values in each of the bins.  The
118         * list is indexed by bin number.
119         *
120         * @return list of bin statistics
121         */
122        List<SummaryStatistics> getBinStats();
123    
124        /**
125         * Returns the array of upper bounds for the bins.  Bins are: <br>
126         * <code>[min, upperBounds[0]], (upperBounds[0], upperBounds[1]], ...,
127         *  (upperBounds[binCount-2], upperBounds[binCount-1] = max]</code>.
128         *
129         * @return array of bin upper bounds
130         */
131        double[] getUpperBounds();
132    
133    }