net.sourceforge.openforecast.DataSet Java Examples

The following examples show how to use net.sourceforge.openforecast.DataSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractTimeBasedModel.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Resolves the name of the time variable for this model, if it has not
 * already been set.
 *
 * <p>The time variable explicitly defined on the data set is preferred.
 * If the data set does not define one, and it has exactly one independent
 * variable, then that variable is used as the time variable. With any
 * other number of independent variables no educated guess is possible,
 * and an IllegalArgumentException is thrown.
 * @param dataSet the data set to use to initialize the time variable.
 * @throws IllegalArgumentException if no time variable has been specified
 * and the data set does not define exactly one independent variable. To
 * correct this, be sure to explicitly specify the time variable in the
 * data set passed to {@link #init}.
 */
protected void initTimeVariable( DataSet dataSet )
    throws IllegalArgumentException
{
    // Already resolved - nothing to do
    if ( timeVariable != null ) {
        return;
    }

    // First choice: the time variable declared by the data set itself
    timeVariable = dataSet.getTimeVariable();
    if ( timeVariable != null ) {
        return;
    }

    // Second choice: the one and only independent variable
    String[] candidates = dataSet.getIndependentVariables();
    if ( candidates.length != 1 ) {
        throw new IllegalArgumentException("Unable to determine the independent time variable for the data set passed to init for "+toString()+". Please use DataSet.setTimeVariable before invoking model.init.");
    }

    timeVariable = candidates[0];
}
 
Example #2
Source File: DataSetTest.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Tests the correct initialization of a DataSet that is constructed as a
 * copy of dataSet1.
 */
public void testDataSet()
{
    // Slack allowed when range-checking the dependent values
    final double tolerance = 0.001;

    DataSet copy = new DataSet( dataSet1 );

    // The copy must hold the same number of entries as its source
    assertTrue( copy.size() == dataSet1.size() );

    // Verify that exactly one independent variable, "x", is reported
    String[] names = copy.getIndependentVariables();
    assertTrue( names.length == 1 );
    assertTrue( names[0].equals("x") );

    // Every dependent value must lie strictly between -tolerance and
    //  SIZE+tolerance
    for ( Iterator<DataPoint> cursor = copy.iterator(); cursor.hasNext(); ) {
        double value = cursor.next().getDependentValue();
        assertTrue( value>-tolerance && value<SIZE+tolerance );
    }
}
 
Example #3
Source File: DelimitedTextOutputterTest.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Tests the correct output of a DataSet to a CSV file. Assumes that the
 * CSVBuilder input is correct and valid.
 *
 * <p>The expected data set is written to a temporary file, read back
 * using a CSVBuilder, and the result compared with the original. The
 * temporary file is removed whether or not the test succeeds.
 * @throws FileNotFoundException if the temporary file cannot be opened.
 * @throws IOException if an I/O error occurs creating, writing or reading
 * the temporary file.
 */
public void testCSVOutput()
    throws FileNotFoundException, IOException
{
    // Create new File object to which output should be sent
    File testFile = File.createTempFile( "test", ".csv" );

    try
        {
            // Create new outputter and use it to write a CSV file
            DelimitedTextOutputter outputter
                = new DelimitedTextOutputter( testFile.getAbsolutePath() );
            outputter.output( expectedDataSet );

            // Use a CSVBuilder to read in the file
            CSVBuilder builder = new CSVBuilder( testFile.getAbsolutePath() );
            DataSet writtenDataSet = builder.build();

            // Compare the expectedDataSet with the writtenDataSet
            assertEquals("Comparing data set written with data set written then read back",
                         expectedDataSet, writtenDataSet);
        }
    finally
        {
            // Clean up - remove test file, even when an exception or a
            //  failed assertion aborts the test part way through
            testFile.delete();
        }
}
 
Example #4
Source File: DelimitedTextOutputterTest.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Creates a dummy data set to be written by all test cases. One data
 * point is created for every combination of x1 and x2, with a dependent
 * value of x1+2*x2+3.14.
 */
public void setUp()
{
    // Exclusive upper bounds for the two independent variables
    final int maxX1 = 10;
    final int maxX2 = 10;

    expectedDataSet = new DataSet();

    // Populate the data set, counting the points as they are added
    int created = 0;
    for ( int x1 = 0; x1 < maxX1; x1++ ) {
        for ( int x2 = 0; x2 < maxX2; x2++ ) {
            DataPoint point = new Observation( x1+2*x2+3.14 );
            point.setIndependentValue( "x1", x1 );
            point.setIndependentValue( "x2", x2 );
            expectedDataSet.add( point );
            created++;
        }
    }

    assertEquals("Checking correct number of data points created",
                 created, expectedDataSet.size());
}
 
Example #5
Source File: ResultSetBuilder.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Builds a DataSet - a collection of DataPoints - from the current
 * ResultSet, adding one DataPoint for each row.
 *
 * <p>The column names are set up first, and the ResultSet is moved back
 * to before its first row if it is not already positioned there. In this
 * implementation, all columns are assumed to contain numeric data. This
 * restriction may be relaxed at a later date.
 * @return a DataSet built from the current input source.
 * @throws SQLException if a database access error occurs.
 */
public DataSet build()
     throws SQLException
{
     DataSet result = new DataSet();

     setColumnNames();

     // Rewind so that the loop below starts at the first record
     if ( !rs.isBeforeFirst() ) {
          rs.beforeFirst();
     }

     // Convert each row into a DataPoint and collect it
     while ( rs.next() ) {
          result.add( build(rs) );
     }

     return result;
}
 
Example #6
Source File: DelimitedTextOutputter.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Writes a DataSet - a collection of DataPoints - to the current writer,
 * one data point at a time, and then flushes the writer.
 *
 * <p>Depending on the setting of outputHeaderRow, a header row containing
 * the independent variable names of the data set is written first. To
 * enable/disable this feature, use the {@link #setOutputHeaderRow} method.
 * @param dataSet the DataSet to be output to the current writer.
 * @throws IOException if an I/O error occurs.
 */
public void output( DataSet dataSet )
    throws IOException
{
    // Optional header row naming the independent variables
    if ( outputHeaderRow ) {
        writeHeader( dataSet.getIndependentVariables() );
    }

    // Write each data point in iteration order
    for ( Iterator<DataPoint> points = dataSet.iterator(); points.hasNext(); ) {
        output( points.next() );
    }

    out.flush();
}
 
Example #7
Source File: TripleExponentialSmoothingModel.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Constructs a new triple exponential smoothing forecasting model, using
 * the given smoothing constants - alpha, beta and gamma. For a valid
 * model to be constructed, you should call init and pass in a data set
 * containing a series of data points with the time variable initialized
 * to identify the independent variable.
 * @param alpha the smoothing constant to use for this exponential
 * smoothing model. Must be a value in the range 0.0-1.0. Values above 0.5
 * are uncommon - though they are still valid and are supported by this
 * implementation.
 * @param beta the second smoothing constant, beta to use in this model
 * to smooth the trend. Must be a value in the range 0.0-1.0. Values above
 * 0.5 are uncommon - though they are still valid and are supported by this
 * implementation.
 * @param gamma the third smoothing constant, gamma to use in this model
 * to smooth the seasonality. Must be a value in the range 0.0-1.0.
 * @throws IllegalArgumentException if the value of any of the smoothing
 * constants are invalid - outside the range 0.0-1.0, or not a number
 * (NaN).
 */
public TripleExponentialSmoothingModel( double alpha,
                                        double beta,
                                        double gamma )
{
    // The range checks are written in negated form so that NaN, for which
    //  every comparison is false, is rejected along with out-of-range values
    if ( !(alpha >= 0.0  &&  alpha <= 1.0) )
        throw new IllegalArgumentException("TripleExponentialSmoothingModel: Invalid smoothing constant, alpha=" + alpha + " - must be in the range 0.0-1.0.");
    
    if ( !(beta >= 0.0  &&  beta <= 1.0) )
        throw new IllegalArgumentException("TripleExponentialSmoothingModel: Invalid smoothing constant, beta=" + beta + " - must be in the range 0.0-1.0.");
    
    if ( !(gamma >= 0.0  &&  gamma <= 1.0) )
        throw new IllegalArgumentException("TripleExponentialSmoothingModel: Invalid smoothing constant, gamma=" + gamma + " - must be in the range 0.0-1.0.");
    
    // Start with empty data sets for the base, trend and seasonal values
    baseValues = new DataSet();
    trendValues = new DataSet();
    seasonalIndex = new DataSet();
    
    this.alpha = alpha;
    this.beta = beta;
    this.gamma = gamma;
}
 
Example #8
Source File: ForecastingChartDemo.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * A helper function to convert a range of data points of a (JFreeChart)
 * TimeSeries object into an OpenForecast DataSet. Each data point uses
 * its index in the series as the value of the independent variable "t".
 * @param series the series of data points stored as a JFreeChart
 * TimeSeries object.
 * @param startIndex the index of the first data point required from the
 * series.
 * @param endIndex the index one past the last data point required from
 * the series. Values greater than the number of items in the series are
 * reduced to that number.
 * @return an OpenForecast DataSet representing the data points extracted
 * from the TimeSeries.
 */
private DataSet getDataSet( TimeSeries series,
                            int startIndex, int endIndex )
{
    // Never read beyond the items actually available in the series
    final int limit = Math.min( endIndex, series.getItemCount() );

    DataSet result = new DataSet();
    int index = startIndex;
    while ( index < limit ) {
        double observed = series.getDataItem(index).getValue().doubleValue();
        DataPoint point = new Observation( observed );
        point.setIndependentValue( "t", index );
        result.add( point );
        index++;
    }

    return result;
}
 
Example #9
Source File: MovingAverageModel.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Used to initialize the moving average model. This method must be
 * called before any other method in the class. Since the moving
 * average model does not derive any equation for forecasting, this
 * method uses the input DataSet to calculate forecast values for all
 * valid values of the independent time variable.
 *
 * <p>If the number of periods has not yet been defined (it is zero or
 * negative), then the number of data points in the given data set is
 * used as the default number of periods, with each period given an equal
 * weight of 1/period.
 * @param dataSet a data set of observations that can be used to
 * initialize the forecasting parameters of the forecasting model.
 */
public void init( DataSet dataSet )
{
    if ( getNumberOfPeriods() <= 0 )
        {
            // Number of periods has not yet been defined
            //  - what's a reasonable number to use?
            
            // Use maximum number of periods as a default. This cannot be
            //  read from getNumberOfPeriods(), which is known to be
            //  non-positive here, so the size of the data set is used.
            // NOTE(review): confirm the data set size is the intended
            //  default for the "maximum number of periods".
            int period = dataSet.size();
            
            // Set weights for moving average model. The division must be
            //  done in floating point - integer division (1/period)
            //  truncates to zero for any period greater than one.
            double[] weights = new double[period];
            for ( int p=0; p<period; p++ )
                weights[p] = 1.0/period;
            
            setWeights( weights );
        }
    
    super.init( dataSet );
}
 
Example #10
Source File: ExponentialSmoothingChartDemo.java    From OpenForecast with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * A helper function to convert a range of data points of a (JFreeChart)
 * TimeSeries object into an OpenForecast DataSet. The index of each data
 * point within the series becomes the value of its independent variable
 * "t".
 * @param series the series of data points stored as a JFreeChart
 * TimeSeries object.
 * @param startIndex the index of the first data point required from the
 * series.
 * @param endIndex the index one past the last data point required from
 * the series. If this exceeds the number of items in the series, then
 * the number of items is used instead.
 * @return an OpenForecast DataSet representing the data points extracted
 * from the TimeSeries.
 */
private DataSet getDataSet( TimeSeries series,
                            int startIndex, int endIndex )
{
    DataSet extracted = new DataSet();

    // Clamp the exclusive upper bound to the size of the series
    final int stop = Math.min( endIndex, series.getItemCount() );

    for ( int t = startIndex; t < stop; t++ ) {
        TimeSeriesDataItem item = series.getDataItem(t);
        double observed = item.getValue().doubleValue();

        DataPoint point = new Observation( observed );
        point.setIndependentValue( "t", t );
        extracted.add( point );
    }

    return extracted;
}
 
Example #11
Source File: DelimitedTextOutputterTest.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Tests the correct output of a DataSet to a CSV file, using a modified
 * delimiter String - a comma followed by a space - and with a header row
 * output. Assumes that the CSVBuilder input is correct and valid.
 *
 * <p>The temporary file used by the test is removed whether or not the
 * test succeeds.
 * @throws FileNotFoundException if the temporary file cannot be opened.
 * @throws IOException if an I/O error occurs creating, writing or reading
 * the temporary file.
 */
public void testAltCSVOutput()
    throws FileNotFoundException, IOException
{
    final String DELIMITER = ", ";
    
    // Create new File object to which output should be sent
    File testFile = File.createTempFile( "test", ".csv" );
    
    try
        {
            // Create new outputter and use it to write a CSV file
            DelimitedTextOutputter outputter
                = new DelimitedTextOutputter( testFile.getAbsolutePath() );
            outputter.setDelimiter( DELIMITER );
            outputter.setOutputHeaderRow( true );
            outputter.output( expectedDataSet );
            
            // Use a CSVBuilder to read in the file
            CSVBuilder builder
                = new CSVBuilder( testFile.getAbsolutePath(), true );
            DataSet writtenDataSet = builder.build();
            
            // Compare the expectedDataSet with the writtenDataSet
            assertEquals("Comparing data set written with data set written then read back",
                         expectedDataSet, writtenDataSet);
        }
    finally
        {
            // Clean up - remove test file, even when an exception or a
            //  failed assertion aborts the test part way through
            testFile.delete();
        }
}
 
Example #12
Source File: TimeSeriesOutputter.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Adds a DataSet - a collection of DataPoints - to the current TimeSeries.
 * Each data point is output in turn, together with the name of the time
 * variable defined by the data set.
 * @param dataSet the DataSet to be output to the current TimeSeries.
 * @throws InstantiationException presumably propagated from the output
 * of an individual data point - confirm against that method.
 * @throws IllegalAccessException presumably propagated from the output
 * of an individual data point - confirm against that method.
 * @throws InvocationTargetException presumably propagated from the output
 * of an individual data point - confirm against that method.
 */
public void output( DataSet dataSet )
    throws InstantiationException, IllegalAccessException,
    InvocationTargetException, InstantiationException
{
    // The same time variable applies to every point in the data set
    final String timeVar = dataSet.getTimeVariable();

    for ( Iterator<DataPoint> points = dataSet.iterator(); points.hasNext(); ) {
        output( points.next(), timeVar );
    }
}
 
Example #13
Source File: RegressionModel.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Initializes the coefficients to use for this regression model. The
 * intercept and slope are derived from the sums of x, y, x*x and x*y over
 * the given data set, so as to give the best fit line for that data.
 *
 * <p>Additionally, the accuracy indicators are calculated based on this
 * data set.
 * @param dataSet the set of observations to use to derive the regression
 * coefficients for this model.
 */
public void init( DataSet dataSet )
{
    final int n = dataSet.size();

    // Running totals needed by the least squares formulae
    double sumX = 0.0;
    double sumY = 0.0;
    double sumXX = 0.0;
    double sumXY = 0.0;

    for ( Iterator<DataPoint> points = dataSet.iterator(); points.hasNext(); ) {
        DataPoint point = points.next();
        double x = point.getIndependentValue( independentVariable );
        double y = point.getDependentValue();

        sumX += x;
        sumY += y;
        sumXX += x*x;
        sumXY += x*y;
    }

    // Slope of the best fit line
    double numerator = n*sumXY - sumX*sumY;
    double denominator = n*sumXX - sumX*sumX;
    slope = numerator / denominator;

    // The best fit line passes through the point (mean x, mean y)
    double xMean = sumX / n;
    double yMean = sumY / n;
    intercept = yMean - slope*xMean;

    // Calculate the accuracy of this model
    calculateAccuracyIndicators( dataSet );
}
 
Example #14
Source File: TimeSeriesBuilder.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Builds a DataSet - a collection of DataPoints - from the current
 * (JFreeChart) TimeSeries. The time variable of the new DataSet is set
 * to the time variable of this builder, and one DataPoint is added for
 * each item in the TimeSeries.
 * @return a DataSet built from the current TimeSeries.
 */
public DataSet build()
{
    DataSet result = new DataSet();
    result.setTimeVariable( getTimeVariable() );

    // Convert each item of the TimeSeries, in index order
    final int itemCount = timeSeries.getItemCount();
    int index = 0;
    while ( index < itemCount ) {
        result.add( build(timeSeries.getDataItem(index)) );
        index++;
    }

    return result;
}
 
Example #15
Source File: OpenForecaster.java    From yawl with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns a forecast value based on the current series.
 *
 * <p>If the series holds fewer than MIN_MEANINGFUL_QUEUE_SIZE entries,
 * the last value of the series itself is returned. Otherwise the best
 * forecasting model for the series is obtained from OpenForecast, its
 * type is printed to standard output, and the last forecast value it
 * produces for the forecast transport data set is returned.
 * @return the last observed value, or the last forecast value.
 */
@Override
public double get() {
    // Too little history to forecast from - use the latest value as is
    if (_series.size() < MIN_MEANINGFUL_QUEUE_SIZE) {
        return getLastValue(_series);
    }

    ForecastingModel bestModel =
            net.sourceforge.openforecast.Forecaster.getBestForecast(_series);
    String selection = "Selected forecasting model: " + bestModel.getForecastType();
    System.out.println(selection);

    // Let the model fill in the placeholder points, then take the last one
    DataSet horizon = getForecastTransport();
    bestModel.forecast(horizon);
    return getLastValue(horizon);
}
 
Example #16
Source File: ExponentialSmoothingChartDemo.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Use the given forecasting model to produce a TimeSeries object
 * representing the periods startIndex through endIndex, and containing
 * the forecast values produced by the model.
 * @param model the forecasting model to use to generate the forecast
 * series.
 * @param initDataSet data set to use to initialize the forecasting model.
 * @param startIndex the index of the first data point to use from the
 * set of potential forecast values.
 * @param endIndex the index of the last data point to use from the set
 * of potential forecast values.
 * @param title a title to give to the TimeSeries created.
 * @return a new TimeSeries, with the given title, containing one forecast
 * value for each data point in the requested range.
 */
private TimeSeries getForecastTimeSeries( ForecastingModel model,
                                          DataSet initDataSet,
                                          int startIndex,
                                          int endIndex,
                                          String title )
{
    // Initialize the forecasting model
    model.init( initDataSet );
    
    // Get range of data required for forecast
    DataSet fcDataSet = getDataSet( fc, startIndex, endIndex );
    
    // Obtain forecast values for the forecast data set
    model.forecast( fcDataSet );

    // Create a new TimeSeries
    TimeSeries series
        = new TimeSeries(title,fc.getTimePeriodClass());
    
    // Iterate through the forecast results, adding to the series. The
    //  iterator is typed so that no raw type or cast is needed.
    Iterator<DataPoint> it = fcDataSet.iterator();
    while ( it.hasNext() )
        {
            DataPoint dp = it.next();

            // The independent variable "t" holds the index of the data
            //  point within the fc series
            int index = (int)dp.getIndependentValue("t");
            series.add( fc.getTimePeriod(index), dp.getDependentValue() );
        }
    
    return series;
}
 
Example #17
Source File: OpenForecastTestCase.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * A helper function that validates that the actual results obtained for
 * a DataSet match the expected results, to within the tolerance given by
 * the caller.
 * @param actualResults the DataSet returned from the forecast method
 *        that contains the data points for which forecasts were done.
 * @param expectedResults an array of expected values for the forecast
 *        data points. The order should match the order of the results
 *        as defined by the DataSet iterator. Must not be null or empty.
 * @param tolerance the tolerance to accept when comparing the actual
 *        results (obtained from a forecasting model) with the expected
 *        results.
 */
protected void checkResults( DataSet actualResults,
                             double[] expectedResults,
                             double tolerance )
{
    // Guard against a mistake in the test case itself
    assertNotNull( "Checking expected results is not null",
                   expectedResults );
    assertTrue( "Checking there are some expected results",
                expectedResults.length > 0 );

    assertEquals( "Checking the correct number of results returned",
                  expectedResults.length, actualResults.size() );

    // Compare each forecast, in iteration order, with the expected value
    //  at the same position
    int position = 0;
    for ( Iterator<DataPoint> cursor = actualResults.iterator();
          cursor.hasNext();
          position++ ) {
        double forecastValue = cursor.next().getDependentValue();

        assertEquals( "Checking result",
                      expectedResults[position], forecastValue, tolerance );
    }
}
 
Example #18
Source File: OpenForecaster.java    From yawl with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns the dependent value of the last data point produced by the
 * iterator of the given data set.
 * @param forecasted the data set to read the last value from.
 * @return the dependent value of the last data point, or 0 if the data
 * set has no data points.
 */
private double getLastValue(DataSet forecasted) {
    Iterator<DataPoint> cursor = forecasted.iterator();
    if (!cursor.hasNext()) {
        return 0;
    }

    // Walk to the end, remembering the most recent point seen
    DataPoint last = cursor.next();
    while (cursor.hasNext()) {
        last = cursor.next();
    }
    return last.getDependentValue();
}
 
Example #19
Source File: DoubleExponentialSmoothingModel.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Constructs a new double exponential smoothing forecasting model, using
 * the given smoothing constants - alpha and gamma. For a valid model to
 * be constructed, you should call init and pass in a data set containing
 * a series of data points with the time variable initialized to identify
 * the independent variable.
 * @param alpha the smoothing constant to use for this exponential
 * smoothing model. Must be a value in the range 0.0-1.0.
 * @param gamma the second smoothing constant, gamma to use in this model
 * to smooth the trend. Must be a value in the range 0.0-1.0.
 * @throws IllegalArgumentException if the value of either smoothing
 * constant is invalid - outside the range 0.0-1.0, or not a number (NaN).
 */
public DoubleExponentialSmoothingModel( double alpha,
                                        double gamma )
{
    // The range checks are written in negated form so that NaN, for which
    //  every comparison is false, is rejected along with out-of-range values
    if ( !(alpha >= 0.0  &&  alpha <= 1.0) )
        throw new IllegalArgumentException("DoubleExponentialSmoothingModel: Invalid smoothing constant, alpha=" + alpha + " - must be in the range 0.0-1.0.");
    
    if ( !(gamma >= 0.0  &&  gamma <= 1.0) )
        throw new IllegalArgumentException("DoubleExponentialSmoothingModel: Invalid smoothing constant, gamma=" + gamma + " - must be in the range 0.0-1.0.");
    
    // Start with an empty data set for the slope values
    slopeValues = new DataSet();
    
    this.alpha = alpha;
    this.gamma = gamma;
}
 
Example #20
Source File: TripleExponentialSmoothingModel.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Used to initialize the triple exponential smoothing model. This method
 * must be called before any other method in the class.
 *
 * <p>The data set must contain seasonal data - that is, it must define
 * more than one period per year - and must hold at least
 * NUMBER_OF_YEARS full years of data points. The initial base, trend and
 * seasonal index values are derived from the data set, and the largest
 * value of the time variable observed in the data set is recorded.
 * @param dataSet a data set of observations that can be used to
 * initialize the forecasting parameters of the forecasting model.
 * @throws IllegalArgumentException if the data set does not define more
 * than one period per year, or contains too few data points.
 */
public void init( DataSet dataSet )
{
    initTimeVariable( dataSet );
    final String timeVar = getTimeVariable();

    // Seasonal smoothing is meaningless without multiple periods per year
    if ( dataSet.getPeriodsPerYear() <= 1 ) {
        throw new IllegalArgumentException("Data set passed to init in the triple exponential smoothing model does not contain seasonal data. Don't forget to call setPeriodsPerYear on the data set to set this.");
    }

    periodsPerYear = dataSet.getPeriodsPerYear();

    // Check we have the minimum amount of data points
    if ( dataSet.size() < NUMBER_OF_YEARS*periodsPerYear ) {
        throw new IllegalArgumentException("TripleExponentialSmoothing models require a minimum of a full two years of data to initialize the model.");
    }

    // Derive the starting base, trend and seasonal index values
    initBaseAndTrendValues( dataSet );
    initSeasonalIndices( dataSet );

    // Record the latest point in time for which there is an observation
    maxObservedTime = Double.NEGATIVE_INFINITY;
    for ( Iterator<DataPoint> points = dataSet.iterator(); points.hasNext(); ) {
        double observedTime = points.next().getIndependentValue( timeVar );
        if ( observedTime > maxObservedTime ) {
            maxObservedTime = observedTime;
        }
    }

    super.init( dataSet );
}
 
Example #21
Source File: ForecastingChartDemo.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Use the given forecasting model to produce a TimeSeries object
 * representing the periods startIndex through endIndex, and containing
 * the forecast values produced by the model.
 * @param model the forecasting model to use to generate the forecast
 * series.
 * @param initDataSet data set to use to initialize the forecasting model.
 * @param startIndex the index of the first data point to use from the
 * set of potential forecast values.
 * @param endIndex the index of the last data point to use from the set
 * of potential forecast values.
 * @param title a title to give to the TimeSeries created.
 * @return a new TimeSeries, with the given title, containing one forecast
 * value for each data point in the requested range.
 */
private TimeSeries getForecastTimeSeries( ForecastingModel model,
                                          DataSet initDataSet,
                                          int startIndex,
                                          int endIndex,
                                          String title )
{
    // Initialize the forecasting model
    model.init( initDataSet );
    
    // Get range of data required for forecast
    DataSet fcDataSet = getDataSet( fc, startIndex, endIndex );
    
    // Obtain forecast values for the forecast data set
    model.forecast( fcDataSet );
    
    // Create a new TimeSeries
    TimeSeries series
        = new TimeSeries(title,fc.getTimePeriodClass());
    
    // Iterate through the forecast results, adding to the series. The
    //  iterator is typed so that no raw type or cast is needed.
    Iterator<DataPoint> it = fcDataSet.iterator();
    while ( it.hasNext() )
        {
            DataPoint dp = it.next();

            // The independent variable "t" holds the index of the data
            //  point within the fc series
            int index = (int)dp.getIndependentValue("t");
            series.add( fc.getTimePeriod(index), dp.getDependentValue() );
        }
    
    return series;
}
 
Example #22
Source File: DataSetTest.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Creates four simple DataSets for use by the tests. The first three
 * DataSets are created to contain the same data (though different
 * DataPoint objects), whereas the fourth DataSet is the same size but
 * contains different data from the others: its "x" values are one higher.
 */
public void setUp()
{
    dataSet1 = new DataSet();
    dataSet2 = new DataSet();
    dataSet3 = new DataSet();
    dataSet4 = new DataSet(); // Different data set

    int count = 0;
    while ( count < SIZE ) {
        // Three equal, but distinct, data points
        DataPoint first = new Observation( (double)count );
        DataPoint second = new Observation( (double)count );
        DataPoint third = new Observation( (double)count );
        first.setIndependentValue( "x", count );
        second.setIndependentValue( "x", count );
        third.setIndependentValue( "x", count );

        // ... and one that differs in its independent value
        DataPoint different = new Observation( (double)count );
        different.setIndependentValue( "x", count+1 );

        dataSet1.add( first );
        dataSet2.add( second );
        dataSet3.add( third );
        dataSet4.add( different );

        count++;
    }

    // Verify each data set contains the correct number of entries
    assertTrue("Checking dataSet1 contains correct number of data points",
               dataSet1.size() == SIZE );
    assertTrue("Checking dataSet2 contains correct number of data points",
               dataSet2.size() == SIZE );
    assertTrue("Checking dataSet3 contains correct number of data points",
               dataSet3.size() == SIZE );
    assertTrue("Checking dataSet4 contains correct number of data points",
               dataSet4.size() == SIZE );
}
 
Example #23
Source File: TripleExponentialSmoothingModel.java    From OpenForecast with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Factory method that returns a best fit triple exponential smoothing
 * model for the given data set. This, like the overloaded
 * {@link #getBestFitModel(DataSet)}, attempts to derive "good" -
 * hopefully near optimal - values for the alpha and beta smoothing
 * constants.
 *
 * <p>To determine which model is "best", this method currently uses only
 * the Mean Squared Error (MSE). Future versions may use other measures in
 * addition to the MSE. However, the resulting "best fit" model - and the
 * associated values of alpha and beta - is expected to be very similar
 * either way.
 *
 * <p>Note that the approach used to calculate the best smoothing
 * constants - alpha and beta - <em>may</em> end up choosing values near
 * a local optimum. In other words, there <em>may</em> be other values for
 * alpha and beta that result in an even better model.
 * @param dataSet the observations for which a "best fit" triple
 * exponential smoothing model is required.
 * @param alphaTolerance the required precision/accuracy - or tolerance
 * of error - required in the estimate of the alpha smoothing constant.
 * Must be in the range 0.0-0.5.
 * @param betaTolerance the required precision/accuracy - or tolerance
 * of error - required in the estimate of the beta smoothing constant.
 * Must be in the range 0.0-0.5.
 * @return a best fit triple exponential smoothing model for the given
 * data set.
 * @throws IllegalArgumentException if the data set contains too few data
 * points, or if either tolerance is outside the range 0.0-0.5.
 */
public static TripleExponentialSmoothingModel
    getBestFitModel( DataSet dataSet,
                     double alphaTolerance, double betaTolerance )
{
    // Check we have the minimum amount of data points
    if ( dataSet.size() < NUMBER_OF_YEARS*dataSet.getPeriodsPerYear() ) {
        throw new IllegalArgumentException("TripleExponentialSmoothing models require a minimum of a full two years of data in the data set.");
    }

    // Both tolerances must lie in the range 0.0-0.5
    if ( alphaTolerance < 0.0  || alphaTolerance > 0.5 ) {
        throw new IllegalArgumentException("The value of alphaTolerance must be significantly less than 1.0, and no less than 0.0. Suggested value: "+DEFAULT_SMOOTHING_CONSTANT_TOLERANCE);
    }
    if ( betaTolerance < 0.0  || betaTolerance > 0.5 ) {
        throw new IllegalArgumentException("The value of betaTolerance must be significantly less than 1.0, and no less than 0.0. Suggested value: "+DEFAULT_SMOOTHING_CONSTANT_TOLERANCE);
    }

    // Three starting candidates; the second argument is presumably the
    //  alpha value (0.0, 0.5 and 1.0) - confirm against findBestBeta
    TripleExponentialSmoothingModel lowCandidate
        = findBestBeta( dataSet, 0.0, 0.0, 1.0, betaTolerance );
    TripleExponentialSmoothingModel midCandidate
        = findBestBeta( dataSet, 0.5, 0.0, 1.0, betaTolerance );
    TripleExponentialSmoothingModel highCandidate
        = findBestBeta( dataSet, 1.0, 0.0, 1.0, betaTolerance );

    // Hand the three candidates and both tolerances to findBest
    return findBest( dataSet, lowCandidate, midCandidate, highCandidate,
                     alphaTolerance, betaTolerance );
}
 
Example #24
Source File: OpenForecaster.java    From yawl with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Builds a DataSet of placeholder observations (dependent value 0.0), one
 * per lookahead step, each tagged with a "timestamp" independent value.
 * The first timestamp is the current system time and each later one is
 * advanced by the configured poll interval.
 * @return the DataSet of placeholder observations.
 */
private DataSet getForecastTransport() {
    DataSet transport = new DataSet();
    long timestamp = System.currentTimeMillis();

    int step = 0;
    while (step < Config.getForecastLookahead()) {
        DataPoint placeholder = new Observation(0.0);
        placeholder.setIndependentValue("timestamp", timestamp);
        transport.add(placeholder);

        // Move on to the time of the next expected poll
        timestamp += Config.getPollInterval();
        step++;
    }

    return transport;
}
 
Example #25
Source File: AbstractForecastingModel.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Computes the accuracy indicators (AIC, bias, MAD, MAPE, MSE and SAE) of
 * this model by forecasting every point of the given DataSet and comparing
 * each forecast with the observed dependent value. Also marks the model
 * as initialized.
 * @param dataSet the observed data to evaluate this model against.
 */
protected void calculateAccuracyIndicators( DataSet dataSet )
{
    // The model is flagged as initialized before forecast is invoked
    initialized = true;

    // Forecast over a separate DataSet constructed from the observed one
    DataSet predicted = new DataSet( dataSet );
    forecast( predicted );

    // Running totals of the error terms
    double errorTotal = 0.0;
    double absErrorTotal = 0.0;
    double absPercentErrorTotal = 0.0;
    double squaredErrorTotal = 0.0;

    // Walk the observed and forecast points in step with each other
    Iterator<DataPoint> observedIt = dataSet.iterator();
    Iterator<DataPoint> predictedIt = predicted.iterator();
    while ( observedIt.hasNext() )
        {
            double actual = observedIt.next().getDependentValue();
            double estimate = predictedIt.next().getDependentValue();

            // Signed error: forecast minus observation
            double deviation = estimate - actual;
            errorTotal += deviation;
            absErrorTotal += Math.abs( deviation );
            absPercentErrorTotal += Math.abs( deviation / actual );
            squaredErrorTotal += deviation*deviation;
        }

    // Turn the totals into the published indicators
    int n = dataSet.size();
    int p = getNumberOfPredictors();
    double meanSquaredError = squaredErrorTotal / n;

    accuracyIndicators.setAIC( n*Math.log(2*Math.PI)
               + Math.log(meanSquaredError)
               + 2 * ( p+2 ) );
    accuracyIndicators.setBias( errorTotal / n );
    accuracyIndicators.setMAD( absErrorTotal / n );
    accuracyIndicators.setMAPE( absPercentErrorTotal / n );
    accuracyIndicators.setMSE( meanSquaredError );
    accuracyIndicators.setSAE( absErrorTotal );
}
 
Example #26
Source File: TimeSeriesBuilderTest.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Checks that a TimeSeriesBuilder converts a simple TimeSeries into a
 * DataSet with the expected number of data points, exactly one
 * independent variable carrying the requested name, and the expected
 * values.
 */
public void testBuilder()
{
    final int periodCount = 100;
    final String timeVariableName = "t";

    // Populate a daily series whose value equals its index, recording
    // the same values as the expected results
    double[] expected = new double[ periodCount ];
    TimeSeries series = new TimeSeries("Simple time series");
    RegularTimePeriod current = new Day();

    int index = 0;
    while ( index < periodCount )
        {
            expected[index] = index;
            series.add(current,index);
            current = current.next();
            index++;
        }

    // Convert the series into a DataSet
    TimeSeriesBuilder builder
        = new TimeSeriesBuilder( series, timeVariableName );
    DataSet dataSet = builder.build();

    // One data point is expected per time period
    assertEquals( "DataSet created is of the wrong size",
                  periodCount, dataSet.size() );

    // Exactly one independent variable - the time variable - is expected
    String[] variableNames = dataSet.getIndependentVariables();
    assertEquals( "Checking the correct number of independent variables",
                  1, variableNames.length );
    assertEquals( "Independent variable not set as expected",
                  timeVariableName, variableNames[0] );

    // Compare the data points with the expected values. This relies on
    //  the order in which the data set returns its data points.
    checkResults( dataSet, expected );
}
 
Example #27
Source File: MultipleLinearRegressionTest.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Tests the use of user-defined coefficients with the multiple
 * variable linear regression model, where the model is constructed with
 * an explicit list of independent variable names.
 */
public void testUserDefinedCoefficientsWithNamedVars()
{
    // Reset the observedData, to ensure that it is *not* used
    observedData.clear();
    observedData = null;
    
    // Initialize coefficients: param1..param5 get 1, 10, 100, 1000, 10000
    final int NUMBER_OF_COEFFS = 5;
    double intercept = 0.12345;
    Hashtable<String,Double> coeffs = new Hashtable<String,Double>();
    String varNames[] = new String[NUMBER_OF_COEFFS];
    for ( int c=0; c<NUMBER_OF_COEFFS; c++ )
        {
            varNames[c] = "param"+(c+1);
            // Double.valueOf replaces the deprecated Double constructor
            coeffs.put( varNames[c], Double.valueOf( Math.pow(10,c) ) );
        }

    // Create a data set for forecasting
    DataSet fcValues = new DataSet();

    for ( int count=0; count<10; count++ )
        {
            DataPoint dp = new Observation( 0.0 );
            dp.setIndependentValue( "param1", count+4 );
            dp.setIndependentValue( "param2", count+3 );
            dp.setIndependentValue( "param3", count+2 );
            dp.setIndependentValue( "param4", count+1 );
            dp.setIndependentValue( "param5", count   );
            fcValues.add( dp );
        }

    // Get forecast values
    MultipleLinearRegressionModel model
        = new MultipleLinearRegressionModel( varNames );

    model.init( intercept, coeffs );
    DataSet results = model.forecast( fcValues );
    assertEquals( "Forecast should return one result per input data point",
                  fcValues.size(), results.size() );

    // These are the expected results
    double expectedResult[] = {   1234.12345,
                                 12345.12345,
                                 23456.12345,
                                 34567.12345,
                                 45678.12345,
                                 56789.12345,
                                 67900.12345,
                                 79011.12345,
                                 90122.12345,
                                101233.12345 };

    // Check results against expected results
    checkResults( results, expectedResult );
}
 
Example #28
Source File: MultipleLinearRegressionTest.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Tests the use of user-defined coefficients with the multiple
 * variable linear regression model, where the model is constructed
 * without naming its independent variables up front.
 */
public void testUserDefinedCoefficients()
{
    // Reset the observedData, to ensure that it is *not* used
    observedData.clear();
    observedData = null;
    
    // Initialize coefficients: param1..param5 get 1, 10, 100, 1000, 10000
    final int NUMBER_OF_COEFFS = 5;
    double intercept = 0.12345;
    Hashtable<String,Double> coeffs = new Hashtable<String,Double>();
    String varNames[] = new String[NUMBER_OF_COEFFS];
    for ( int c=0; c<NUMBER_OF_COEFFS; c++ )
        {
            varNames[c] = "param"+(c+1);
            // Double.valueOf replaces the deprecated Double constructor
            coeffs.put( varNames[c], Double.valueOf( Math.pow(10,c) ) );
        }

    // Create a data set for forecasting
    DataSet fcValues = new DataSet();

    for ( int count=0; count<10; count++ )
        {
            DataPoint dp = new Observation( 0.0 );
            dp.setIndependentValue( "param1", count+4 );
            dp.setIndependentValue( "param2", count+3 );
            dp.setIndependentValue( "param3", count+2 );
            dp.setIndependentValue( "param4", count+1 );
            dp.setIndependentValue( "param5", count   );
            fcValues.add( dp );
        }

    // Get forecast values
    MultipleLinearRegressionModel model
        = new MultipleLinearRegressionModel();

    model.init( intercept, coeffs );
    DataSet results = model.forecast( fcValues );
    assertEquals( "Forecast should return one result per input data point",
                  fcValues.size(), results.size() );

    // These are the expected results
    double expectedResult[] = {   1234.12345,
                                 12345.12345,
                                 23456.12345,
                                 34567.12345,
                                 45678.12345,
                                 56789.12345,
                                 67900.12345,
                                 79011.12345,
                                 90122.12345,
                                101233.12345 };

    // Check results against expected results
    checkResults( results, expectedResult );
}
 
Example #29
Source File: CSVBuilderTest.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Tests the correct initialization of a DataSet from a CSV file where
 * the input is valid, yet poorly and irregularly formatted. For example,
 * the CSVBuilder is supposed to treat as a zero field two commas following
 * each other. This test will also test naming the columns and the use of
 * blank lines and comments in the input.
 *
 * <p>The temporary CSV file is removed even when the test fails.
 * @throws FileNotFoundException if the temporary file cannot be opened.
 * @throws IOException if the temporary file cannot be created or read.
 */
public void testExtremeCSVBuilder()
    throws FileNotFoundException, IOException
{
    // Constants used to determine size of test
    double expectedValue[] = { 4,5,6,7,8 };
    int numberOfDataPoints = expectedValue.length;
    
    // Create test CSV file
    File testFile = File.createTempFile( "test", ".csv" );
    try
        {
            PrintStream out
                = new PrintStream( new FileOutputStream(testFile) );
            try
                {
                    out.println("# This is a test CSV file with various 'peculiarities'");
                    out.println(" # thrown in to try and trip it up");
                    out.println("Field1, Field2, \"Field, 3\", Observation");
                    out.println("-1, -2 ,-3,4");
                    out.println(",,,5");
                    out.println(" 1 , 2 , 3 , 6 ");
                    out.println(" 2, 4, 6, 7");
                    out.println("3 ,6 ,9 ,8");
                }
            finally
                {
                    // Always release the stream, even if a write fails
                    out.close();
                }
            
            // Create CSV builder and use it to create the DataSet
            CSVBuilder builder = new CSVBuilder( testFile, true );
            DataSet dataSet = builder.build();
            
            // Verify data set contains the correct number of entries
            assertEquals( "DataSet created is of the wrong size",
                          numberOfDataPoints, dataSet.size() );
            
            // Verify that only three independent variable names are
            //  reported
            String[] independentVariables
                = dataSet.getIndependentVariables();
            assertEquals( "Checking the correct number of independent variables",
                          3, independentVariables.length );
            
            // Note these will have been sorted into alphabetical order
            assertEquals( "Checking variable 0 name is as expected",
                          "Field, 3", independentVariables[0] );
            assertEquals( "Checking variable 1 name is as expected",
                          "Field1", independentVariables[1] );
            assertEquals( "Checking variable 2 name is as expected",
                          "Field2", independentVariables[2] );
            
            // Test the data set created by the builder
            Iterator<DataPoint> it = dataSet.iterator();
            while ( it.hasNext() )
                {
                    DataPoint dataPoint = it.next();
                    double field1 = dataPoint.getIndependentValue("Field1");
                    double field2 = dataPoint.getIndependentValue("Field2");
                    double field3
                        = dataPoint.getIndependentValue("Field, 3");
                    
                    // field2 was set to twice field1
                    // field3 was set to three times field1
                    assertTrue( "Checking independent values are correct",
                                field2==2*field1 && field3==3*field1 );
                    
                    // The data was set up with this simple equation
                    double expectedResult = 5.0 + field1;
                    
                    assertEquals("Checking data point "+dataPoint,
                                 expectedResult,
                                 dataPoint.getDependentValue(),
                                 TOLERANCE);
                }
        }
    finally
        {
            // Clean up - remove test file, whether or not the test passed
            testFile.delete();
        }
}
 
Example #30
Source File: CSVBuilder.java    From OpenForecast with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Retrieves a DataSet - a collection of DataPoints - from the current
 * input source. The DataSet should contain all DataPoints defined by
 * the input source.
 *
 * <p>In general, build will attempt to convert all lines/rows in the CSV
 * input to data points. The exceptions are as follows:
 * <ul>
 *  <li>Blank lines (lines containing only whitespace) will be ignored,
 *      and can be used for spacing in the input.</li>
 *  <li>Lines beginning with a '#' will be treated as comments, and will
 *      be ignored.</li>
 *  <li>If a header row is included - as specified in one of the
 *      constructors - then it will be treated as containing field/variable
 *      names for use by the DataSet.</li>
 * </ul>
 *
 * <p>When the first significant line is treated as data, the number of
 * independent variables is taken to be the number of separators on that
 * line, including a separator in the very first column.
 * @return a DataSet built from the current input source.
 * @throws IOException if an error occurred reading from the CSV file.
 */
public DataSet build()
    throws IOException
{
    DataSet dataSet = new DataSet();
    
    boolean firstLineRead = false;
    
    BufferedReader reader = new BufferedReader( fileReader );
    String line;
    
    // readLine returns null once EOF is reached
    while ( (line = reader.readLine()) != null )
        {
            line = line.trim();
            
            // Skip blank lines
            if ( line.length() == 0 )
                continue;
            
            // Skip comment lines
            if ( line.startsWith( "#" ) )
                continue;
            
            if ( !firstLineRead )
                {
                    firstLineRead = true;
                    if ( hasHeaderRow != HAS_HEADER_ROW_FALSE )
                        {
                            try
                                {
                                    // Treat first line as header
                                    readHeaderRow( line );
                                    continue;
                                }
                            catch ( NoHeaderException nhex )
                                {
                                    // No header row found, so treat it
                                    //  as the first row of data
                                }
                        }
                    
                    // Calculate how many independent values per line.
                    // indexOf returns 0 for a separator in the first
                    // column, so the test must be >= 0 or a line such
                    // as ",,,5" would be counted as having no variables
                    // TODO: Fix this to handle quoted commas
                    int n = 0;
                    for ( int pos=0;
                          (pos=line.indexOf(SEPARATOR,pos)) >= 0;
                          pos++ )
                        n++;
                    setNumberOfVariables( n );
                }
            
            DataPoint dp = build( line );
            dataSet.add( dp );
        }
    
    return dataSet;
}