Python pandas.infer_freq() Examples

The following are 19 code examples of pandas.infer_freq(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas , or try the search function .
Example #1
Source File: utils.py    From bigbang with GNU Affero General Public License v3.0 6 votes vote down vote up
def add_freq(idx, freq=None):
    """Add a frequency attribute to idx, through inference or directly.

    Returns a copy.  If `freq` is None, it is inferred.
    """

    idx = idx.copy()
    if freq is None:
        if idx.freq is None:
            freq = pd.infer_freq(idx)
        else:
            return idx
    idx.freq = pd.tseries.frequencies.to_offset(freq)
    if idx.freq is None:
        raise AttributeError('no discernible frequency found to `idx`.  Specify'
                             ' a frequency string with `freq`.')
    return idx 
Example #2
Source File: test_base.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example #3
Source File: test_base.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example #4
Source File: datachecks.py    From xclim with Apache License 2.0 6 votes vote down vote up
def check_daily(var):
    r"""Assert that the series is daily and monotonic (no jumps in time index).

    A ValueError is raised otherwise."""

    t0, t1 = var.time[:2]

    # This won't work for non-standard calendars. Needs to be implemented in xarray. Comment for now
    if isinstance(t0.values, np.datetime64):
        if pd.infer_freq(var.time.to_pandas()) != "D":
            raise ValidationError("time series is not recognized as daily.")

    # Check that the first time step is one day.
    if np.timedelta64(dt.timedelta(days=1)) != (t1 - t0).data:
        raise ValidationError("time series is not daily.")

    # Check that the series does not go backward in time
    if not var.time.to_pandas().is_monotonic_increasing:
        raise ValidationError("time index is not monotonically increasing.") 
Example #5
Source File: test_base.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example #6
Source File: test_base.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example #7
Source File: test_base.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example #8
Source File: data.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _check_period_index(x, freq="M"):
    from pandas import PeriodIndex, DatetimeIndex
    if not isinstance(x.index, (DatetimeIndex, PeriodIndex)):
        raise ValueError("The index must be a DatetimeIndex or PeriodIndex")

    if x.index.freq is not None:
        inferred_freq = x.index.freqstr
    else:
        inferred_freq = pd.infer_freq(x.index)
    if not inferred_freq.startswith(freq):
        raise ValueError("Expected frequency {}. Got {}".format(inferred_freq,
                                                                freq)) 
Example #9
Source File: returns.py    From pyfinance with MIT License 5 votes vote down vote up
def _try_get_freq(self):
        if self.freq is None:
            freq = pd.infer_freq(self.index)
            if freq is None:
                raise FrequencyError(
                    "No frequency was passed at"
                    " instantiation, and one cannot"
                    " be inferred."
                )
            freq = utils.get_anlz_factor(freq)
        else:
            freq = utils.get_anlz_factor(self.freq)
        return freq 
Example #10
Source File: perf.py    From tia with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def periodicity(freq_or_frame):
    """
    resolve the number of periods per year
    """
    if hasattr(freq_or_frame, 'rule_code'):
        rc = freq_or_frame.rule_code
        rc = rc.split('-')[0]
        factor = PER_YEAR_MAP.get(rc, None)
        if factor is not None:
            return factor / abs(freq_or_frame.n)
        else:
            raise Exception('Failed to determine periodicity. No factor mapping for %s' % freq_or_frame)
    elif isinstance(freq_or_frame, basestring):
        factor = PER_YEAR_MAP.get(freq_or_frame, None)
        if factor is not None:
            return factor
        else:
            raise Exception('Failed to determine periodicity. No factor mapping for %s' % freq_or_frame)
    elif isinstance(freq_or_frame, (pd.Series, pd.DataFrame, pd.TimeSeries)):
        freq = freq_or_frame.index.freq
        if not freq:
            freq = pd.infer_freq(freq_or_frame.index)
            if freq:
                return periodicity(freq)
            else:
                # Attempt to resolve it
                import warnings

                freq = guess_freq(freq_or_frame.index)
                warnings.warn('frequency not set. guessed it to be %s' % freq)
                return periodicity(freq)
        else:
            return periodicity(freq)
    else:
        raise ValueError("periodicity expects DataFrame, Series, or rule_code property") 
Example #11
Source File: test_base.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_constructor_from_series(self):

        expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'),
                                  Timestamp('20130101')])
        s = Series([Timestamp('20110101'), Timestamp('20120101'),
                    Timestamp('20130101')])
        result = Index(s)
        tm.assert_index_equal(result, expected)
        result = DatetimeIndex(s)
        tm.assert_index_equal(result, expected)

        # GH 6273
        # create from a series, passing a freq
        s = Series(pd.to_datetime(['1-1-1990', '2-1-1990', '3-1-1990',
                                   '4-1-1990', '5-1-1990']))
        result = DatetimeIndex(s, freq='MS')
        expected = DatetimeIndex(['1-1-1990', '2-1-1990', '3-1-1990',
                                  '4-1-1990', '5-1-1990'], freq='MS')
        tm.assert_index_equal(result, expected)

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                      '5-1-1990']
        result = DatetimeIndex(df['date'], freq='MS')
        expected.name = 'date'
        tm.assert_index_equal(result, expected)
        assert df['date'].dtype == object

        exp = pd.Series(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                         '5-1-1990'], name='date')
        tm.assert_series_equal(df['date'], exp)

        # GH 6274
        # infer freq of same
        result = pd.infer_freq(df['date'])
        assert result == 'MS' 
Example #12
Source File: test_market.py    From pyTD with MIT License 5 votes vote down vote up
def test_history_dates(self):
        start = datetime.date(2018, 1, 24)
        end = datetime.date(2018, 2, 12)

        data = pyTD.market.get_price_history("AAPL", start_date=start,
                                             end_date=end,
                                             output_format='pandas')

        assert data.iloc[0].name.date() == start
        assert data.iloc[-1].name.date() == datetime.date(2018, 2, 9)

        assert pd.infer_freq(data.index) == "B" 
Example #13
Source File: normalization.py    From rdtools with MIT License 5 votes vote down vote up
def check_series_frequency(series, series_description):
    '''Returns the inferred frequency of a pandas series, raises ValueError
    using series_description if it can't. series_description should be a string'''

    if series.index.freq is None:
        freq = pd.infer_freq(series.index)
        if freq is None:
            error_string = ('Could not infer frequency of ' + series_description +
                            ', which must be a regular time series')
            raise ValueError(error_string)
    else:
        freq = series.index.freq
    return freq 
Example #14
Source File: ml.py    From timeflux with MIT License 5 votes vote down vote up
def _reindex(self, data, times, columns):

        if len(data) != len(times):

            if self.resample:
                # Resample at a specific frequency
                kwargs = {"periods": len(data)}
                if self.resample_rate is None:
                    kwargs["freq"] = pd.infer_freq(times)
                    kwargs["freq"] = pd.tseries.frequencies.to_offset(kwargs["freq"])
                else:
                    kwargs["freq"] = pd.DateOffset(seconds=1 / self.resample_rate)
                if self.resample_direction == "right":
                    kwargs["start"] = times[0]
                elif self.resample_direction == "left":
                    kwargs["end"] = times[-1]
                else:

                    def middle(a):
                        return int(np.ceil(len(a) / 2)) - 1

                    kwargs["start"] = times[middle(times)] - (
                        middle(data) * kwargs["freq"]
                    )
                times = pd.date_range(**kwargs)

            else:
                # Linearly arange between first and last
                times = pd.date_range(start=times[0], end=times[-1], periods=len(data))

        return pd.DataFrame(data, times, columns) 
Example #15
Source File: test_recorders.py    From pywr with GNU General Public License v3.0 5 votes vote down vote up
def timeseries2_observed():
    path = os.path.join(os.path.dirname(__file__), 'models')
    df = pandas.read_csv(os.path.join(path, 'timeseries2.csv'),
                         parse_dates=True, dayfirst=True, index_col=0)
    df = df.asfreq(pandas.infer_freq(df.index))
    # perturb a bit
    df += np.random.normal(size=df.shape)
    return df 
Example #16
Source File: snow.py    From pvlib-python with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def fully_covered_nrel(snowfall, threshold_snowfall=1.):
    '''
    Calculates the timesteps when the row's slant height is fully covered
    by snow.

    Parameters
    ----------
    snowfall : Series
        Accumulated snowfall in each time period [cm]

    threshold_snowfall : float, default 1.0
        Hourly snowfall above which snow coverage is set to the row's slant
        height. [cm/hr]

    Returns
    ----------
    boolean: Series
        True where the snowfall exceeds the defined threshold to fully cover
        the panel.

    Notes
    -----
    Implements the model described in [1]_ with minor improvements in [2]_.

    References
    ----------
    .. [1] Marion, B.; Schaefer, R.; Caine, H.; Sanchez, G. (2013).
       "Measured and modeled photovoltaic system energy losses from snow for
       Colorado and Wisconsin locations." Solar Energy 97; pp.112-121.
    .. [2] Ryberg, D; Freeman, J. "Integration, Validation, and Application
       of a PV Snow Coverage Model in SAM" (2017) NREL Technical Report
       NREL/TP-6A20-68705
    '''
    timestep = _time_delta_in_hours(snowfall.index)
    hourly_snow_rate = snowfall / timestep
    # if we can infer a time frequency, use first snowfall value
    # otherwise the first snowfall value is ignored
    freq = pd.infer_freq(snowfall.index)
    if freq is not None:
        timedelta = pd.tseries.frequencies.to_offset(freq) / pd.Timedelta('1h')
        hourly_snow_rate.iloc[0] = snowfall[0] / timedelta
    else:  # can't infer frequency from index
        hourly_snow_rate[0] = 0  # replaces NaN
    return hourly_snow_rate > threshold_snowfall 
Example #17
Source File: helpers.py    From pudl with MIT License 4 votes vote down vote up
def is_annual(df_year, year_col='report_date'):
    """
    Determine whether a DataFrame contains consistent annual time-series data.

    Some processes will only work with consistent yearly reporting. This means
    if you have two non-contiguous years of data or the datetime reporting is
    inconsistent, the process will break. This function attempts to infer the
    temporal frequency of the dataframe, or if that is impossible, to at least
    see whether the data would be consistent with annual reporting -- e.g. if
    there is only a single year of data, it should all have the same date, and
    that date should correspond to January 1st of a given year.

    This function is known to be flaky and needs to be re-written to deal with
    the edge cases better.

    Args:
        df_year (:class:`pandas.DataFrame`): A pandas DataFrame that might
            contain time-series data at annual resolution.
        year_col (str): The column of the DataFrame in which the year is
            reported.

    Returns:
        bool: True if df_year is found to be consistent with continuous annual
        time resolution, False otherwise.

    """
    year_index = pd.DatetimeIndex(df_year[year_col].unique()).sort_values()
    if len(year_index) >= 3:
        date_freq = pd.infer_freq(year_index)
        assert date_freq == 'AS-JAN', "infer_freq() not AS-JAN"
    elif len(year_index) == 2:
        min_year = year_index.min()
        max_year = year_index.max()
        assert year_index.min().month == 1, "min year not Jan"
        assert year_index.min().day == 1, "min day not 1st"
        assert year_index.max().month == 1, "max year not Jan"
        assert year_index.max().day == 1, "max day not 1st"
        delta_year = pd.Timedelta(max_year - min_year)
        assert delta_year / pd.Timedelta(days=1) >= 365.0
        assert delta_year / pd.Timedelta(days=1) <= 366.0
    elif len(year_index) == 1:
        assert year_index.min().month == 1, "only month not Jan"
        assert year_index.min().day == 1, "only day not 1st"
    else:
        assert False, "Zero dates found!"

    return True 
Example #18
Source File: dataframe_tools.py    From pywr with GNU General Public License v3.0 4 votes vote down vote up
def load_dataframe(model, data):
    column = data.pop("column", None)
    if isinstance(column, list):
        # Cast multiindex to a tuple to ensure .loc works correctly
        column = tuple(column)

    index = data.pop("index", None)
    if isinstance(index, list):
        # Cast multiindex to a tuple to ensure .loc works correctly
        index = tuple(index)

    table_ref = data.pop('table', None)
    if table_ref is not None:
        name = table_ref
        df = model.tables[table_ref]
    else:
        name = data.get('url', None)
        df = read_dataframe(model, data)

    # if column is not specified, use the whole dataframe
    if column is not None:
        try:
            df = df[column]
        except KeyError:
            raise KeyError('Column "{}" not found in dataset "{}"'.format(column, name))

    if index is not None:
        try:
            df = df.loc[index]
        except KeyError:
            raise KeyError('Index "{}" not found in dataset "{}"'.format(index, name))

    try:
        if isinstance(df.index, pandas.DatetimeIndex):
            # Only infer freq if one isn't already found.
            # E.g. HDF stores the saved freq, but CSV tends to have None, but infer to Weekly for example
            if df.index.freq is None:
                freq = pandas.infer_freq(df.index)
                if freq is None:
                    raise IndexError("Failed to identify frequency of dataset \"{}\"".format(name))
                df = df.asfreq(freq)
    except AttributeError:
        # Probably wasn't a pandas dataframe at this point.
        pass

    return df 
Example #19
Source File: tsm.py    From pyflux with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def shift_dates(self,h):
        """ Auxiliary function for creating dates for forecasts

        Parameters
        ----------
        h : int
            How many steps to forecast

        Returns
        ----------
        A transformed date_index object
        """

        date_index = copy.deepcopy(self.index)
        date_index = date_index[self.max_lag:len(date_index)]

        if self.is_pandas is True:

            if isinstance(date_index, pd.core.indexes.datetimes.DatetimeIndex):

                if pd.infer_freq(date_index) in ['H', 'M', 'S']:

                    for t in range(h):
                        date_index += pd.DateOffset((date_index[len(date_index)-1] - date_index[len(date_index)-2]).seconds)

                else: # Assume higher frequency (configured for days)

                    for t in range(h):
                        date_index += pd.DateOffset((date_index[len(date_index)-1] - date_index[len(date_index)-2]).days)

            elif isinstance(date_index, pd.core.indexes.numeric.Int64Index):

                for i in range(h):
                    new_value = date_index.values[len(date_index.values)-1] + (date_index.values[len(date_index.values)-1] - date_index.values[len(date_index.values)-2])
                    date_index = pd.Int64Index(np.append(date_index.values,new_value))

        else:

            for t in range(h):
                date_index.append(date_index[len(date_index)-1]+1)

        return date_index