Python pandas.to_datetime() Examples
The following are 30 code examples of pandas.to_datetime(), drawn from open-source projects; the source file, project, and license are noted above each example.
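Before the project examples, here is a minimal standalone sketch of the most common call patterns (all input values are invented for illustration):

import pandas as pd

# A single string becomes a Timestamp.
ts = pd.to_datetime("2016-01-01")                   # Timestamp('2016-01-01 00:00:00')

# A list of strings becomes a DatetimeIndex.
idx = pd.to_datetime(["2000-06-01", "2000-07-01"])

# An object-dtype column becomes datetime64[ns].
df = pd.DataFrame({"date": ["2005/11/23", "2010/12/31"]})
df["date"] = pd.to_datetime(df["date"])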
Example #1
Source File: analyze_estimates.py From performance_tracker with GNU General Public License v3.0
def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    schedule_direction.loc[:, "datetime_utc"] = pd.to_datetime(
        schedule_direction["datetime"], utc=True
    )
    estimated_trips.loc[:, "datetime_utc"] = pd.to_datetime(
        estimated_trips["datetime"], utc=True
    )
    schedule_direction = schedule_direction.set_index(
        pd.DatetimeIndex(schedule_direction["datetime_utc"])
    ).sort_index()
    matched_estimates = [
        match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        for stop_id, stop_estimates in estimated_trips.groupby(["stop_id"])
    ]
    matched_estimates = [x for x in matched_estimates if x is not None]
    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates
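Example #1 leans on the utc=True flag, which parses and localizes to UTC in one step. A minimal sketch of that behavior (series values invented for illustration):

import pandas as pd

s = pd.Series(["2020-01-01 08:00:00", "2020-01-01 09:30:00"])
utc = pd.to_datetime(s, utc=True)   # dtype becomes datetime64[ns, UTC]
print(utc.dt.tz)                    # UTC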
Example #2
Source File: test_utils_times.py From aospy with Apache License 2.0
def test_monthly_mean_at_each_ind():
    times_submonthly = pd.to_datetime(['2000-06-01', '2000-06-15',
                                       '2000-07-04', '2000-07-19'])
    times_means = pd.to_datetime(['2000-06-01', '2000-07-01'])
    len_other_dim = 2
    arr_submonthly = xr.DataArray(
        np.random.random((len(times_submonthly), len_other_dim)),
        dims=[TIME_STR, 'dim0'], coords={TIME_STR: times_submonthly}
    )
    arr_means = xr.DataArray(
        np.random.random((len(times_means), len_other_dim)),
        dims=arr_submonthly.dims, coords={TIME_STR: times_means}
    )
    actual = monthly_mean_at_each_ind(arr_means, arr_submonthly)
    desired_values = np.stack([arr_means.values[0]] * len_other_dim +
                              [arr_means.values[1]] * len_other_dim,
                              axis=0)
    desired = xr.DataArray(desired_values, dims=arr_submonthly.dims,
                           coords=arr_submonthly.coords)
    assert actual.identical(desired)
Example #3
Source File: test_core.py From pywr with GNU General Public License v3.0
def test_storage_max_volume_zero():
    """Test that a max_volume of zero results in a NaN for current_pc
    and no exception.
    """
    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )
    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)
    storage.max_volume = 0
    model.run()
    assert np.isnan(storage.current_pc)
Example #4
Source File: test_files.py From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_day_files_direct_call_to_from_os(self):
    # create a bunch of files by year, month, and day
    start = pysat.datetime(2008, 1, 1)
    stop = pysat.datetime(2009, 12, 31)
    create_files(self.testInst, start, stop, freq='1D',
                 use_doy=False,
                 root_fname=''.join(('pysat_testing_junk_{year:04d}_gold_',
                                     '{day:03d}_stuff_{month:02d}.pysat_',
                                     'testing_file')))
    # use from_os function to get pandas Series of files and dates
    files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                format_str=''.join(('pysat_testing_junk_',
                                                    '{year:04d}_gold_',
                                                    '{day:03d}_stuff_',
                                                    '{month:02d}.pysat_',
                                                    'testing_file')))
    # check overall length
    check1 = len(files) == (365 + 366)
    # check specific dates
    check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
    check3 = pds.to_datetime(files.index[365]) == \
        pysat.datetime(2008, 12, 31)
    check4 = pds.to_datetime(files.index[-1]) == \
        pysat.datetime(2009, 12, 31)
    assert(check1 & check2 & check3 & check4)
Example #5
Source File: test_files.py From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_files_direct_call_to_from_os(self):
    # create a bunch of files by year and month
    start = pysat.datetime(2008, 1, 1)
    stop = pysat.datetime(2009, 12, 31)
    create_files(self.testInst, start, stop, freq='1MS',
                 root_fname=''.join(('pysat_testing_junk_{year:04d}_gold_',
                                     'stuff_{month:02d}.pysat_testing_',
                                     'file')))
    # use from_os function to get pandas Series of files and dates
    files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                format_str=''.join(('pysat_testing_junk_',
                                                    '{year:04d}_gold_',
                                                    'stuff_{month:02d}.',
                                                    'pysat_testing_file')))
    # check overall length
    check1 = len(files) == 24
    # check specific dates
    check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
    check3 = pds.to_datetime(files.index[11]) == \
        pysat.datetime(2008, 12, 1)
    check4 = pds.to_datetime(files.index[-1]) == \
        pysat.datetime(2009, 12, 1)
    assert(check1 & check2 & check3 & check4)
Example #6
Source File: universal.py From xalpha with MIT License
def get_sw_from_jq(code, start=None, end=None, **kws):
    """
    :param code: str. e.g. 801180, a Shenwan (SW) industry index
    :param start:
    :param end:
    :param kws:
    :return:
    """
    logger.debug("get sw data of %s" % code)
    df = finance.run_query(
        query(finance.SW1_DAILY_VALUATION)
        .filter(finance.SW1_DAILY_VALUATION.date >= start)
        .filter(finance.SW1_DAILY_VALUATION.date <= end)
        .filter(finance.SW1_DAILY_VALUATION.code == code)
        .order_by(finance.SW1_DAILY_VALUATION.date.asc())
    )
    df["date"] = pd.to_datetime(df["date"])
    return df
Example #7
Source File: test_files.py From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_doy_files_no_gap_in_name_direct_call_to_from_os(self):
    # create a bunch of files by year and doy
    start = pysat.datetime(2008, 1, 1)
    stop = pysat.datetime(2009, 12, 31)
    create_files(self.testInst, start, stop, freq='1D',
                 root_fname=''.join(('pysat_testing_junk_{year:04d}',
                                     '{day:03d}_stuff.pysat_testing_',
                                     'file')))
    # use from_os function to get pandas Series of files and dates
    files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                format_str=''.join(('pysat_testing_junk_',
                                                    '{year:04d}{day:03d}_',
                                                    'stuff.pysat_testing_',
                                                    'file')))
    # check overall length
    check1 = len(files) == (365 + 366)
    # check specific dates
    check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
    check3 = pds.to_datetime(files.index[365]) == \
        pysat.datetime(2008, 12, 31)
    check4 = pds.to_datetime(files.index[-1]) == \
        pysat.datetime(2009, 12, 31)
    assert(check1 & check2 & check3 & check4)
Example #8
Source File: test_files.py From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_doy_files_direct_call_to_from_os(self):
    # create a bunch of files by year and doy
    start = pysat.datetime(2008, 1, 1)
    stop = pysat.datetime(2009, 12, 31)
    create_files(self.testInst, start, stop, freq='1D')
    # use from_os function to get pandas Series of files and dates
    files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                format_str=''.join(('pysat_testing_junk_',
                                                    '{year:04d}_gold_',
                                                    '{day:03d}_stuff.',
                                                    'pysat_testing_file')))
    # check overall length
    check1 = len(files) == (365 + 366)
    # check specific dates
    check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
    check3 = pds.to_datetime(files.index[365]) == \
        pysat.datetime(2008, 12, 31)
    check4 = pds.to_datetime(files.index[-1]) == \
        pysat.datetime(2009, 12, 31)
    assert(check1 & check2 & check3 & check4)
Example #9
Source File: universal.py From xalpha with MIT License
def _get_index_weight_range(code, start, end):
    if len(code.split(".")) != 2:
        code = _inverse_convert_code(code)
    start_obj = dt.datetime.strptime(start.replace("-", "").replace("/", ""), "%Y%m%d")
    end_obj = dt.datetime.strptime(end.replace("-", "").replace("/", ""), "%Y%m%d")
    start_m = start_obj.replace(day=1)
    if start_m < start_obj:
        start_m = start_m + relativedelta(months=1)
    end_m = end_obj.replace(day=1)
    if end_obj < end_m:
        end_m = end_m - relativedelta(months=1)
    d = start_m
    df = pd.DataFrame({"code": [], "weight": [], "display_name": [], "date": []})
    while True:
        if d > end_m:
            df["date"] = pd.to_datetime(df["date"])
            return df
        logger.debug("fetch index weight on %s for %s" % (d, code))
        df0 = get_index_weights(index_id=code, date=d.strftime("%Y-%m-%d"))
        df0["code"] = df0.index
        df = df.append(df0, ignore_index=True, sort=False)
        d = d + relativedelta(months=1)
Example #10
Source File: fixtures.py From pywr with GNU General Public License v3.0
def simple_storage_model(request):
    """
    Make a simple model with a single Input, Storage and Output.

    Input -> Storage -> Output
    """
    model = pywr.core.Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-05'),
        timestep=datetime.timedelta(1),
    )
    inpt = Input(model, name="Input", max_flow=5.0, cost=-1)
    res = Storage(model, name="Storage", num_outputs=1, num_inputs=1,
                  max_volume=20, initial_volume=10)
    otpt = Output(model, name="Output", max_flow=8, cost=-999)
    inpt.connect(res)
    res.connect(otpt)
    return model
Example #11
Source File: test_files.py From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_day_hour_minute_files_direct_call_to_from_os(self):
    root_fname = ''.join(('pysat_testing_junk_{year:04d}_gold_{day:03d}_',
                          'stuff_{month:02d}_{hour:02d}{minute:02d}.',
                          'pysat_testing_file'))
    # create a bunch of files by year, month, day, hour, and minute
    start = pysat.datetime(2008, 1, 1)
    stop = pysat.datetime(2008, 1, 4)
    create_files(self.testInst, start, stop, freq='30min',
                 use_doy=False, root_fname=root_fname)
    # use from_os function to get pandas Series of files and dates
    files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                format_str=root_fname)
    # check overall length
    check1 = len(files) == 145
    # check specific dates
    check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
    check3 = pds.to_datetime(files.index[1]) == \
        pysat.datetime(2008, 1, 1, 0, 30)
    check4 = pds.to_datetime(files.index[10]) == \
        pysat.datetime(2008, 1, 1, 5, 0)
    check5 = pds.to_datetime(files.index[-1]) == pysat.datetime(2008, 1, 4)
    assert(check1 & check2 & check3 & check4 & check5)
Example #12
Source File: test_utils_times.py From aospy with Apache License 2.0
def test_yearly_average_no_mask():
    times = pd.to_datetime(['2000-06-01', '2000-06-15',
                            '2001-07-04', '2001-10-01', '2001-12-31',
                            '2004-01-01'])
    arr = xr.DataArray(np.random.random((len(times),)),
                       dims=[TIME_STR], coords={TIME_STR: times})
    dt = arr.copy(deep=True)
    dt.values = np.random.random((len(times),))
    actual = yearly_average(arr, dt)
    yr2000 = (arr[0]*dt[0] + arr[1]*dt[1]) / (dt[0] + dt[1])
    yr2001 = ((arr[2]*dt[2] + arr[3]*dt[3] + arr[4]*dt[4]) /
              (dt[2] + dt[3] + dt[4]))
    yr2004 = arr[-1]
    yrs_coord = [2000, 2001, 2004]
    yr_avgs = np.array([yr2000, yr2001, yr2004])
    desired = xr.DataArray(yr_avgs, dims=['year'], coords={'year': yrs_coord})
    xr.testing.assert_allclose(actual, desired)
Example #13
Source File: test_core.py From pywr with GNU General Public License v3.0
def test_storage_max_volume_nested_param():
    """Test that a max_volume can be used with a Parameter with children."""
    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )
    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)
    p = AggregatedParameter(model, [
        ConstantParameter(model, 20.0),
        ConstantParameter(model, 20.0)
    ], agg_func='sum')
    storage.max_volume = p
    storage.initial_volume = 10.0
    model.setup()
    np.testing.assert_allclose(storage.current_pc, 0.25)
Example #14
Source File: universal.py From xalpha with MIT License
def get_historical_fromhzindex(code, start, end):
    """
    Huazheng index data source.

    :param code:
    :param start:
    :param end:
    :return:
    """
    if code.startswith("HZ"):
        code = code[2:]
    r = rget_json(
        "http://www.chindices.com/index/values.val?code={code}".format(code=code)
    )
    df = pd.DataFrame(r["data"])
    df["date"] = pd.to_datetime(df["date"])
    df = df[["date", "price", "pctChange"]]
    df.rename(columns={"price": "close", "pctChange": "percent"}, inplace=True)
    df = df[::-1]
    return df
Example #15
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License
def test_date_incorrect_dtype(self):
    A = DataFrame({
        'col': ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
    })
    B = DataFrame({
        'col': [
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ]
    })
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    A['col1'] = to_datetime(A['col'])
    B['col1'] = to_datetime(B['col'])

    comp = recordlinkage.Compare()
    comp.date('col', 'col1')
    pytest.raises(ValueError, comp.compute, ix, A, B)

    comp = recordlinkage.Compare()
    comp.date('col1', 'col')
    pytest.raises(ValueError, comp.compute, ix, A, B)
Example #16
Source File: test_core.py From pywr with GNU General Public License v3.0
def test_storage_max_volume_param_raises():
    """Test that a max_volume can not be used with 2 levels of child parameters."""
    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )
    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)
    p = AggregatedParameter(model, [
        ConstantParameter(model, 20.0),
        ConstantParameter(model, 20.0)
    ], agg_func='sum')
    p1 = AggregatedParameter(model, [p, ConstantParameter(model, 1.5)],
                             agg_func="product")
    storage.max_volume = p1
    storage.initial_volume = 10.0
    with pytest.raises(RuntimeError):
        model.run()
Example #17
Source File: test_parameters.py From pywr with GNU General Public License v3.0
def test_daily_profile_leap_day(model):
    """Test behaviour of daily profile parameter for leap years."""
    inpt = Input(model, "input")
    otpt = Output(model, "otpt", max_flow=None, cost=-999)
    inpt.connect(otpt)
    inpt.max_flow = DailyProfileParameter(model, np.arange(0, 366, dtype=np.float64))

    # non-leap year
    model.timestepper.start = pd.to_datetime("2015-01-01")
    model.timestepper.end = pd.to_datetime("2015-12-31")
    model.run()
    assert_allclose(inpt.flow, 365)  # NOT 364

    # leap year
    model.timestepper.start = pd.to_datetime("2016-01-01")
    model.timestepper.end = pd.to_datetime("2016-12-31")
    model.run()
    assert_allclose(inpt.flow, 365)
Example #18
Source File: test_parameters.py From pywr with GNU General Public License v3.0
def test_scenario_daily_profile_leap_day(self, simple_linear_model):
    """Test behaviour of daily profile parameter for leap years."""
    model = simple_linear_model
    model.timestepper.start = pd.to_datetime("2016-01-01")
    model.timestepper.end = pd.to_datetime("2016-12-31")

    scenario = Scenario(model, 'A', 2)
    values = np.array([np.arange(366, dtype=np.float64),
                       np.arange(366, 0, -1, dtype=np.float64)])
    expected_values = values.T

    p = ScenarioDailyProfileParameter(model, scenario, values)
    AssertionRecorder(model, p, expected_data=expected_values)

    model.setup()
    model.run()
Example #19
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License
def test_dates(self):
    A = DataFrame({
        'col': to_datetime(
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
    })
    B = DataFrame({
        'col': to_datetime([
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ])
    })
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.date('col', 'col')
    result = comp.compute(ix, A, B)[0]

    expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)
    pdt.assert_series_equal(result, expected)
Example #20
Source File: test_license.py From pywr with GNU General Public License v3.0
def test_simple_model_with_annual_licence_multi_year(simple_linear_model):
    """Test the AnnualLicense over multiple years."""
    import pandas as pd
    import datetime, calendar

    m = simple_linear_model
    # Modify the model to run for three non-leap years at a 30-day timestep.
    m.timestepper.start = pd.to_datetime('2017-1-1')
    m.timestepper.end = pd.to_datetime('2020-1-1')
    m.timestepper.delta = datetime.timedelta(30)

    annual_total = 365.0
    lic = AnnualLicense(m, m.nodes["Input"], annual_total)
    # Apply licence to the model
    m.nodes["Input"].max_flow = lic
    m.nodes["Output"].max_flow = 10.0
    m.nodes["Output"].cost = -10.0
    m.setup()

    for i in range(len(m.timestepper)):
        m.step()
        days_in_year = 365 + int(calendar.isleap(m.timestepper.current.datetime.year))
        assert_allclose(m.nodes["Output"].flow, annual_total/days_in_year)
Example #21
Source File: routes.py From thewarden with MIT License
def tradedetails():
    if request.method == "GET":
        id = request.args.get("id")
        # if tradesonly is true then only look for buy and sells
        tradesonly = request.args.get("trades")
        df = pd.read_sql_table("trades", db.engine)
        # Filter only the trades for current user
        df = df[(df.user_id == current_user.username)]
        df = df[(df.trade_reference_id == id)]
        # Filter only buy and sells, ignore deposit / withdraw
        if tradesonly:
            df = df[(df.trade_operation == "B") | (df.trade_operation == "S")]
        # df['trade_date'] = pd.to_datetime(df['trade_date'])
        df.set_index("trade_reference_id", inplace=True)
        df.drop("user_id", axis=1, inplace=True)
        details = df.to_json()
        return details
Example #22
Source File: test_recorders.py From pywr with GNU General Public License v3.0
def test_reset_timestepper_recorder():
    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )
    inpt = Input(model, "input", max_flow=10)
    otpt = Output(model, "output", max_flow=50, cost=-10)
    inpt.connect(otpt)

    rec = NumpyArrayNodeRecorder(model, otpt)

    model.run()

    model.timestepper.end = pandas.to_datetime("2016-01-02")
    model.run()
Example #23
Source File: info.py From xalpha with MIT License
def _fetch_csv(self, path):
    """
    Fetch the fund information and price table from path + code + ".csv".
    Not recommended for manual use; instead set the fetch flag to True
    when initializing the object.

    :param path: string of folder path
    """
    try:
        content = pd.read_csv(path + self.code + ".csv")
        pricetable = content.iloc[1:]
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
        saveinfo = json.loads(content.iloc[0].date)
        if not isinstance(saveinfo, dict):
            raise FundTypeError("This csv doesn't look like one from fundinfo")
        self.segment = saveinfo["segment"]
        self.feeinfo = saveinfo["feeinfo"]
        self.name = saveinfo["name"]
        self.rate = saveinfo["rate"]
    except FileNotFoundError as e:
        # print('no saved copy of fund %s' % self.code)
        raise e
Example #24
Source File: info.py From xalpha with MIT License
def _fetch_csv(self, path):
    """
    Fetch the information and price table from path + code + ".csv".
    Not recommended for manual use; instead set the fetch flag to True
    when initializing the object.

    :param path: string of folder path
    """
    try:
        pricetable = pd.read_csv(path + self.code + ".csv")
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
    except FileNotFoundError as e:
        # print('no saved copy of %s' % self.code)
        raise e
Example #25
Source File: universal.py From xalpha with MIT License
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None
    # note start is always 1.1, 4.1, 7.1, 10.1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["报告期"])
    df["stock_ratio"] = df["股票占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["债券占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["现金占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    # df["dr_ratio"] = df["存托凭证占净比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["净资产(亿元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


# This is the most elegant approach to dispatching get_daily: the definition can
# be this simple. You don't need to bother with start/end at all; everything is
# taken care of by ``cachedio``.
Example #26
Source File: info.py From xalpha with MIT License
def _fetch_csv(self, path):
    """
    Fetch the information and price table from path + code + ".csv".
    Not recommended for manual use; instead set the fetch flag to True
    when initializing the object.

    :param path: string of folder path
    """
    try:
        content = pd.read_csv(path + self.code + ".csv")
        pricetable = content.iloc[1:]
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
        self.name = content.iloc[0].comment
    except FileNotFoundError as e:
        # print('no saved copy of %s' % self.code)
        raise e
Example #27
Source File: universal.py From xalpha with MIT License
def get_historical_fromzzindex(code, start, end=None):
    """
    CSI (China Securities Index, 中证) data source.

    :param code:
    :param start:
    :param end:
    :return:
    """
    if code.startswith("ZZ"):
        code = code[2:]
    start_obj = dt.datetime.strptime(start, "%Y%m%d")
    fromnow = (today_obj() - start_obj).days
    if fromnow < 20:
        flag = "1%E4%B8%AA%E6%9C%88"  # URL-encoded "1个月" (1 month)
    elif fromnow < 60:
        flag = "3%E4%B8%AA%E6%9C%88"  # "3个月" (3 months)
    elif fromnow < 200:
        flag = "1%E5%B9%B4"  # "1年" (1 year)
    else:
        flag = "5%E5%B9%B4"  # "5年" (5 years)
    r = rget_json(
        "http://www.csindex.com.cn/zh-CN/indices/index-detail/"
        "{code}?earnings_performance={flag}&data_type=json".format(
            code=code, flag=flag
        ),
        headers={
            "Host": "www.csindex.com.cn",
            "Referer": "http://www.csindex.com.cn/zh-CN/indices/index-detail/{code}".format(
                code=code
            ),
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36",
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "application/json, text/javascript, */*; q=0.01",
        },
    )
    df = pd.DataFrame(r)
    df["date"] = pd.to_datetime(df["tradedate"])
    df["close"] = df["tclose"].apply(_float)
    return df[["date", "close"]]
Example #28
Source File: process_vehicles.py From performance_tracker with GNU General Public License v3.0
def process_raw_vehicles(df, track):
    df = df.drop_duplicates(
        subset=["report_time", "latitude", "longitude", "vehicle_id"]
    )
    df = df[df["predictable"] == True]
    df["latitude"] = pd.to_numeric(df.latitude)
    df["longitude"] = pd.to_numeric(df.longitude)
    df = toGDF(df)
    mask_0 = (df["direction"] == 0) | (df["direction"] == 90)
    mask_1 = (df["direction"] == 180) | (df["direction"] == 270)
    df_0 = df.loc[mask_0]
    df_0 = df_0.assign(direction_id=0)
    df_1 = df.loc[mask_1]
    df_1 = df_1.assign(direction_id=1)
    df_0["relative_position"] = findRelativePositions(df_0, track[0])
    df_1["relative_position"] = findRelativePositions(df_1, track[1])
    df = pd.concat([df_0, df_1])
    df["datetime"] = pd.to_datetime(df["report_time"], utc=True)
    df["datetime_local_iso8601"] = df.report_time.apply(
        lambda dt: pendulum.parse(dt, tz="UTC")
        .in_tz("America/Los_Angeles")
        .to_iso8601_string()
    )
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df = getTrips(df)
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df["datetime"] = df["datetime_local_iso8601"]
    df = df[["datetime", "trip_id", "direction_id", "relative_position"]]
    return df
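Example #28 parses report times as UTC with pd.to_datetime(..., utc=True) and then uses pendulum for the local-time strings. As an aside, the same localization can be done in pandas alone; a small sketch under that assumption (column values invented for illustration):

import pandas as pd

s = pd.Series(["2020-06-01 12:00:00", "2020-06-01 12:05:00"])
utc = pd.to_datetime(s, utc=True)
local = utc.dt.tz_convert("America/Los_Angeles")  # tz-aware local wall-clock times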
Example #29
Source File: ornstein_uhlenbeck.py From tensortrade with Apache License 2.0
def ornstein(base_price: int = 1,
             base_volume: int = 1,
             start_date: str = '2010-01-01',
             start_date_format: str = '%Y-%m-%d',
             times_to_generate: int = 1000,
             time_frame: str = '1h',
             params: ModelParameters = None):
    delta = get_delta(time_frame)
    times_to_generate = scale_times_to_generate(times_to_generate, time_frame)

    params = params or default(base_price, times_to_generate, delta)

    prices = ornstein_uhlenbeck_levels(params)

    volume_gen = GaussianNoise(t=times_to_generate)
    volumes = volume_gen.sample(times_to_generate) + base_volume

    start_date = pd.to_datetime(start_date, format=start_date_format)
    price_frame = pd.DataFrame([], columns=['date', 'price'], dtype=float)
    volume_frame = pd.DataFrame([], columns=['date', 'volume'], dtype=float)

    price_frame['date'] = pd.date_range(start=start_date, periods=times_to_generate, freq="1min")
    price_frame['price'] = abs(prices)

    volume_frame['date'] = price_frame['date'].copy()
    volume_frame['volume'] = abs(volumes)

    price_frame.set_index('date')
    price_frame.index = pd.to_datetime(price_frame.index, unit='m', origin=start_date)

    volume_frame.set_index('date')
    volume_frame.index = pd.to_datetime(volume_frame.index, unit='m', origin=start_date)

    data_frame = price_frame['price'].resample(time_frame).ohlc()
    data_frame['volume'] = volume_frame['volume'].resample(time_frame).sum()

    return data_frame
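The last two to_datetime calls in Example #29 rely on the unit= and origin= parameters: each integer index position is read as a number of minutes elapsed since the origin timestamp. In isolation (values invented for illustration):

import pandas as pd

# Integers interpreted as minutes elapsed since the origin timestamp.
stamps = pd.to_datetime([0, 1, 2], unit="m", origin=pd.Timestamp("2010-01-01"))
# DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:01:00',
#                '2010-01-01 00:02:00'], dtype='datetime64[ns]', freq=None)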
Example #30
Source File: cryptodatadownload.py From tensortrade with Apache License 2.0
def fetch_default(self,
                  exchange_name,
                  base_symbol,
                  quote_symbol,
                  timeframe,
                  include_all_volumes=False):
    filename = "{}_{}{}_{}.csv".format(exchange_name, quote_symbol, base_symbol, timeframe)
    base_vc = "Volume {}".format(base_symbol)
    new_base_vc = "volume_base"
    quote_vc = "Volume {}".format(quote_symbol)
    new_quote_vc = "volume_quote"

    df = pd.read_csv(self.url + filename, skiprows=1)
    df = df[::-1]
    df = df.drop(["Symbol"], axis=1)
    df = df.rename({base_vc: new_base_vc, quote_vc: new_quote_vc, "Date": "date"}, axis=1)

    if "d" in timeframe:
        df["date"] = pd.to_datetime(df["date"])
    elif "h" in timeframe:
        df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %I-%p")

    df = df.set_index("date")
    df.columns = [name.lower() for name in df.columns]
    df = df.reset_index()

    if not include_all_volumes:
        df = df.drop([new_quote_vc], axis=1)
        df = df.rename({new_base_vc: "volume"}, axis=1)
        return df
    return df
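The hourly branch in Example #30 needs an explicit format string because those files mark hours on a 12-hour clock. A standalone check of that pattern (the sample string is invented, but matches the %Y-%m-%d %I-%p format used above):

import pandas as pd

# %I is the 12-hour hour, %p the AM/PM marker.
ts = pd.to_datetime("2019-01-01 01-PM", format="%Y-%m-%d %I-%p")
# Timestamp('2019-01-01 13:00:00')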