Python pandas.to_numeric() Examples
The following are 30 code examples of pandas.to_numeric().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: toolbox.py From xalpha with MIT License | 7 votes |
def __init__(self, code, start=None, end=None):
    """
    Load the daily "teb-" series for an index, derive helper columns and fit.

    :param code: str. Index code, eg. SH000016
    :param start: forwarded unchanged to ``xu.get_daily`` as ``start``
    :param end: forwarded unchanged to ``xu.get_daily`` as ``end``
    """
    df = xu.get_daily("teb-" + code, start=start, end=end)
    # Coerce the two source columns to a numeric dtype before doing math.
    df["e"] = pd.to_numeric(df["e"])
    df["b"] = pd.to_numeric(df["b"])
    # np.log is a ufunc: apply it to the whole column at once instead of
    # calling it once per row through a Python-level lambda.
    df["lnb"] = np.log(df["b"])
    df["lne"] = np.log(df["e"])
    df["roe"] = df["e"] / df["b"] * 100
    # Whole days elapsed since the first row's date.
    df["date_count"] = (df["date"] - df["date"].iloc[0]).apply(
        lambda s: int(s.days)
    )
    self.df = df
    self.fit(verbose=False)
Example #2
Source File: data_loader.py From PADME with MIT License | 6 votes |
def get_user_specified_features(df, featurizer, verbose=True):
    """Extract and merge user specified features.

    Merge features included in dataset provided by user
    into final features dataframe

    Three types of featurization here:

      1) Molecule featurization
        -) Smiles string featurization
        -) Rdkit MOL featurization
      2) Complex featurization
        -) PDB files for interacting molecules.
      3) User specified featurizations.

    Parameters
    ----------
    df: DataFrame holding the columns named by ``featurizer.feature_fields``.
        Those columns are converted to numeric dtype in place.
    featurizer: object exposing ``feature_fields`` (the column labels to use).
    verbose: forwarded to ``log`` together with the timing message.

    Returns
    -------
    The values of the feature columns as an array.
    """
    time1 = time.time()
    df[featurizer.feature_fields] = df[featurizer.feature_fields].apply(
        pd.to_numeric)
    # DataFrame.as_matrix() was removed in pandas 1.0; selecting the columns
    # and taking .values yields the same array on old and new pandas alike.
    X_shard = df[featurizer.feature_fields].values
    time2 = time.time()
    log("TIMING: user specified processing took %0.3f s" % (time2 - time1),
        verbose)
    return X_shard
Example #3
Source File: test_numeric.py From vnpy_crypto with MIT License | 6 votes |
def test_error(self):
    """Exercise every ``errors`` mode of to_numeric on unparseable strings."""
    mixed = pd.Series([1, -3.14, 'apple'])

    # errors='raise': the message names the bad string and its position.
    with tm.assert_raises_regex(
            ValueError, 'Unable to parse string "apple" at position 2'):
        to_numeric(mixed, errors='raise')

    # errors='ignore': the result is expected to equal the input.
    ignored = to_numeric(mixed, errors='ignore')
    tm.assert_series_equal(ignored, pd.Series([1, -3.14, 'apple']))

    # errors='coerce': the bad entry is expected to become NaN.
    coerced = to_numeric(mixed, errors='coerce')
    tm.assert_series_equal(coerced, pd.Series([1, -3.14, np.nan]))

    # With two bad strings, the first one (position 0) is the one reported.
    leading_bad = pd.Series(['orange', 1, -3.14, 'apple'])
    with tm.assert_raises_regex(
            ValueError, 'Unable to parse string "orange" at position 0'):
        to_numeric(leading_bad, errors='raise')
Example #4
Source File: test_numeric.py From recruit with Apache License 2.0 | 6 votes |
def test_coerce_uint64_conflict(self):
    # see gh-17007 and gh-17125
    #
    # Still returns float despite the uint64-nan conflict,
    # which would normally force the casting to object.
    frame = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]})
    coerced = to_numeric(frame["a"], errors="coerce")
    tm.assert_series_equal(
        coerced,
        pd.Series([200, 300, np.nan, np.nan, 30000000000000000000],
                  dtype=float, name="a"),
    )

    strings = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
    coerced = to_numeric(strings, errors="coerce")
    tm.assert_series_equal(
        coerced,
        pd.Series([12345678901234567890, 1234567890, np.nan], dtype=float),
    )

    # For completeness, check against "ignore" and "raise"
    tm.assert_series_equal(to_numeric(strings, errors="ignore"), strings)

    with pytest.raises(ValueError, match="Unable to parse string"):
        to_numeric(strings, errors="raise")
Example #5
Source File: test_numeric.py From vnpy_crypto with MIT License | 6 votes |
def test_coerce_uint64_conflict(self):
    # see gh-17007 and gh-17125
    #
    # Still returns float despite the uint64-nan conflict,
    # which would normally force the casting to object.
    huge = 30000000000000000000
    source = pd.DataFrame({"a": [200, 300, "", "NaN", huge]})
    want = pd.Series([200, 300, np.nan, np.nan, huge], dtype=float, name="a")
    got = to_numeric(source["a"], errors="coerce")
    tm.assert_series_equal(got, want)

    text = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
    want = pd.Series([12345678901234567890, 1234567890, np.nan],
                     dtype=float)
    got = to_numeric(text, errors="coerce")
    tm.assert_series_equal(got, want)

    # For completeness, check against "ignore" and "raise"
    got = to_numeric(text, errors="ignore")
    tm.assert_series_equal(got, text)

    with tm.assert_raises_regex(ValueError, "Unable to parse string"):
        to_numeric(text, errors="raise")
Example #6
Source File: test_numeric.py From recruit with Apache License 2.0 | 6 votes |
def test_error(self):
    """Exercise every ``errors`` mode of to_numeric on unparseable strings."""
    trailing_bad = pd.Series([1, -3.14, 'apple'])

    # errors='raise': the message names the bad string and its position.
    with pytest.raises(
            ValueError, match='Unable to parse string "apple" at position 2'):
        to_numeric(trailing_bad, errors='raise')

    # errors='ignore': the result is expected to equal the input.
    tm.assert_series_equal(
        to_numeric(trailing_bad, errors='ignore'),
        pd.Series([1, -3.14, 'apple']),
    )

    # errors='coerce': the bad entry is expected to become NaN.
    tm.assert_series_equal(
        to_numeric(trailing_bad, errors='coerce'),
        pd.Series([1, -3.14, np.nan]),
    )

    # With two bad strings, the first one (position 0) is the one reported.
    leading_bad = pd.Series(['orange', 1, -3.14, 'apple'])
    with pytest.raises(
            ValueError, match='Unable to parse string "orange" at position 0'):
        to_numeric(leading_bad, errors='raise')
Example #7
Source File: consensus.py From Comparative-Annotation-Toolkit with Apache License 2.0 | 6 votes |
def load_metrics_from_db(db_path, tx_mode, aln_mode):
    """
    Loads the alignment metrics for the mRNA/CDS alignments of transMap/AugustusTM/TMR

    :param db_path: passed to ``tools.sqlInterface.start_session``
    :param tx_mode: second-level key into ``tools.sqlInterface.tables``
    :param aln_mode: first-level key into ``tools.sqlInterface.tables``
    :return: DataFrame with one row per AlignmentId, one column per classifier,
        and an added ``OriginalIntronsPercent`` column
    """
    session = tools.sqlInterface.start_session(db_path)
    # try/finally guarantees the session is released even when loading or
    # reshaping the metrics raises.
    try:
        metrics_table = tools.sqlInterface.tables[aln_mode][tx_mode]['metrics']
        metrics_df = tools.sqlInterface.load_metrics(metrics_table, session)
        # unstack flattens the long-form data structure
        metrics_df = metrics_df.set_index(['AlignmentId', 'classifier']).unstack('classifier')
        # keep only the classifier level of the two-level column labels
        metrics_df.columns = [col[1] for col in metrics_df.columns]
        metrics_df = metrics_df.reset_index()
        cols = ['AlnCoverage', 'AlnGoodness', 'AlnIdentity', 'PercentUnknownBases']
        metrics_df[cols] = metrics_df[cols].apply(pd.to_numeric)
        # missing values become '' so the split below yields [''] -> []
        metrics_df['OriginalIntrons'] = metrics_df['OriginalIntrons'].fillna('')
        metrics_df['OriginalIntrons'] = [list(map(int, x)) if len(x[0]) > 0 else []
                                         for x in metrics_df['OriginalIntrons'].str.split(',').tolist()]
        metrics_df['OriginalIntronsPercent'] = metrics_df['OriginalIntrons'].apply(calculate_vector_support,
                                                                                   resolve_nan=1)
    finally:
        session.close()
    return metrics_df
Example #8
Source File: test_numeric.py From vnpy_crypto with MIT License | 6 votes |
def test_str(self): idx = pd.Index(['1', '2', '3'], name='xxx') exp = np.array([1, 2, 3], dtype='int64') res = pd.to_numeric(idx) tm.assert_index_equal(res, pd.Index(exp, name='xxx')) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(exp, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, exp) idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') exp = np.array([1.5, 2.7, 3.4]) res = pd.to_numeric(idx) tm.assert_index_equal(res, pd.Index(exp, name='xxx')) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(exp, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, exp)
Example #9
Source File: test_numeric.py From vnpy_crypto with MIT License | 6 votes |
def test_numeric_dtypes(self): idx = pd.Index([1, 2, 3], name='xxx') res = pd.to_numeric(idx) tm.assert_index_equal(res, idx) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(idx, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, idx.values) idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') res = pd.to_numeric(idx) tm.assert_index_equal(res, idx) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(idx, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, idx.values)
Example #10
Source File: test_numeric.py From recruit with Apache License 2.0 | 6 votes |
def test_numeric_lists_and_arrays(self): # Test to_numeric with embedded lists and arrays df = pd.DataFrame(dict( a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] )) df['a'] = df['a'].apply(to_numeric) expected = pd.DataFrame(dict( a=[[3.14, 1.0], 1.6, 0.1], )) tm.assert_frame_equal(df, expected) df = pd.DataFrame(dict( a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] )) df['a'] = df['a'].apply(to_numeric) expected = pd.DataFrame(dict( a=[[3.14, 1.0], 0.1], )) tm.assert_frame_equal(df, expected)
Example #11
Source File: test_numeric.py From vnpy_crypto with MIT License | 6 votes |
def test_numeric_lists_and_arrays(self): # Test to_numeric with embedded lists and arrays df = pd.DataFrame(dict( a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] )) df['a'] = df['a'].apply(to_numeric) expected = pd.DataFrame(dict( a=[[3.14, 1.0], 1.6, 0.1], )) tm.assert_frame_equal(df, expected) df = pd.DataFrame(dict( a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] )) df['a'] = df['a'].apply(to_numeric) expected = pd.DataFrame(dict( a=[[3.14, 1.0], 0.1], )) tm.assert_frame_equal(df, expected)
Example #12
Source File: vector_to_cube.py From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _format_series_data(data_series): """ The purpose of this function is to convert the series data into a rasterizeable format if possible. Parameters ---------- data_series: :obj:`geopandas.GeoSeries` The series to be converted. Returns ------- :obj:`geopandas.GeoSeries`: The series that was converted if possible. """ if "datetime" in str(data_series.dtype): data_series = pandas.to_numeric(data_series).astype(numpy.float64) get_logger().warning( f"The series '{data_series.name}' was converted from a date to a number to " "rasterize the data. To load the data back in as a date, " "use 'pandas.to_datetime()'." ) elif str(data_series.dtype) == "category": data_series = data_series.cat.codes return data_series
Example #13
Source File: pricing.py From thewarden with MIT License | 6 votes |
def df_fx(self, currency, fx_provider):
    """
    Return the price frame with ``fx_close`` and ``close_converted`` columns.

    For 'USD' the instance frame itself is updated (rate fixed at 1) and
    returned. For any other currency an FX price frame is fetched via
    ``PriceData`` and inner-joined to ``self.df`` on 'date'. Any exception is
    appended to ``self.errors`` and ``None`` is returned instead.
    """
    try:
        if currency == 'USD':
            # If currency is USD no conversion is needed - prices are all in USD
            self.df['fx_close'] = 1
            self.df['close_converted'] = self.df['close'].astype(float)
            return self.df

        # First get the df from this currency
        fx = PriceData(currency, fx_provider)
        fx.df = fx.df.rename(columns={'close': 'fx_close'})
        fx.df["fx_close"] = pd.to_numeric(fx.df.fx_close, errors='coerce')
        # Merge the two dfs:
        combined = pd.merge(self.df, fx.df, on='date', how='inner')
        combined['close'] = combined['close'].astype(float)
        combined['close_converted'] = combined['close'] * combined['fx_close']
        return combined
    except Exception as e:
        self.errors.append(e)
        return None
Example #14
Source File: test_numeric.py From recruit with Apache License 2.0 | 6 votes |
def test_str(self): idx = pd.Index(['1', '2', '3'], name='xxx') exp = np.array([1, 2, 3], dtype='int64') res = pd.to_numeric(idx) tm.assert_index_equal(res, pd.Index(exp, name='xxx')) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(exp, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, exp) idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx') exp = np.array([1.5, 2.7, 3.4]) res = pd.to_numeric(idx) tm.assert_index_equal(res, pd.Index(exp, name='xxx')) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(exp, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, exp)
Example #15
Source File: plotter.py From pygraphistry with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _sanitize_dataset(self, edges, nodes, nodeid): self._check_bound_attribs(edges, ['source', 'destination'], 'Edge') elist = edges.reset_index(drop=True) \ .dropna(subset=[self._source, self._destination]) obj_df = elist.select_dtypes(include=[numpy.object_]) elist[obj_df.columns] = obj_df.apply(pandas.to_numeric, errors='ignore') if nodes is None: nodes = pandas.DataFrame() nodes[nodeid] = pandas.concat([edges[self._source], edges[self._destination]], ignore_index=True).drop_duplicates() else: self._check_bound_attribs(nodes, ['node'], 'Vertex') nlist = nodes.reset_index(drop=True) \ .dropna(subset=[nodeid]) \ .drop_duplicates(subset=[nodeid]) obj_df = nlist.select_dtypes(include=[numpy.object_]) nlist[obj_df.columns] = obj_df.apply(pandas.to_numeric, errors='ignore') return (elist, nlist)
Example #16
Source File: universal.py From xalpha with MIT License | 6 votes |
def __init__(
    self, code, name=None, start=None, end=None, rate=0, col="close", **kws
):
    """
    Load the daily price series for ``code`` and initialise the bookkeeping.

    :param code: identifier passed to ``get_daily`` (and to ``get_rt`` when
        no name is given)
    :param name: display name; when falsy it is looked up via ``get_rt`` and
        falls back to ``code`` if that lookup fails
    :param start: forwarded to ``get_daily``
    :param end: forwarded to ``get_daily``
    :param rate: stored as ``self.rate``
    :param col: column of the daily frame used as the value series
    :param kws: optional ``round_label``, ``dividend_label`` (both default 0)
        and ``value_label`` (default 1)
    """
    if not name:
        try:
            name = get_rt(code)["name"]
        except Exception:
            # Best-effort lookup: fall back to the code itself, but no longer
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            name = code
    self.name = name
    self.code = code
    self.start = start  # None is one year ago
    self.end = end  # None is yesterday
    df = get_daily(code, start=start, end=end)
    df[col] = pd.to_numeric(df[col])  # in case the col is not float
    df["totvalue"] = df[col]
    # net value is the series normalised by its first row
    df["netvalue"] = df[col] / df.iloc[0][col]
    self.price = df
    self.round_label = kws.get("round_label", 0)
    self.dividend_label = kws.get("dividend_label", 0)
    self.value_label = kws.get("value_label", 1)  # redeem by amount by default
    self.specialdate = []
    self.fenhongdate = []
    self.zhesuandate = []
    self.rate = rate
Example #17
Source File: test_protobuf.py From pygraphistry with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_metadata_no_nan(self, mock_etl2, mock_open):
    """Plot edges whose attribute columns contain NaN and read back the dataset handed to etl2."""
    def nan_on_odd(fill):
        # Mapper factory: NaN for odd inputs, `fill` for even ones.
        return lambda x: numpy.nan if x%2 == 1 else fill

    edges = triangleEdges.copy()
    # an all-NaN column, passed through to_numeric
    edges['testNone'] = triangleNodes.a1.map(lambda x: numpy.nan)
    edges['testNone'] = pandas.to_numeric(edges.testNone, errors='ignore')
    # one partly-NaN column per value type
    for column, fill in (('testInt', 0), ('testFloat', 0.5),
                         ('testString', 'foo'), ('testBool', True)):
        edges[column] = triangleNodes.a1.map(nan_on_odd(fill))

    graphistry.bind(source='src', destination='dst', node='id').plot(edges)
    # first positional argument of the recorded etl2 call
    dataset = mock_etl2.call_args[0][0]

    #for attrib in ['testInt', 'testFloat', 'testString', 'testBool', 'testNone']:
    #    for entry in list(dataset['attributes']['edges'][attrib]['aggregations'].values()):
    #        if entry is None or isinstance(entry, str):
    #            pass
    #        else:
    #            self.assertFalse(numpy.isnan(entry))
Example #18
Source File: test_numeric.py From recruit with Apache License 2.0 | 6 votes |
def test_numeric_dtypes(self): idx = pd.Index([1, 2, 3], name='xxx') res = pd.to_numeric(idx) tm.assert_index_equal(res, idx) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(idx, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, idx.values) idx = pd.Index([1., np.nan, 3., np.nan], name='xxx') res = pd.to_numeric(idx) tm.assert_index_equal(res, idx) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(idx, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, idx.values)
Example #19
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_numeric(self): s = pd.Series([1, -3.14, 7], dtype='O') res = to_numeric(s) expected = pd.Series([1, -3.14, 7]) tm.assert_series_equal(res, expected) s = pd.Series([1, -3.14, 7]) res = to_numeric(s) tm.assert_series_equal(res, expected) # GH 14827 df = pd.DataFrame(dict( a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], b=[1.0, 2.0, 3.0, 4.0], )) expected = pd.DataFrame(dict( a=[1.2, 3.14, np.inf, 0.1], b=[1.0, 2.0, 3.0, 4.0], )) # Test to_numeric over one column df_copy = df.copy() df_copy['a'] = df_copy['a'].apply(to_numeric) tm.assert_frame_equal(df_copy, expected) # Test to_numeric over multiple columns df_copy = df.copy() df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) tm.assert_frame_equal(df_copy, expected)
Example #20
Source File: fix_yahoo_finance.py From StockRecommendSystem with MIT License | 5 votes |
def parse_ticker_csv(csv_str, auto_adjust):
    """
    Parse a price CSV into a numeric, date-indexed OHLCV frame.

    Rows containing 'null'/missing values are dropped, the first column
    becomes a datetime index, and every column is converted to numeric.

    :param csv_str: anything ``pandas.read_csv`` accepts (path or buffer)
    :param auto_adjust: when truthy, Open/High/Low are divided by the
        Close / Adj Close ratio, Adj Close replaces Close, and only
        Open, High, Low, Close and Volume are kept
    :return: DataFrame with one row per distinct index value (first row wins)
    """
    try:
        # Spelling for pandas >= 1.3; error_bad_lines/warn_bad_lines were
        # removed in pandas 2.0 and raise TypeError there.
        df = pd.read_csv(csv_str, index_col=0, on_bad_lines='skip')
    except TypeError:
        # Older pandas does not know on_bad_lines; the unknown-keyword error
        # is raised before any data is read, so csv_str is still unread.
        df = pd.read_csv(csv_str, index_col=0, error_bad_lines=False,
                         warn_bad_lines=False)
    df = df.replace('null', np.nan).dropna()

    df.index = pd.to_datetime(df.index)
    df = df.apply(pd.to_numeric)
    df['Volume'] = df['Volume'].fillna(0).astype(int)

    if auto_adjust:
        # Rescale the raw prices by the same factor that maps Close to Adj Close.
        ratio = df["Close"] / df["Adj Close"]
        df["Adj Open"] = df["Open"] / ratio
        df["Adj High"] = df["High"] / ratio
        df["Adj Low"] = df["Low"] / ratio

        df.drop(
            ["Open", "High", "Low", "Close"],
            axis=1, inplace=True)

        df.rename(columns={
            "Adj Open": "Open", "Adj High": "High",
            "Adj Low": "Low", "Adj Close": "Close"
        }, inplace=True)

        df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

    # collapse duplicated index values, keeping the first row of each
    return df.groupby(df.index).first()
Example #21
Source File: iroc_reader.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def read_iroc_file(
    file_obj,
    train_start_date: datetime,
    train_end_date: datetime,
    tag_list: List[SensorTag],
) -> pd.DataFrame:
    """
    Read one iroc timeseries csv into a timestamp-indexed pandas.DataFrame.

    The result has one column per tag of `tag_list` that actually occurs in
    the csv; tags missing from the csv simply produce no column.

    Parameters
    ----------
    file_obj: str or path object or file-like object
        File object to read iroc timeseries data from
    train_start_date
        Only keep timestamps later or equal than this
    train_end_date
        Only keep timestamps earlier than this
    tag_list
        Only keep tags in this list.

    Returns
    -------
    pd.DataFrame
        Dataframe with timestamps as a DateTimeIndex, and up to one column per
        tag in tag_list, but excluding any tags which does not exist in the csv.

    """
    raw = pd.read_csv(file_obj, sep=",", usecols=["tag", "value", "timestamp"])
    wanted_tags = to_list_of_strings(tag_list)
    df = raw[raw["tag"].isin(wanted_tags)]

    # Note, there are some "digital" sensors with string values,
    # now they are just NaN converted
    df["value"] = df["value"].apply(pd.to_numeric, errors="coerce", downcast="float")
    df.dropna(inplace=True, subset=["value"])

    df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True)
    wide = df.pivot(index="timestamp", columns="tag")

    # half-open window: start inclusive, end exclusive
    in_window = (wide.index >= train_start_date) & (wide.index < train_end_date)
    wide = wide[in_window]

    # drop the outer column level left behind by pivot
    wide.columns = wide.columns.droplevel(0)
    return wide
Example #22
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_datetimelike(self): for tz in [None, 'US/Eastern', 'Asia/Tokyo']: idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx') res = pd.to_numeric(idx) tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx')) res = pd.to_numeric(pd.Series(idx, name='xxx')) tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) res = pd.to_numeric(idx.values) tm.assert_numpy_array_equal(res, idx.asi8)
Example #23
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_list_numeric(self): s = [1, 3, 4, 5] res = to_numeric(s) tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64)) s = [1., 3., 4., 5.] res = to_numeric(s) tm.assert_numpy_array_equal(res, np.array(s)) # bool is regarded as numeric s = [True, False, True, True] res = to_numeric(s) tm.assert_numpy_array_equal(res, np.array(s))
Example #24
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_list(self): s = ['1', '-3.14', '7'] res = to_numeric(s) expected = np.array([1, -3.14, 7]) tm.assert_numpy_array_equal(res, expected)
Example #25
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_series_numeric(self): s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX') res = to_numeric(s) tm.assert_series_equal(res, s) s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX') res = to_numeric(s) tm.assert_series_equal(res, s) # bool is regarded as numeric s = pd.Series([True, False, True, True], index=list('ABCD'), name='XXX') res = to_numeric(s) tm.assert_series_equal(res, s)
Example #26
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_series(self): s = pd.Series(['1', '-3.14', '7']) res = to_numeric(s) expected = pd.Series([1, -3.14, 7]) tm.assert_series_equal(res, expected) s = pd.Series(['1', '-3.14', 7]) res = to_numeric(s) tm.assert_series_equal(res, expected)
Example #27
Source File: test_numeric.py From vnpy_crypto with MIT License | 5 votes |
def test_empty(self): # see gh-16302 s = pd.Series([], dtype=object) res = to_numeric(s) expected = pd.Series([], dtype=np.int64) tm.assert_series_equal(res, expected) # Original issue example res = to_numeric(s, errors='coerce', downcast='integer') expected = pd.Series([], dtype=np.int8) tm.assert_series_equal(res, expected)
Example #28
Source File: mediation.py From vnpy_crypto with MIT License | 5 votes |
def summary(self, alpha=0.05):
    """
    Provide a summary of a mediation analysis.

    Returns a DataFrame with one row per effect and the columns
    Estimate, Lower CI bound, Upper CI bound and P-value; the bounds are
    the ``alpha / 2`` and ``1 - alpha / 2`` percentiles of each effect.
    """
    columns = ["Estimate", "Lower CI bound", "Upper CI bound", "P-value"]
    index = ["ACME (control)", "ACME (treated)", "ADE (control)", "ADE (treated)",
             "Total effect", "Prop. mediated (control)", "Prop. mediated (treated)",
             "ACME (average)", "ADE (average)", "Prop. mediated (average)"]
    smry = pd.DataFrame(columns=columns, index=index)

    # Same order as the row labels in `index`.
    effects = [self.ACME_ctrl, self.ACME_tx, self.ADE_ctrl, self.ADE_tx,
               self.total_effect, self.prop_med_ctrl, self.prop_med_tx,
               self.ACME_avg, self.ADE_avg, self.prop_med_avg]
    lower_q = 100 * alpha / 2
    upper_q = 100 * (1 - alpha / 2)

    for row, vec in enumerate(effects):
        # Proportion-mediated rows are summarised by the median, the rest
        # by the mean.
        is_proportion = ((vec is self.prop_med_ctrl) or
                         (vec is self.prop_med_tx) or
                         (vec is self.prop_med_avg))
        smry.iloc[row, 0] = np.median(vec) if is_proportion else vec.mean()
        smry.iloc[row, 1] = np.percentile(vec, lower_q)
        smry.iloc[row, 2] = np.percentile(vec, upper_q)
        smry.iloc[row, 3] = _pvalue(vec)

    if pdc.version < '0.17.0':  # pragma: no cover
        return smry.convert_objects(convert_numeric=True)
    # pragma: no cover
    return smry.apply(pd.to_numeric, errors='coerce')
Example #29
Source File: china_etf_day_kdata_recorder.py From zvt with MIT License | 5 votes |
def fetch_cumulative_net_value(self, security_item, start, end) -> pd.DataFrame:
    """
    Download the paged net-value history of a fund and return it as one frame.

    Pages of 200 records are requested until an empty page comes back. Each
    page is indexed by its 'FSRQ' column (parsed as datetime); 'JZZZL' and
    'LJJZ' are coerced to numbers and missing values are filled with 0.
    ``self.sleep()`` is called after every non-empty page.
    """
    query_url = ('http://api.fund.eastmoney.com/f10/lsjz?'
                 'fundCode={}&pageIndex={}&pageSize=200&startDate={}&endDate={}')

    collected = pd.DataFrame()
    page = 1
    while True:
        url = query_url.format(security_item.code, page,
                               to_time_str(start), to_time_str(end))
        response = requests.get(url, headers=EASTMONEY_ETF_NET_VALUE_HEADER)
        payload = demjson.decode(response.text)
        page_df = pd.DataFrame(payload['Data']['LSJZList'])

        # last page
        if page_df.empty:
            return collected

        page_df['FSRQ'] = pd.to_datetime(page_df['FSRQ'])
        for numeric_col in ('JZZZL', 'LJJZ'):
            page_df[numeric_col] = pd.to_numeric(page_df[numeric_col], errors='coerce')
        page_df = page_df.fillna(0)
        page_df.set_index('FSRQ', inplace=True, drop=True)

        collected = pd.concat([collected, page_df])
        page += 1

        self.sleep()
Example #30
Source File: pipeline.py From xbbg with Apache License 2.0 | 5 votes |
def format_raw(data: pd.DataFrame) -> pd.DataFrame:
    """
    Convert columns to numeric where possible, then object columns to datetime

    Columns considered for datetime conversion are those with object dtype
    plus any whose label contains 'UPDATE_STAMP'.

    Examples:
        >>> dvd = pd.read_pickle('xbbg/tests/data/sample_dvd_mc_raw.pkl')
        >>> dvd.dtypes
        Declared Date          object
        Ex-Date                object
        Record Date            object
        Payable Date           object
        Dividend Amount       float64
        Dividend Frequency     object
        Dividend Type          object
        dtype: object
        >>> dvd.pipe(format_raw).dtypes
        Declared Date         datetime64[ns]
        Ex-Date               datetime64[ns]
        Record Date           datetime64[ns]
        Payable Date          datetime64[ns]
        Dividend Amount              float64
        Dividend Frequency            object
        Dividend Type                 object
        dtype: object
    """
    res = data.apply(pd.to_numeric, errors='ignore')

    dtypes = data.dtypes
    is_object = dtypes.isin([np.dtype('O')])
    is_stamp = data.columns.str.contains('UPDATE_STAMP')
    cols = dtypes.loc[is_object | is_stamp].index

    if cols.empty:
        return res

    res.loc[:, cols] = data.loc[:, cols].apply(pd.to_datetime, errors='ignore')
    return res