Python pandas.DataFrame() Examples
The following are 30 code examples of pandas.DataFrame().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 27 votes |
def average_true_range(df, n):
    """Append an exponentially smoothed true-range column to ``df``.

    The true range of row ``i + 1`` is the larger of that row's ``High`` and
    the previous row's ``Close`` minus the smaller of that row's ``Low`` and
    the previous row's ``Close``. The first row gets 0.

    :param df: pandas.DataFrame with ``High``, ``Low`` and ``Close`` columns;
        rows are looked up by the integer labels ``0 .. df.index[-1]``
    :param n: span and minimum number of observations of the exponentially
        weighted mean
    :return: pandas.DataFrame, ``df`` joined with the new ``ATR_<n>`` column
    """
    true_ranges = [0]
    for row in range(df.index[-1]):
        prev_close = df.loc[row, 'Close']
        upper = max(df.loc[row + 1, 'High'], prev_close)
        lower = min(df.loc[row + 1, 'Low'], prev_close)
        true_ranges.append(upper - lower)

    smoothed = pd.Series(true_ranges).ewm(span=n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='ATR_' + str(n)))
Example #2
Source File: pre_submission.py From MPContribs with MIT License | 9 votes |
def get_table(results, letter):
    """Merge measured values and a fitted curve into one table keyed by δ.

    :param results: indexable of five sequences. ``results[0]``,
        ``results[1]`` and ``results[2]`` become the "δ", "Δ<letter>" and
        "Δ<letter>ₑᵣᵣ" columns; ``results[3]`` and ``results[4]`` supply the
        δ values and the "Δ<letter> Fit" column (presumably numpy arrays,
        since they are compared and boolean-masked -- TODO confirm)
    :param letter: suffix used to build the Δ column names
    :return: frame with columns δ, Δ<letter>, Δ<letter>ₑᵣᵣ and Δ<letter> Fit,
        sorted by δ, with missing cells replaced by empty strings
    """
    value_col = "Δ{}".format(letter)
    error_col = value_col + "ₑᵣᵣ"
    fit_col = value_col + " Fit"

    measured = Table(
        RecursiveDict(
            [("δ", results[0]), (value_col, results[1]), (error_col, results[2])]
        )
    )

    # Keep only the fit points lying within 15% of the measured δ span
    # beyond either end of the measured range.
    first, last = measured["δ"].iloc[[0, -1]]
    lower, upper = float(first), float(last)
    margin = 0.15 * (upper - lower)
    in_range = (results[3] > lower - margin) & (results[3] < upper + margin)
    fit_x = results[3][in_range]
    fit_y = results[4][in_range]

    measured.set_index("δ", inplace=True)
    fitted = pd.DataFrame(RecursiveDict([("δ", fit_x), (fit_col, fit_y)]))
    fitted.set_index("δ", inplace=True)

    merged = pd.concat([measured, fitted], sort=True).sort_index().reset_index()
    merged = merged.rename(columns={"index": "δ"}).fillna("")
    return merged[["δ", value_col, error_col, fit_col]]
Example #3
Source File: display_methods.py From indras_net with GNU General Public License v3.0 | 8 votes |
def create_lines(self, x, varieties):
    """
    Draw just the data portion.

    Builds one long-format frame holding a row per (x, y) point of every
    variety and records each variety name in ``self.legend``.

    :param x: x-axis values shared by all varieties
    :param varieties: mapping of variety name to a dict with at least a
        "data" sequence; each entry is also handed to ``get_color``
    :return: pandas.DataFrame with columns "x", "y", "color" and "var"
        (an empty frame when ``varieties`` is empty)
    """
    x_array = np.array(x)  # identical for every variety, so convert once
    frames = []
    for i, var in enumerate(varieties):
        self.legend.append(var)
        frames.append(pd.DataFrame({
            "x": x_array,
            "y": np.array(varieties[var]["data"]),
            "color": get_color(varieties[var], i),
            "var": var,
        }))

    if not frames:
        # pd.concat() rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # DataFrame.append() was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration; concatenate once instead.
    return pd.concat(frames, ignore_index=True, sort=False)
Example #4
Source File: utils.py From backtrader-cn with GNU General Public License v3.0 | 8 votes |
def write_daily_alert(cls, symbol, stock_id, action):
    """
    Store one stock alert under ``symbol`` in the daily-alert library.

    :param symbol: Arctic symbol
    :param stock_id: value stored in the 'stock' column, e.g. '000651'
    :param action: value stored in the 'action' column, like 'buy/sell'
    :return: None
    """
    library = get_or_create_library(conf.DAILY_STOCK_ALERT_LIBNAME)

    record = {'stock': stock_id, 'action': action}
    frame = pd.DataFrame([record], columns=record.keys())

    # Extend the symbol when the library already has it, otherwise create it.
    store = library.append if symbol in library.list_symbols() else library.write
    store(symbol, frame)
Example #5
Source File: display_methods.py From indras_net with GNU General Public License v3.0 | 7 votes |
def create_scats(self, varieties):
    """Rebuild ``self.scats`` with one row per scatter point of every variety.

    Each variety name is appended to ``self.legend``. If a variety's x and y
    arrays differ in length, a debug message is logged and the method returns
    early, leaving ``self.scats`` with the varieties processed so far.

    :param varieties: mapping of variety name to its settings; passed to
        ``self.get_arrays``, ``get_color`` and ``get_marker``
    :return: None
    """
    self.scats = pd.DataFrame(columns=["x", "y", "color", "marker", "var"])
    for i, var in enumerate(varieties):
        self.legend.append(var)
        x_array, y_array = self.get_arrays(varieties, var)
        if len(x_array) <= 0:  # no data to graph!
            # Create a single "position" for an agent that cannot be seen.
            # This seems to fix the issue of colors being mismatched when a
            # group has no agents.
            x_array = [-1]
            y_array = [-1]
        elif len(x_array) != len(y_array):
            logging.debug("Array length mismatch in scatter plot")
            return

        scat = pd.DataFrame({
            "x": pd.Series(x_array),
            "y": pd.Series(y_array),
            "color": get_color(varieties[var], i),
            "marker": get_marker(varieties[var], i),
            "var": var,
        })
        # DataFrame.append() was removed in pandas 2.0; pd.concat() is the
        # drop-in replacement and keeps self.scats current on every pass.
        self.scats = pd.concat([self.scats, scat], ignore_index=True, sort=False)
Example #6
Source File: runTests.py From svviz with MIT License | 7 votes |
def run(which):
    """Run the selected test suites, print a summary table and save timings.

    :param which: collection of suite names ("chrom_ends", "demos", "counts",
        "rendering"); an empty collection runs every suite
    """
    print("running all tests...")

    # (summary row label, test callable, name handed to _runTest), in run order
    suites = [
        ("chrom_ends", runTestIssues, "issues"),      # chromosome ends
        ("demos", testDemos.run, "demos"),            # the demos
        ("counts", runTestCounts, "counts"),          # ref/alt/amb count regression
        ("rendering", rendertest.run, "rendering"),   # render regression
    ]
    run_everything = len(which) == 0

    summary = pandas.DataFrame(columns=["pass", "info", "timing"])
    for label, test, name in suites:
        if run_everything or label in which:
            summary.loc[label] = _runTest(test, name)

    def as_duration(seconds):
        # Whole seconds rendered through timedelta's string form.
        return "{}".format(datetime.timedelta(seconds=int(seconds)))

    summary["timing"] = summary["timing"].apply(as_duration)

    print(summary)
    saveTimingInfo(summary)
Example #7
Source File: dataloader_m.py From models with MIT License | 7 votes |
def prepro_pos_table(pos_tables):
    """Extracts unique positions and sorts them.

    :param pos_tables: a single pandas.DataFrame or a list of them, each with
        'chromo' and 'pos' columns
    :return: pandas.DataFrame with columns ['chromo', 'pos'] holding every
        distinct (chromo, pos) pair once, sorted by chromosome and then
        position, with a fresh 0..n-1 index
    :raises ValueError: if an empty list is passed
    """
    if not isinstance(pos_tables, list):
        pos_tables = [pos_tables]
    if not pos_tables:
        raise ValueError('pos_tables must contain at least one table')

    # One concat instead of re-copying the accumulated table per input.
    combined = pd.concat(pos_tables, ignore_index=True)
    return (
        combined[['chromo', 'pos']]
        # The previous groupby('chromo') silently skipped rows whose key is
        # missing; drop them explicitly to keep that behaviour.
        .dropna(subset=['chromo'])
        .drop_duplicates()
        .sort_values(['chromo', 'pos'])
        .reset_index(drop=True)
    )
Example #8
Source File: test_integration.py From pylivy with MIT License | 6 votes |
def test_session(integration_url, capsys, session_kind, params):
    """Exercise a whole session lifecycle against a running Livy server.

    Checks, in order: the server is reachable, a new session is idle, printed
    output is captured, a dataframe can be created and counted, failing code
    raises ``SparkRuntimeError``, the dataframe can be read back, and the
    session is stopped once the ``with`` block exits.
    """
    assert livy_available(integration_url)

    expected = pandas.DataFrame({"value": range(100)})

    with LivySession.create(integration_url, kind=session_kind) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        printed = capsys.readouterr()
        assert printed == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        # Drain whatever creating the dataframe printed so that it does not
        # leak into the next comparison.
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        counted = capsys.readouterr()
        assert counted == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        fetched = session.read("df")
        assert fetched.equals(expected)

    assert session_stopped(integration_url, session.session_id)
Example #9
Source File: Senti.py From Financial-NLP with Apache License 2.0 | 6 votes |
def calculate_scores_of_all(self, saveflag=0, savefilename=''):
    """Score every entry of ``self.article_dir`` and optionally save to CSV.

    Entries for which ``self.score_of_date`` fails are skipped.

    :param saveflag: when truthy, the scores are written to ``savefilename``
    :param savefilename: CSV path used when ``saveflag`` is truthy
    :return: tuple ``(all_date_score, dates)`` where ``all_date_score`` is a
        list of ``(date, score)`` pairs for the entries that could be scored
        and ``dates`` is the full directory listing
    """
    dates = os.listdir(self.article_dir)
    all_date_score = []
    for date in dates:
        try:
            score, _ = self.score_of_date(date)
        except Exception:
            # Best effort: an entry that cannot be scored is left out.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still stop the run.
            continue
        all_date_score.append((date, score))

    if saveflag:
        pd.DataFrame(all_date_score).to_csv(savefilename)
    return all_date_score, dates
Example #10
Source File: dns_oa.py From incubator-spot with Apache License 2.0 | 6 votes |
def _ingest_summary(self):
    """Aggregate the day's DNS record counts per minute and store them.

    Queries ``<db>.<table>`` for the number of records per ``frame_time`` on
    the day encoded in ``self._date`` (sliced as year ``[:4]``, month
    ``[4:6]``, day ``[6:]``), rewrites every ``frame_time`` to
    ``"<yr>-<mn>-<dy> HH:MM"``, sums the totals per minute and, when there is
    at least one row, inserts them into ``<db>.dns_ingest_summary``.

    :return: None
    """
    # get date parameters.
    yr = self._date[:4]
    mn = self._date[4:6]
    dy = self._date[6:]

    self._logger.info("Getting ingest summary data for the day")

    ingest_summary_cols = ["date","total"]
    df_filtered = pd.DataFrame()

    query_to_load = (""" SELECT frame_time, COUNT(*) as total FROM {0}.{1} WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL AND frame_time IS NOT NULL AND frame_len IS NOT NULL AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time; """).format(self._db,self._table_name, yr, mn, dy)

    results = impala.execute_query_as_list(query_to_load)
    df = pd.DataFrame(results)

    # Forms a new dataframe splitting the minutes from the time column
    rows = []
    for _, val in df.iterrows():
        # The 4th space-separated token of frame_time is taken as the
        # clock time "HH:MM:...".
        # NOTE(review): as seen here replace() swaps a single space for a
        # single space (a no-op); the intent was presumably to collapse a
        # double space -- confirm against the original file.
        clock = val['frame_time'].replace(" "," ").split(" ")[3].split(":")
        minute_stamp = "{0}-{1}-{2} {3}:{4}".format(
            yr, mn, dy, clock[0].zfill(2), clock[1].zfill(2))
        total = int(val['total']) if not math.isnan(val['total']) else 0
        rows.append([minute_stamp, total])
    df_new = pd.DataFrame(rows, columns=ingest_summary_cols)

    # Groups the data by minute
    sf = df_new.groupby(by=['date'])['total'].sum()
    df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})

    # DataFrame.append() was removed in pandas 2.0, and to_records() no
    # longer accepts a second positional flag; index=False alone keeps the
    # index out of the records on old and new pandas alike.
    df_final = pd.concat(
        [df_filtered, df_per_min], ignore_index=True).to_records(index=False)

    if len(df_final) > 0:
        # NOTE(review): the VALUES clause is Python's repr of a tuple of
        # records, so a single record ends in a trailing comma -- confirm
        # the SQL engine accepts that form.
        query_to_insert=(""" INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4}; """).format(self._db, yr, mn, dy, tuple(df_final))
        impala.execute_query(query_to_insert)
Example #11
Source File: test_datas_utils.py From backtrader-cn with GNU General Public License v3.0 | 6 votes |
def _test_strip_unused_cols(self):
    """Check that strip_unused_cols() removes exactly the requested columns.

    The return value of ``strip_unused_cols`` is ignored, so the test expects
    ``data`` itself to lose the 'address' and 'gender' columns.
    """
    data = pd.DataFrame({
        'name': ['tom', 'jack'],
        'age': [24, 56],
        'gender': ['male', 'male'],
        'address': ['cn', 'us']
    })
    data.index = pd.date_range(start='2017-01-01', periods=2)

    origin_cols = ['name', 'age', 'gender', 'address']
    unused_cols = ['address', 'gender']
    new_cols = ['name', 'age']

    # list.sort() sorts in place and returns None, so comparing its results
    # always compared None with None. sorted() compares the actual names.
    self.assertEqual(sorted(data.columns), sorted(origin_cols))
    bdu.Utils.strip_unused_cols(data, *unused_cols)
    self.assertEqual(sorted(data.columns), sorted(new_cols))
Example #12
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def money_flow_index(df, n):
    """Append the rolling mean of the positive-to-total money-flow ratio.

    The typical price is the mean of High, Low and Close. A row's positive
    money flow is typical price times Volume when the typical price rose
    against the previous row, otherwise 0 (the first row is 0). The ratio of
    positive flow to typical price times Volume is averaged over ``n`` rows.

    :param df: pandas.DataFrame with ``High``, ``Low``, ``Close`` and
        ``Volume`` columns, looked up by integer labels ``0 .. df.index[-1]``
    :param n: rolling window length (also the minimum number of periods)
    :return: pandas.DataFrame, ``df`` joined with the new ``MFI_<n>`` column
    """
    typical = (df['High'] + df['Low'] + df['Close']) / 3

    positive_flow = [0]
    for row in range(df.index[-1]):
        nxt = row + 1
        if typical[nxt] > typical[row]:
            positive_flow.append(typical[nxt] * df.loc[nxt, 'Volume'])
        else:
            positive_flow.append(0)

    total_flow = typical * df['Volume']
    ratio = pd.Series(pd.Series(positive_flow) / total_flow)
    mfi = pd.Series(ratio.rolling(n, min_periods=n).mean(), name='MFI_' + str(n))
    return df.join(mfi)
Example #13
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def ppsr(df):
    """Append pivot point, support and resistance columns to ``df``.

    With ``PP = (High + Low + Close) / 3`` the added columns are
    ``R1 = 2*PP - Low``, ``S1 = 2*PP - High``, ``R2 = PP + High - Low``,
    ``S2 = PP - High + Low``, ``R3 = High + 2*(PP - Low)`` and
    ``S3 = Low - 2*(High - PP)``.

    :param df: pandas.DataFrame with ``High``, ``Low`` and ``Close`` columns
    :return: pandas.DataFrame, ``df`` joined with PP, R1, S1, R2, S2, R3, S3
    """
    high, low, close = df['High'], df['Low'], df['Close']
    pivot = (high + low + close) / 3

    levels = pd.DataFrame({
        'PP': pivot,
        'R1': 2 * pivot - low,
        'S1': 2 * pivot - high,
        'R2': pivot + high - low,
        'S2': pivot - high + low,
        'R3': high + 2 * (pivot - low),
        'S3': low - 2 * (high - pivot),
    })
    return df.join(levels)
Example #14
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def trix(df, n):
    """Append the row-over-row relative change of a triple-smoothed Close.

    Close is passed three times through an exponentially weighted mean with
    span ``n``; each row then gets ``(current - previous) / previous`` of
    that smoothed series, with NaN for the first row.

    :param df: pandas.DataFrame with a ``Close`` column, looked up by the
        integer labels ``0 .. df.index[-1]``
    :param n: span and minimum number of observations of each smoothing pass
    :return: pandas.DataFrame, ``df`` joined with the new ``Trix_<n>`` column
    """
    def smooth(series):
        return series.ewm(span=n, min_periods=n).mean()

    triple = smooth(smooth(smooth(df['Close'])))

    changes = [np.nan]
    for row in range(df.index[-1]):
        changes.append((triple[row + 1] - triple[row]) / triple[row])

    return df.join(pd.Series(changes, name='Trix_' + str(n)))
Example #15
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def vortex_indicator(df, n):
    """Append the ratio of summed vortex movement to summed true range.

    Vortex Indicator described here:
        http://www.vortexindicator.com/VFX_VORTEX.PDF

    For each row after the first, the true range is
    ``max(High, prev Close) - min(Low, prev Close)`` and the movement is
    ``|High - prev Low| - |Low - prev High|``; both start with 0 for the
    first row. The output is the ``n``-row rolling sum of the movement
    divided by the ``n``-row rolling sum of the true range.

    :param df: pandas.DataFrame with ``High``, ``Low`` and ``Close`` columns,
        looked up by the integer labels ``0 .. df.index[-1]``
    :param n: rolling window length
    :return: pandas.DataFrame, ``df`` joined with the new ``Vortex_<n>`` column
    """
    true_range = [0]
    movement = [0]
    for row in range(df.index[-1]):
        nxt = row + 1
        high, low = df.loc[nxt, 'High'], df.loc[nxt, 'Low']
        prev_close = df.loc[row, 'Close']
        true_range.append(max(high, prev_close) - min(low, prev_close))
        movement.append(
            abs(high - df.loc[row, 'Low']) - abs(low - df.loc[row, 'High']))

    ratio = pd.Series(movement).rolling(n).sum() / pd.Series(true_range).rolling(n).sum()
    return df.join(pd.Series(ratio, name='Vortex_' + str(n)))
Example #16
Source File: gather.py From models with MIT License | 6 votes |
def get_df(vcf_file, model_name):
    """Load one model's variant predictions from a VCF into two frames.

    :param vcf_file: VCF path handed to ``KipoiVCFParser``
    :param model_name: model the predictions belong to; "labranchor" columns
        are averaged with ``average_labranchor``, every other model has its
        columns renamed with ``refmt_col``
    :return: tuple ``(df, meta_info)``, both indexed by ``variant_uid``
        ("chr:pos:ref:alt"). ``df`` holds the prediction columns after
        ``deduplicate_vars``; ``meta_info`` holds the variant description
        columns for the rows that remain in ``df``
    """
    variant_columns = ["variant_chr", "variant_pos", "variant_ref", "variant_alt", "variant_id"]

    df = pd.DataFrame(list(KipoiVCFParser(vcf_file)))

    # Copy so that adding variant_uid writes to an independent frame rather
    # than to a slice of df (avoids pandas' SettingWithCopy warning).
    meta_info = df[variant_columns].copy()
    meta_info["variant_uid"] = (
        df["variant_chr"].astype(str) + ':' + df["variant_pos"].astype(str)
        + ':' + df["variant_ref"] + ':' + df["variant_alt"]
    )
    df.index = meta_info["variant_uid"]
    meta_info.index = meta_info["variant_uid"]

    # Keep only prediction columns: drop the variant description columns and
    # anything whose name contains "rID".
    df = df[[col for col in df.columns
             if col not in variant_columns and "rID" not in col]]

    col_types = ["_LOGIT_REF", "_LOGIT_ALT", "_REF", "_ALT", "_DIFF", "_LOGIT"]
    if model_name == "labranchor":
        df = average_labranchor(df, model_name, col_types)
    else:
        df.columns = [refmt_col(col, model_name, col_types) for col in df.columns]

    # clump variants together
    df = deduplicate_vars(df)
    # subset meta_info like df and add variant_uid as common ID
    meta_info = meta_info.loc[df.index, :]
    return df, meta_info
Example #17
Source File: gather.py From models with MIT License | 6 votes |
def get_df(vcf_file, model_name):
    """Load one model's variant predictions from a VCF into two frames.

    :param vcf_file: VCF path handed to ``KipoiVCFParser``
    :param model_name: model the predictions belong to; "labranchor" columns
        are averaged with ``average_labranchor``, every other model has its
        columns renamed with ``refmt_col``
    :return: tuple ``(df, meta_info)``, both indexed by ``variant_uid``
        ("chr:pos:ref:alt"). ``df`` holds the prediction columns after
        ``deduplicate_vars``; ``meta_info`` holds the variant description
        columns for the rows that remain in ``df``
    """
    variant_columns = ["variant_chr", "variant_pos", "variant_ref", "variant_alt", "variant_id"]

    df = pd.DataFrame(list(KipoiVCFParser(vcf_file)))

    # Copy so that adding variant_uid writes to an independent frame rather
    # than to a slice of df (avoids pandas' SettingWithCopy warning).
    meta_info = df[variant_columns].copy()
    meta_info["variant_uid"] = (
        df["variant_chr"].astype(str) + ':' + df["variant_pos"].astype(str)
        + ':' + df["variant_ref"] + ':' + df["variant_alt"]
    )
    df.index = meta_info["variant_uid"]
    meta_info.index = meta_info["variant_uid"]

    # Keep only prediction columns: drop the variant description columns and
    # anything whose name contains "rID".
    df = df[[col for col in df.columns
             if col not in variant_columns and "rID" not in col]]

    col_types = ["_LOGIT_REF", "_LOGIT_ALT", "_REF", "_ALT", "_DIFF", "_LOGIT"]
    if model_name == "labranchor":
        df = average_labranchor(df, model_name, col_types)
    else:
        df.columns = [refmt_col(col, model_name, col_types) for col in df.columns]

    # clump variants together
    df = deduplicate_vars(df)
    # subset meta_info like df and add variant_uid as common ID
    meta_info = meta_info.loc[df.index, :]
    return df, meta_info
Example #18
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def on_balance_volume(df, n):
    """Append the rolling mean of the signed volume.

    Each row after the first contributes ``+Volume`` when Close rose against
    the previous row, ``-Volume`` when it fell and 0 when it is unchanged;
    the first row contributes 0.

    :param df: pandas.DataFrame with ``Close`` and ``Volume`` columns, looked
        up by the integer labels ``0 .. df.index[-1]``
    :param n: rolling window length (also the minimum number of periods)
    :return: pandas.DataFrame, ``df`` joined with the new ``OBV_<n>`` column
    """
    signed_volume = [0]
    for row in range(df.index[-1]):
        change = df.loc[row + 1, 'Close'] - df.loc[row, 'Close']
        # No bare else: a change that is neither positive, zero nor negative
        # (NaN) adds nothing, exactly as with three independent tests.
        if change > 0:
            signed_volume.append(df.loc[row + 1, 'Volume'])
        elif change == 0:
            signed_volume.append(0)
        elif change < 0:
            signed_volume.append(-df.loc[row + 1, 'Volume'])

    averaged = pd.Series(signed_volume).rolling(n, min_periods=n).mean()
    return df.join(pd.Series(averaged, name='OBV_' + str(n)))
Example #19
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def coppock_curve(df, n):
    """Append the smoothed sum of two Close rates of change.

    The two rates of change use lags of ``int(n * 11 / 10) - 1`` and
    ``int(n * 14 / 10) - 1`` rows; their sum is passed through an
    exponentially weighted mean with span ``n``.

    :param df: pandas.DataFrame with a ``Close`` column
    :param n: span and minimum number of observations of the smoothing, and
        the base from which both lags are derived
    :return: pandas.DataFrame, ``df`` joined with the new ``Copp_<n>`` column
    """
    close = df['Close']

    def rate_of_change(lag):
        return close.diff(lag) / close.shift(lag)

    short_lag = int(n * 11 / 10) - 1
    long_lag = int(n * 14 / 10) - 1
    combined = rate_of_change(short_lag) + rate_of_change(long_lag)

    curve = pd.Series(combined.ewm(span=n, min_periods=n).mean(), name='Copp_' + str(n))
    return df.join(curve)
Example #20
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def keltner_channel(df, n):
    """Append three rolling-mean channel columns to ``df``.

    ``KelChM_<n>`` averages ``(High + Low + Close) / 3``, ``KelChU_<n>``
    averages ``(4*High - 2*Low + Close) / 3`` and ``KelChD_<n>`` averages
    ``(-2*High + 4*Low + Close) / 3``, each over ``n`` rows.

    :param df: pandas.DataFrame with ``High``, ``Low`` and ``Close`` columns
    :param n: rolling window length (also the minimum number of periods)
    :return: pandas.DataFrame, ``df`` joined with the three new columns
    """
    high, low, close = df['High'], df['Low'], df['Close']

    # (column prefix, unsmoothed series), in the order the columns are added
    bands = (
        ('KelChM_', (high + low + close) / 3),
        ('KelChU_', (4 * high - 2 * low + close) / 3),
        ('KelChD_', (-2 * high + 4 * low + close) / 3),
    )
    for prefix, raw in bands:
        smoothed = pd.Series(raw.rolling(n, min_periods=n).mean(), name=prefix + str(n))
        df = df.join(smoothed)
    return df
Example #21
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def ultimate_oscillator(df):
    """Append the weighted sum of three pressure-to-range ratios.

    For each row after the first, the true range is
    ``max(High, prev Close) - min(Low, prev Close)`` and the buying pressure
    is ``Close - min(Low, prev Close)``; both start with 0 for the first
    row. The output is ``4 * BP7 / TR7 + 2 * BP14 / TR14 + BP28 / TR28``,
    where ``BPk`` and ``TRk`` are rolling sums over ``k`` rows.

    :param df: pandas.DataFrame with ``High``, ``Low`` and ``Close`` columns,
        looked up by the integer labels ``0 .. df.index[-1]``
    :return: pandas.DataFrame, ``df`` joined with the ``Ultimate_Osc`` column
    """
    true_range = [0]
    buying_pressure = [0]
    for row in range(df.index[-1]):
        prev_close = df.loc[row, 'Close']
        floor = min(df.loc[row + 1, 'Low'], prev_close)
        true_range.append(max(df.loc[row + 1, 'High'], prev_close) - floor)
        buying_pressure.append(df.loc[row + 1, 'Close'] - floor)

    pressure = pd.Series(buying_pressure)
    ranges = pd.Series(true_range)

    def weighted_ratio(weight, window):
        # Multiply before dividing, matching the original operation order.
        return weight * pressure.rolling(window).sum() / ranges.rolling(window).sum()

    oscillator = pd.Series(
        weighted_ratio(4, 7) + weighted_ratio(2, 14) + weighted_ratio(1, 28),
        name='Ultimate_Osc')
    return df.join(oscillator)
Example #22
Source File: technical_indicators.py From pandas-technical-indicators with MIT License | 6 votes |
def donchian_channel(df, n):
    """Calculate donchian channel of given pandas data frame.

    The list of widths starts with ``n - 1`` zeros, followed by
    ``max(High) - min(Low)`` over the label range ``i .. i + n - 1`` for
    every ``i`` with ``i + n - 1 < df.index[-1]``. The resulting series is
    shifted down by ``n - 1`` rows before being joined to ``df``.

    :param df: pandas.DataFrame with ``High`` and ``Low`` columns and an
        integer index
    :param n: window length in rows
    :return: pandas.DataFrame, ``df`` joined with the ``Donchian_<n>`` column
    """
    widths = [0] * (n - 1)
    for start in range(df.index[-1] - n + 1):
        stop = start + n - 1
        # The .ix indexer was removed in pandas 1.0. On an integer index it
        # sliced by label with an inclusive end, which is what .loc does.
        window_high = df['High'].loc[start:stop]
        window_low = df['Low'].loc[start:stop]
        widths.append(max(window_high) - min(window_low))

    donchian_chan = pd.Series(widths, name='Donchian_' + str(n))
    donchian_chan = donchian_chan.shift(n - 1)
    return df.join(donchian_chan)
Example #23
Source File: tdata.py From MPContribs with MIT License | 6 votes |
def render(self, total_records=None):
    """use BackGrid JS library to render Pandas DataFrame

    Returns an HTML snippet made of four placeholder ``<div>`` elements with
    freshly generated UUID ids, followed by a ``<script>`` that calls
    ``render_table`` with the table and its configuration as JSON.

    :param total_records: record count reported in the configuration;
        defaults to the number of rows (``self.shape[0]``)
    :return: HTML string
    """
    # if project given, this will result in an overview table of contributions
    # TODO check for index column in df other than the default numbering
    jtable = json.dumps(self.to_backgrid_dict())

    if total_records is None:
        total_records = self.shape[0]

    element_ids = [str(uuid.uuid4()) for _ in range(4)]
    config = {"total_records": total_records, "uuids": element_ids}
    if self.tid:
        config["tid"] = self.tid
        config["per_page"] = self.per_page
    else:
        # NOTE(review): the reviewed source was collapsed onto one line, so
        # whether "ncols" and "filters" belong inside this branch or after
        # the if/else could not be read off the text -- confirm.
        config["project"] = self.project
        config["api_key"] = self.api_key
        config["ncols"] = self.ncols
        config["filters"] = self.filters
    jconfig = json.dumps(config)

    pieces = [
        '<div class="col-md-6" id="{}"></div>'.format(element_ids[0]),
        '<div class="pull-right" id="{}"></div>'.format(element_ids[3]),
        '<div id="{}" style="width:100%;"></div>'.format(element_ids[1]),
        '<div id="{}"></div>'.format(element_ids[2]),
        f"<script>render_table({{table: {jtable}, config: {jconfig}}})</script>",
    ]
    return "".join(pieces)
Example #24
Source File: server.py From jiji-with-tensorflow-example with MIT License | 5 votes |
def estimate():
    """Predict "up" or "down" for the indicator values in the request body.

    :return: JSON response with a single ``result`` field that is "up" when
        the first estimator result equals 0 and "down" otherwise
    """
    # To normalize the values, merge the indicator data passed in the
    # request body with the data that was used for training.
    request_row = pd.DataFrame({k: [v] for k, v in request.json.items()})
    # DataFrame.append() was removed in pandas 2.0; pd.concat() keeps the
    # request row first, followed by the training data.
    data = pd.concat([request_row, trade_data])

    # Pass the normalized data and predict the profit/loss. Only row 0 is
    # evaluated (presumably still the request row after all_data() --
    # TODO confirm that all_data() preserves row order).
    results = estimator.estimate(TradeResults(data).all_data().iloc[[0]])
    return jsonify(result=("up" if results[0] == 0 else "down"))
Example #25
Source File: main.py From tensorflow-DeepFM with MIT License | 5 votes |
def _make_submission(ids, y_pred, filename="submission.csv"):
    """Write ids and flattened predictions to a CSV file in ``config.SUB_DIR``.

    :param ids: values for the "id" column
    :param y_pred: predictions; ``y_pred.flatten()`` fills the "target" column
    :param filename: name of the file created inside ``config.SUB_DIR``
    :return: None
    """
    submission = pd.DataFrame({"id": ids, "target": y_pred.flatten()})
    destination = os.path.join(config.SUB_DIR, filename)
    # No index column; floats are written with five decimals.
    submission.to_csv(destination, index=False, float_format="%.5f")
Example #26
Source File: session.py From pylivy with MIT License | 5 votes |
def read_sql(self, code: str) -> pandas.DataFrame:
    """Evaluate a Spark SQL statement and retrieve the result.

    :param code: The Spark SQL statement to evaluate.
    :raises ValueError: if this session's kind is not ``SessionKind.SQL``
    :raises RuntimeError: if the statement produced no JSON output
    """
    if self.kind != SessionKind.SQL:
        raise ValueError("not a SQL session")

    result = self._execute(code)
    result.raise_for_status()

    payload = result.json
    if payload is None:
        raise RuntimeError("statement had no JSON output")
    return dataframe_from_json_output(payload)
Example #27
Source File: session.py From pylivy with MIT License | 5 votes |
def read(self, dataframe_name: str) -> pandas.DataFrame:
    """Evaluate and retrieve a Spark dataframe in the managed session.

    :param dataframe_name: The name of the Spark dataframe to read.
    :raises RuntimeError: if the serialisation statement produced no text
        output
    """
    statement = serialise_dataframe_code(dataframe_name, self.kind)

    result = self._execute(statement)
    result.raise_for_status()

    serialised = result.text
    if serialised is None:
        raise RuntimeError("statement had no text output")
    return deserialise_dataframe(serialised)
Example #28
Source File: session.py From pylivy with MIT License | 5 votes |
def dataframe_from_json_output(json_output: dict) -> pandas.DataFrame:
    """Build a dataframe from a JSON statement output.

    :param json_output: dict with the column definitions under
        ``["schema"]["fields"]`` (each carrying a ``"name"``) and the rows
        under ``["data"]``
    :return: dataframe of the rows with the schema's field names as columns
    :raises ValueError: if any of the expected keys is missing
    """
    try:
        schema_fields = json_output["schema"]["fields"]
        names = [entry["name"] for entry in schema_fields]
        rows = json_output["data"]
    except KeyError:
        raise ValueError("json output does not match expected structure")

    return pandas.DataFrame(rows, columns=names)
Example #29
Source File: session.py From pylivy with MIT License | 5 votes |
def deserialise_dataframe(text: str) -> pandas.DataFrame:
    """Build a dataframe from newline-separated JSON records.

    :param text: one JSON document per line; empty lines are ignored
    :return: dataframe with one row per non-empty line
    """
    records = [json.loads(line) for line in text.split("\n") if line]
    return pandas.DataFrame.from_records(records)
Example #30
Source File: pre_submission.py From MPContribs with MIT License | 5 votes |
def run(mpfile, **kwargs):
    """Import the sample's measurement files into ``mpfile``.

    Adds one data table per ``.ibw`` file, an XRD "NearSRO" table plotted
    with a logarithmic y axis, and the RSM range plus its "RSM" table, all
    under the identifier derived from "PbZr20Ti80O3". Progress is printed.

    :param mpfile: object providing ``hdata``, ``add_data_table`` and
        ``add_hierarchical_data``; the input directory is read from
        ``mpfile.hdata["_hdata"]["input_dir"]``
    :param kwargs: accepted but not used
    :return: None
    """
    input_dir = mpfile.hdata["_hdata"]["input_dir"]
    identifier = get_composition_from_string("PbZr20Ti80O3")
    # Single-argument print(...) calls give the same output on Python 2 and
    # Python 3; the former print statements were a syntax error on Python 3.
    print(identifier)

    # 'SP128_NSO_LPFM0000.ibw' too big to display in notebook
    files = ["BR_60016 (1).ibw", "SP128_NSO_VPFM0000.ibw"]
    for f in files:
        file_name = os.path.join(input_dir, f)
        df = load_data(file_name)
        name = f.split(".")[0]
        mpfile.add_data_table(identifier, df, name)
        print("imported {}".format(f))

    xrd_file = os.path.join(input_dir, "Program6_JA_6_2th0m Near SRO (002)_2.xrdml.xml")
    data = read_xrdml(xrd_file)
    df = DataFrame(
        np.stack((data["2Theta"], data["data"]), 1), columns=["2Theta", "Intensity"]
    )
    opts = {"yaxis": {"type": "log"}}  # see plotly docs
    mpfile.add_data_table(identifier, df, "NearSRO", plot_options=opts)
    print("imported {}".format(os.path.basename(xrd_file)))

    rsm_file = os.path.join(input_dir, "JA 42 RSM 103 STO 001.xrdml.xml")
    rvals, df = load_RSM(rsm_file)
    mpfile.add_hierarchical_data(
        {
            "rsm_range": {
                "x": "{} {}".format(rvals[0], rvals[1]),
                "y": "{} {}".format(rvals[2], rvals[3]),
            }
        },
        identifier=identifier,
    )
    mpfile.add_data_table(identifier, df, "RSM")
    print("imported {}".format(os.path.basename(rsm_file)))