Python pandas.DataFrame.from_records() Examples

The following are 30 code examples of pandas.DataFrame.from_records().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.DataFrame, or try the search function.
Example #1
Source File: test_foreign.py From vnpy_crypto with MIT License | 6 votes |
def test_datetime_roundtrip():
    """Round-trip a structured array with a datetime column through Stata."""
    original = np.array(
        [(1, datetime(2010, 1, 1), 2),
         (2, datetime(2010, 2, 1), 3),
         (4, datetime(2010, 3, 1), 5)],
        dtype=[('var1', float), ('var2', object), ('var3', float)])

    # ndarray round trip: write to an in-memory Stata file, read it back.
    buf = BytesIO()
    StataWriter(buf, original, {"var2": "tm"}).write_file()
    buf.seek(0)
    assert_equal(original, genfromdta(buf))

    # Same round trip, but going in and coming out as a DataFrame.
    frame = DataFrame.from_records(original)
    buf = BytesIO()
    StataWriter(buf, frame, {"var2": "tm"}).write_file()
    buf.seek(0)
    back = genfromdta(buf, pandas=True)
    ptesting.assert_frame_equal(frame, back.drop('index', axis=1))
Example #2
Source File: test.py From realtime_talib with MIT License | 6 votes |
def getCryptoHistoricalData(self, symbol, endTime, histPeriod, vwap=False):
    """Fetch historical OHLCV data for *symbol* from Poloniex.

    Parameters
    ----------
    symbol : str
        GDAX-style ticker, mapped to a Poloniex ticker via GDAX_TO_POLONIEX.
    endTime : unused; kept for interface compatibility (the original
        computed dateToUNIX(endTime) and never used it -- removed).
    histPeriod :
        Lookback window handed to datetimeDiff to derive the start date.
    vwap : bool, unused; kept for interface compatibility.

    Returns
    -------
    pandas.DataFrame
        Columns date/open/high/low/close/volume; `date` coerced to float.
    """
    # FIX: imports hoisted to the top of the function instead of being
    # scattered mid-body; unused local `endTimeUNIX` dropped.
    import requests
    from pandas import DataFrame

    startDate = getCurrentDateStr()
    priorDate = datetimeDiff(startDate, histPeriod)
    gdaxTicker = GDAX_TO_POLONIEX[symbol]
    stDateUNIX = dateToUNIX(priorDate)
    eDateUNIX = dateToUNIX(startDate)

    poloniexJsonURL = self.POLO_HIST_DATA.format(
        gdaxTicker, stDateUNIX, eDateUNIX, self.interval)
    poloniexJson = requests.get(poloniexJsonURL).json()

    histDataframe = DataFrame.from_records(poloniexJson)
    # Drop derived columns that this interface does not expose.
    histDataframe.drop('quoteVolume', axis=1, inplace=True)
    histDataframe.drop('weightedAverage', axis=1, inplace=True)
    histDataframe['date'] = histDataframe['date'].astype(float)
    return histDataframe[["date", "open", "high", "low", "close", "volume"]]
Example #3
Source File: Xueqiu.py From dHydra with Apache License 2.0 | 6 votes |
def get_stocks(
    self,
    stockTypeList=['sha', 'shb', 'sza', 'szb'],
    columns=CONST_XUEQIU_QUOTE_ORDER_COLUMN
):
    """Fetch stock listings from Xueqiu, paging through each board.

    Parameters
    ----------
    stockTypeList : list of board codes (NOTE: mutable default -- never
        mutated here, so it is safe, but callers should not rely on it).
    columns : column spec forwarded to the quote-order endpoint.

    Returns
    -------
    pandas.DataFrame with all boards concatenated, or None if
    stockTypeList is empty.
    """
    # FIX: replaced the fragile `'stocks' not in locals().keys()` probe
    # with an explicit accumulator initialised to None.
    stocks = None
    for stockType in stockTypeList:
        print("正在从雪球获取:{}".format(EX_NAME[stockType]))
        page = 1
        while True:
            response = self.session.get(
                URL_XUEQIU_QUOTE_ORDER(page, columns, stockType),
                headers=HEADERS_XUEQIU
            ).json()
            df = DataFrame.from_records(
                response["data"], columns=response["column"])
            if stocks is None:
                stocks = df
            else:
                stocks = stocks.append(df)
            if df.size == 0:
                # An empty page marks the end of this board's listing.
                break
            page += 1
    return stocks
Example #4
Source File: Xueqiu.py From dHydra with Apache License 2.0 | 6 votes |
def get_quotation(self, symbol=None, symbolSet=None, dataframe=True, threadNum=3):
    """Fetch quotations for every symbol in *symbolSet* using threadNum
    worker threads, each driving its own asyncio event loop.

    NOTE(review): `symbol` is accepted but never read in this body --
    presumably kept for interface compatibility; confirm against callers.
    Results are presumably accumulated into self.quotation by the
    get_quotation_task coroutines (not visible here -- verify).  When
    `dataframe` is True the accumulated mapping is converted to a
    transposed DataFrame before being returned.
    """
    # Drop any stale result from a previous call.
    if 'quotation' in self.__dict__.keys():
        del(self.quotation)
    # Cut symbolList into threadNum roughly equal slices, one per thread.
    symbolList = list(symbolSet)
    threads = []
    symbolListSlice = util.slice_list(num=threadNum, data_list=symbolList)
    for symbolList in symbolListSlice:
        # Each thread gets a private event loop running batches of 50 symbols.
        loop = asyncio.new_event_loop()
        symbolsList = util.slice_list(step=50, data_list=symbolList)
        tasks = [self.get_quotation_task(
            symbols=symbols) for symbols in symbolsList]
        t = threading.Thread(target=util.thread_loop, args=(loop, tasks))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    if dataframe:
        # Transpose so symbols become rows.
        self.quotation = DataFrame.from_records(self.quotation).T
    return(self.quotation)
Example #5
Source File: test_foreign.py From vnpy_crypto with MIT License | 6 votes |
def test_stata_writer_pandas():
    """Round-trip a pandas DataFrame through StataWriter / genfromdta."""
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                                ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4' returned from Stata reader
    # FIX: compare dtypes with `==`, not `is` -- identity of np.dtype
    # instances is an implementation detail and must not be relied upon.
    if dta5.dtypes[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Example #6
Source File: database.py From cgat-core with MIT License | 6 votes |
def fetch_DataFrame(query, dbhandle=None, attach=False):
    '''Fetch query results and return them as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL statement to execute.
    dbhandle : optional existing handle, resolved through connect().
    attach : bool, forwarded to connect().

    Returns
    -------
    pandas.DataFrame with columns named after the result set.
    '''
    dbhandle = connect(dbhandle, attach=attach)
    cc = dbhandle.cursor()
    sqlresult = cc.execute(query).fetchall()
    # FIX: read the column names *before* closing the cursor --
    # cursor.description is not guaranteed to remain valid after close().
    field_names = [d[0] for d in cc.description]
    cc.close()
    # see http://pandas.pydata.org/pandas-docs/dev/generated/
    # pandas.DataFrame.from_records.html#pandas.DataFrame.from_records
    # this method is designed to handle sql_records with proper type
    # conversion
    pandas_DataFrame = DataFrame.from_records(
        sqlresult, columns=field_names)
    return pandas_DataFrame
Example #7
Source File: python_parser_only.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_read_table_buglet_4x_multiindex(self):
    """Whitespace-separated tables with a 4-level row index parse correctly."""
    # see gh-6607
    text = """A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
    frame = self.read_table(StringIO(text), sep=r'\s+')
    assert frame.index.names == ('one', 'two', 'three', 'four')

    # see gh-6893
    data = ' A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9'
    expected = DataFrame.from_records(
        [(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
        columns=list('abcABC'), index=list('abc'))
    actual = self.read_table(StringIO(data), sep=r'\s+')
    tm.assert_frame_equal(actual, expected)
Example #8
Source File: test_testing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_index_dtype(self):
    """Series with int vs. float index dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
    self._assert_not_equal(left.c, right.c, check_index_type=True)
Example #9
Source File: test_testing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_multiindex_dtype(self):
    """Series with differing MultiIndex level dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    self._assert_not_equal(left.c, right.c, check_index_type=True)
Example #10
Source File: test_testing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_index_dtype(self):
    """Frames with int vs. float index dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
    self._assert_not_equal(left, right, check_index_type=True)
Example #11
Source File: util.py From PyAthenaJDBC with MIT License | 5 votes |
def as_pandas(cursor, coerce_float=False):
    """Materialize all remaining rows of *cursor* as a pandas DataFrame.

    Column names come from the first element of each entry in
    cursor.description (DB-API 2.0); `coerce_float` is forwarded to
    DataFrame.from_records.
    """
    from pandas import DataFrame
    column_names = [col[0] for col in cursor.description]
    rows = cursor.fetchall()
    return DataFrame.from_records(rows, columns=column_names,
                                  coerce_float=coerce_float)
Example #12
Source File: test_testing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_multiindex_dtype(self):
    """Frames with differing MultiIndex level dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    self._assert_not_equal(left, right, check_index_type=True)
Example #13
Source File: common.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_empty_with_nrows_chunksize(self):
    """nrows/chunksize on a header-only CSV yields an empty frame."""
    # see gh-9535
    expected = DataFrame([], columns=['foo', 'bar'])

    result = self.read_csv(StringIO('foo,bar\n'), nrows=10)
    tm.assert_frame_equal(result, expected)

    result = next(iter(self.read_csv(StringIO('foo,bar\n'), chunksize=10)))
    tm.assert_frame_equal(result, expected)

    # Deprecated as_recarray path: (index, columns, data) triple.
    with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=False):
        rec = self.read_csv(StringIO('foo,bar\n'), nrows=10,
                            as_recarray=True)
        rec = DataFrame(rec[2], columns=rec[1], index=rec[0])
        tm.assert_frame_equal(DataFrame.from_records(rec), expected,
                              check_index_type=False)

    with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=False):
        rec = next(iter(self.read_csv(StringIO('foo,bar\n'),
                                      chunksize=10, as_recarray=True)))
        rec = DataFrame(rec[2], columns=rec[1], index=rec[0])
        tm.assert_frame_equal(DataFrame.from_records(rec), expected,
                              check_index_type=False)
Example #14
Source File: test_sql.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def _to_sql_save_index(self):
    """Writing a frame with a named index must create a DB index column."""
    frame = DataFrame.from_records(
        [(1, 2.1, 'line1'), (2, 1.5, 'line2')],
        columns=['A', 'B', 'C'], index=['A'])
    self.pandasSQL.to_sql(frame, 'test_to_sql_saves_index')
    ix_cols = self._get_index_columns('test_to_sql_saves_index')
    assert ix_cols == [['A', ], ]
Example #15
Source File: test_combine_concat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_concat_datetime_datetime64_frame(self):
    """Concat of an object-dtype datetime frame with a datetime64 frame."""
    # #2624
    obj_rows = [
        [datetime(2010, 1, 1), 1],
        [datetime(2010, 1, 2), 'hi'],
    ]
    df2_obj = DataFrame.from_records(obj_rows, columns=['date', 'test'])

    ind = date_range(start="2000/1/1", freq="D", periods=10)
    df1 = DataFrame({'date': ind, 'test': lrange(10)})

    # it works!
    pd.concat([df1, df2_obj])
Example #16
Source File: test_sql.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_multiindex_roundtrip(self):
    """A frame with a two-level index survives to_sql / read_sql_query."""
    frame = DataFrame.from_records(
        [(1, 2.1, 'line1'), (2, 1.5, 'line2')],
        columns=['A', 'B', 'C'], index=['A', 'B'])
    frame.to_sql('test_multiindex_roundtrip', self.conn)

    result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
                                self.conn, index_col=['A', 'B'])
    tm.assert_frame_equal(frame, result, check_index_type=True)
Example #17
Source File: test_taar_locale.py From python_mozetl with MIT License | 5 votes |
def addon_counts():
    """Fixture: sample (locale, addon, count) records as a DataFrame."""
    column_names = ["locale", "addon", "count"]
    return DataFrame.from_records(SAMPLE_ADDON_COUNTS, columns=column_names)
Example #18
Source File: numpy.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def recarray_select(recarray, fields):
    """Work-around for changes in NumPy 1.13 that return views for
    recarray multiple column selection.

    FIX: the original docstring opened with four quote characters
    (`\"\"\"\"`), leaving a stray `\"` in the doc text.

    Parameters
    ----------
    recarray : numpy structured array / recarray
    fields : str or sequence of str
        Field name(s) to select; a bare string is treated as one field.

    Returns
    -------
    The input array itself when every field is selected; otherwise a new
    record array containing only *fields* (routed through pandas to force
    a copy rather than a NumPy view).
    """
    from pandas import DataFrame
    fields = [fields] if not isinstance(fields, (tuple, list)) else fields
    if len(fields) == len(recarray.dtype):
        return recarray
    recarray = DataFrame.from_records(recarray)
    return recarray[fields].to_records(index=False)
Example #19
Source File: Sina.py From dHydra with Apache License 2.0 | 5 votes |
def get_history_data(self, code, year, season):
    """
    新浪历史复权数据接口 -- fetch Sina's adjusted historical quotes for
    *code* in the given year/season.

    Returns
    -------
    pandas.DataFrame with OHLC, volume, amount, restoration_factor and
    date columns on success; False when the HTTP request fails (kept for
    backward compatibility with existing callers).
    """
    res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
    if res.status_code != 200:
        self.logger.debug("Status Code: {}".format(res.status_code))
        return False
    pattern_data = r'<div align="center">([\d\.]+)</div>'
    data = re.findall(pattern_data, res.text)
    # Every 7 consecutive scraped values form one daily record.
    records = util.slice_list(step=7, data_list=data)
    # FIX: stray debug print(records) replaced with the instance logger.
    self.logger.debug(records)
    df = DataFrame.from_records(
        records,
        columns=[
            'open', 'high', 'close', 'low', 'volume', 'amount',
            'restoration_factor'
        ]
    )
    pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
    date = re.findall(pattern_date, res.text)
    df["date"] = date
    return df
Example #20
Source File: Sina.py From dHydra with Apache License 2.0 | 5 votes |
def get_history_data(self, code, year, season):
    """
    新浪历史复权数据接口 -- fetch Sina's adjusted historical quotes for
    *code* in the given year/season.

    Returns
    -------
    pandas.DataFrame with OHLC, volume, amount, restoration_factor and
    date columns on success; False when the HTTP request fails (kept for
    backward compatibility with existing callers).
    """
    res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
    if res.status_code != 200:
        self.logger.debug("Status Code: {}".format(res.status_code))
        return False
    pattern_data = r'<div align="center">([\d\.]+)</div>'
    data = re.findall(pattern_data, res.text)
    # Every 7 consecutive scraped values form one daily record.
    records = util.slice_list(step=7, data_list=data)
    # FIX: stray debug print(records) replaced with the instance logger.
    self.logger.debug(records)
    df = DataFrame.from_records(
        records,
        columns=[
            'open', 'high', 'close', 'low', 'volume', 'amount',
            'restoration_factor'
        ]
    )
    pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
    date = re.findall(pattern_date, res.text)
    df["date"] = date
    return df
Example #21
Source File: Xueqiu.py From dHydra with Apache License 2.0 | 5 votes |
def fetch_quotation_coroutine(self, symbols=None):
    """Coroutine: fetch the Xueqiu quote endpoint for *symbols*.

    The blocking session.get is handed to the loop's default executor;
    on any exception the request is retried exactly once (a second
    failure propagates).  Returns the decoded JSON payload, or None
    implicitly when symbols is None.
    """
    loop = asyncio.get_event_loop()
    if symbols is not None:
        # Run the blocking HTTP request off-loop in the default executor.
        async_req = loop.run_in_executor(
            None,
            functools.partial(
                self.session.get,
                URL_XUEQIU_QUOTE(symbols),
                headers=HEADERS_XUEQIU
            )
        )
        try:
            quotation = yield from async_req
        except Exception as e:
            # Single retry on any failure (e.g. transient network error).
            print(e)
            async_req = loop.run_in_executor(
                None,
                functools.partial(
                    self.session.get,
                    URL_XUEQIU_QUOTE(symbols),
                    headers=HEADERS_XUEQIU
                )
            )
            quotation = yield from async_req
        quotation = quotation.json()
        return(quotation)

# Commented-out predecessor kept from the original source:
# Xueqiu single-stock fundamentals fetch.
# Returns a dict by default; a transposed DataFrame when `dataframe` is True.
# def fetch_quotation(self, symbols = None, dataframe = False):
#     symbols = util.symbols_to_string(symbols)
#     if symbols is not None:
#         quotation = self.session.get(
#             URL_XUEQIU_QUOTE(symbols)
#             , headers = HEADERS_XUEQIU
#         ).json()
#         if dataframe:
#             quotation = DataFrame.from_records( quotation ).T
#         return(quotation)
Example #22
Source File: Xueqiu.py From dHydra with Apache License 2.0 | 5 votes |
def get_today(self, symbol, period='1day', dataframe=True):
    """Fetch today's chart data for *symbol* from Xueqiu.

    Returns a DataFrame (or the raw chartlist when `dataframe` is False);
    False when the API does not report success.
    """
    payload = self.session.get(
        URL_XUEQIU_CHART(symbol=symbol, period=period),
        headers=HEADERS_XUEQIU
    ).json()
    # NOTE: the API reports success as the *string* "true".
    if payload["success"] != "true":
        return False
    if not dataframe:
        return payload["chartlist"]
    frame = DataFrame.from_records(payload["chartlist"])
    # Shift timestamps by +8h (presumably UTC -> Beijing time; confirm).
    frame["time"] = pandas.to_datetime(frame["time"]) + timedelta(hours=8)
    frame["symbol"] = symbol
    return frame
Example #23
Source File: test_timezones.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_frame_from_records_utc(self):
    """from_records must accept a tz-aware datetime as the index column."""
    record = {'datum': 1.5,
              'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)}
    # it works
    DataFrame.from_records([record], index='begin_time')
Example #24
Source File: test_combine_concat.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_concat_datetime_datetime64_frame(self):
    """Concat of an object-dtype datetime frame with a datetime64 frame."""
    # #2624
    obj_rows = [
        [datetime(2010, 1, 1), 1],
        [datetime(2010, 1, 2), 'hi'],
    ]
    df2_obj = DataFrame.from_records(obj_rows, columns=['date', 'test'])

    ind = date_range(start="2000/1/1", freq="D", periods=10)
    df1 = DataFrame({'date': ind, 'test': lrange(10)})

    # it works!
    pd.concat([df1, df2_obj])
Example #25
Source File: test_testing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_index_dtype(self):
    """Series with int vs. float index dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
    self._assert_not_equal(left.c, right.c, check_index_type=True)
Example #26
Source File: test_testing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multiindex_dtype(self):
    """Series with differing MultiIndex level dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'b': [2.1, 1.5], 'c': ['l1', 'l2']},
        index=['a', 'b'])
    self._assert_not_equal(left.c, right.c, check_index_type=True)
Example #27
Source File: test_testing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_index_dtype(self):
    """Frames with int vs. float index dtypes must compare unequal."""
    left = DataFrame.from_records(
        {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
    right = DataFrame.from_records(
        {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
    self._assert_not_equal(left, right, check_index_type=True)
Example #28
Source File: test_sql.py From twitter-stock-recommendation with MIT License | 5 votes |
def _to_sql_save_index(self):
    """Writing a frame with a named index must create a DB index column."""
    frame = DataFrame.from_records(
        [(1, 2.1, 'line1'), (2, 1.5, 'line2')],
        columns=['A', 'B', 'C'], index=['A'])
    self.pandasSQL.to_sql(frame, 'test_to_sql_saves_index')
    ix_cols = self._get_index_columns('test_to_sql_saves_index')
    assert ix_cols == [['A', ], ]
Example #29
Source File: test_sql.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multiindex_roundtrip(self):
    """A frame with a two-level index survives to_sql / read_sql_query."""
    frame = DataFrame.from_records(
        [(1, 2.1, 'line1'), (2, 1.5, 'line2')],
        columns=['A', 'B', 'C'], index=['A', 'B'])
    frame.to_sql('test_multiindex_roundtrip', self.conn)

    result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
                                self.conn, index_col=['A', 'B'])
    tm.assert_frame_equal(frame, result, check_index_type=True)
Example #30
Source File: test_sql.py From vnpy_crypto with MIT License | 5 votes |
def _to_sql_save_index(self):
    """Writing a frame with a named index must create a DB index column."""
    frame = DataFrame.from_records(
        [(1, 2.1, 'line1'), (2, 1.5, 'line2')],
        columns=['A', 'B', 'C'], index=['A'])
    self.pandasSQL.to_sql(frame, 'test_to_sql_saves_index')
    ix_cols = self._get_index_columns('test_to_sql_saves_index')
    assert ix_cols == [['A', ], ]