Python Examples of pandas.DataFrame.from_dict

Source File: test_convert_to.py From recruit with Apache License 2.0

13 votes

def test_to_dict_index_dtypes(self, into, expected):
        # GH 18580
        # When using to_dict(orient='index') on a dataframe with int
        # and float columns only the int columns were cast to float

        df = DataFrame({'int_col': [1, 2, 3],
                        'float_col': [1.0, 2.0, 3.0]})

        result = df.to_dict(orient='index', into=into)
        cols = ['int_col', 'float_col']
        result = DataFrame.from_dict(result, orient='index')[cols]
        expected = DataFrame.from_dict(expected, orient='index')[cols]
        tm.assert_frame_equal(result, expected)

Source File: test_excel.py From vnpy_crypto with MIT License

11 votes

def test_reader_converters(self, ext):

        basename = 'test_converters'

        expected = DataFrame.from_dict(OrderedDict([
            ("IntCol", [1, 2, -3, -1000, 0]),
            ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]),
            ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']),
            ("StrCol", ['1', np.nan, '3', '4', '5']),
        ]))

        converters = {'IntCol': lambda x: int(x) if x != '' else -1000,
                      'FloatCol': lambda x: 10 * x if x else np.nan,
                      2: lambda x: 'Found' if x != '' else 'Not found',
                      3: lambda x: str(x) if x else '',
                      }

        # should read in correctly and set types of single cells (not array
        # dtypes)
        actual = self.get_exceldf(basename, ext, 'Sheet1',
                                  converters=converters)
        tm.assert_frame_equal(actual, expected)

Source File: test_excel.py From predictive-maintenance-using-machine-learning with Apache License 2.0

10 votes

def test_reader_seconds(self, ext):

        # Test reading times with and without milliseconds. GH5945.
        expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
                                                 time(2, 45, 56, 100000),
                                                 time(4, 29, 49, 200000),
                                                 time(6, 13, 42, 300000),
                                                 time(7, 57, 35, 400000),
                                                 time(9, 41, 28, 500000),
                                                 time(11, 25, 21, 600000),
                                                 time(13, 9, 14, 700000),
                                                 time(14, 53, 7, 800000),
                                                 time(16, 37, 0, 900000),
                                                 time(18, 20, 54)]})

        actual = self.get_exceldf('times_1900', ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

        actual = self.get_exceldf('times_1904', ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

Source File: account.py From ctpbee with MIT License

7 votes

def result(self):
        """Assemble ``self.daily_life`` into a DataFrame and compute the result.

        Every value of ``self.daily_life`` is iterated as a mapping; its
        values are grouped by key into columns. The frame is indexed by its
        "date" column and handed to ``self._cal_result``. When matplotlib can
        be imported, the "balance" column is plotted as a best-effort side
        effect.

        Returns
        -------
        Whatever ``self._cal_result`` returns for the assembled frame.
        """
        # Build the final result from the data stored in daily_life.
        result = defaultdict(list)
        for daily in self.daily_life.values():
            for key, value in daily.items():
                result[key].append(value)

        df = DataFrame.from_dict(result).set_index("date")

        # Plotting is optional: skip it when matplotlib is not installed.
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None

        if plt is not None:
            try:
                df['balance'].plot()
                plt.show()
            except Exception as exc:
                # Keep plotting best-effort, but report the failure instead
                # of discarding it silently.
                import warnings
                warnings.warn("balance plot failed: %r" % (exc,))

        # Return after the try blocks rather than from a `finally` clause: a
        # `return` inside `finally` discards any in-flight exception,
        # including KeyboardInterrupt.
        return self._cal_result(df)

Source File: test_excel.py From recruit with Apache License 2.0

6 votes

def test_reader_seconds(self, ext):

        # Test reading times with and without milliseconds. GH5945.
        expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
                                                 time(2, 45, 56, 100000),
                                                 time(4, 29, 49, 200000),
                                                 time(6, 13, 42, 300000),
                                                 time(7, 57, 35, 400000),
                                                 time(9, 41, 28, 500000),
                                                 time(11, 25, 21, 600000),
                                                 time(13, 9, 14, 700000),
                                                 time(14, 53, 7, 800000),
                                                 time(16, 37, 0, 900000),
                                                 time(18, 20, 54)]})

        actual = self.get_exceldf('times_1900', ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

        actual = self.get_exceldf('times_1904', ext, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

Source File: encoder.py From sk-dist with Apache License 2.0

5 votes

def _process_input(self, X, fit=True):
        """
        Convert a flexible input type into a pandas DataFrame.

        Handles pandas DataFrame, dictionary, pyspark DataFrame,
        numpy ndarray, or list.

        Parameters
        ----------
        X : DataFrame, dict, numpy.ndarray, list or pyspark DataFrame
            Input data to convert.
        fit : bool, default True
            When True, array/list input takes its column names from
            ``self.col_names`` (which must be set) and the resulting
            columns are stored on ``self.fields_``. When False,
            array/list input is labelled with ``self.fields_``.

        Returns
        -------
        DataFrame

        Raises
        ------
        ValueError
            If the input cannot be converted, or if array/list input is
            given while fitting without ``self.col_names``.
        """
        if isinstance(X, DataFrame):
            out = X
        elif isinstance(X, dict):
            try:
                out = DataFrame.from_dict(X, orient="columns")
            except Exception as exc:
                # `except Exception` rather than a bare `except`, so that
                # KeyboardInterrupt/SystemExit are not turned into a
                # ValueError; the original error is kept as the cause.
                raise ValueError("Cannot parse input") from exc
        elif isinstance(X, (np.ndarray, list)):
            if fit and self.col_names is None:
                raise ValueError(
                    "Must supply col_names with numpy array input")
            elif fit:
                out = DataFrame(X, columns=self.col_names)
            else:
                out = DataFrame(X, columns=self.fields_)
        else:
            # Imported lazily so pyspark is only needed for this branch.
            from pyspark.sql import DataFrame as SparkDataFrame
            if isinstance(X, SparkDataFrame):
                out = X.toPandas()
            else:
                raise ValueError("Cannot parse input")
        if fit:
            self.fields_ = list(out.columns)
        return out

Source File: solver.py From pastas with MIT License

5 votes

def get_realizations(self, func, n=None, name=None, **kwargs):
        """Internal method to obtain n realizations of ``func``.

        ``func`` is called once per parameter set returned by
        ``self.get_parameter_sample(n=n, name=name)``, receiving that set as
        ``parameters`` plus ``kwargs``. A truthy ``name`` is also forwarded
        to ``func`` as a keyword. The results become the columns of the
        returned DataFrame, labelled 0, 1, 2, ... in sample order.
        """
        if name:
            kwargs["name"] = name

        samples = self.get_parameter_sample(n=n, name=name)
        realizations = {
            idx: func(parameters=sample, **kwargs)
            for idx, sample in enumerate(samples)
        }
        return DataFrame.from_dict(realizations, orient="columns")

Source File: read.py From anndata with BSD 3-Clause "New" or "Revised" License

5 votes

def read_umi_tools(filename: PathLike, dtype: str = "float32") -> AnnData:
    """\
    Read a gzipped condensed count matrix from umi_tools.

    Each line after the header is split on tabs; the first three fields are
    used as (column label, row label, integer count). Combinations that do
    not occur in the file are filled with zero.

    Parameters
    ----------
    filename
        File name to read from.
    dtype
        Passed through to the :class:`AnnData` constructor.
    """
    # pandas converts the dict of dicts into a matrix; gzip reads the file
    import gzip
    from pandas import DataFrame

    # Outer keys (second field) become rows, inner keys (first field) columns
    dod = {}
    # The context manager closes the file handle even if a line fails to parse
    with gzip.open(fspath(filename)) as fh:
        fh.readline()  # skip the header line

        for line in fh:
            # gzip yields bytes, hence the decoding
            t = line.decode("ascii").split("\t")
            dod.setdefault(t[1], {})[t[0]] = int(t[2])

    df = DataFrame.from_dict(dod, orient="index")  # build the matrix
    df.fillna(value=0.0, inplace=True)  # many NaN, replace with zeros
    return AnnData(
        np.array(df), dict(obs_names=df.index), dict(var_names=df.columns), dtype=dtype,
    )

Source File: annotation_database_parser.py From HistomicsTK with Apache License 2.0

5 votes

def _add_item_to_sqlite(dbcon, item):
    """Append one item record as a row of the ``items`` SQL table.

    A deep copy of ``item`` is made, its ``largeImage`` entry is converted
    to a string, keys that are not part of the column schema are dropped,
    and the remainder is written through ``DataFrame.to_sql`` on ``dbcon``.
    """
    # Work on a deep copy so the string conversion is not applied to `item`.
    record = copy.deepcopy(item)
    record['largeImage'] = str(record['largeImage'])

    # SQL column types, keyed by the item fields that are persisted.
    schema = {
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'copyOfItem': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'folderId': String(),
        'largeImage': String(),
        'name': String(),
        'size': Integer(),
        'updated': String(),
    }

    # Anything outside the schema is discarded.
    row = {
        field: value for field, value in record.items()
        if field in schema}

    # Build the frame with fields as index, then transpose before appending.
    row_df = DataFrame.from_dict(row, orient='index').T
    row_df.to_sql(
        name='items', con=dbcon, if_exists='append',
        dtype=schema, index=False)

Source File: annotation_database_parser.py From HistomicsTK with Apache License 2.0

5 votes

def _add_folder_to_sqlite(dbcon, folder_info):
    """Append one folder record as a row of the ``folders`` SQL table.

    Keys of ``folder_info`` that are not part of the column schema are
    dropped, and the remainder is written through ``DataFrame.to_sql`` on
    ``dbcon``. The caller's dict is not modified.
    """
    # SQL column types, keyed by the folder fields that are persisted.
    schema = {
        '_accessLevel': Integer(),
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'name': String(),
        'parentCollection': String(),
        'parentId': String(),
        'public': Boolean(),
        'size': Integer(),
        'updated': String(),
        'folder_path': String(),
    }

    # Anything outside the schema is discarded.
    row = {
        field: value for field, value in folder_info.items()
        if field in schema}

    # Build the frame with fields as index, then transpose before appending
    # to the folders table.
    row_df = DataFrame.from_dict(row, orient='index').T
    row_df.to_sql(
        name='folders', con=dbcon, if_exists='append',
        dtype=schema, index=False)

Source File: load.py From bifacial_radiance with BSD 3-Clause "New" or "Revised" License

5 votes

def _exportTrackerDict(trackerdict, savefile, reindex):
    """
    Save a TrackerDict output as a ``.csv`` file.

    Besides the selected input columns, the file contains the NaN-ignoring
    mean of each ``Wm2Back`` / ``Wm2Front`` entry (``Wm2BackAvg``,
    ``Wm2FrontAvg``) and their ratio (``BifiRatio``).

    Parameters
    ----------
        trackerdict : Dictionary
            The tracker dictionary to save
        savefile : str
            Path to .csv save file location
        reindex : bool
            When exactly ``True``, the dictionary keys are parsed as
            ``%m_%d_%H`` timestamps and the table is resampled to hourly
            frequency before saving.

    """
    import numpy as np
    import pandas as pd

    selected = ['dhi', 'ghi', 'Wm2Back', 'Wm2Front', 'theta',
                'surf_tilt', 'surf_azm', 'ground_clearance']

    # one row per trackerdict key, restricted to the selected columns
    frame = pd.DataFrame.from_dict(trackerdict, orient='index',
                                   columns=selected)
    frame['Wm2BackAvg'] = [np.nanmean(cell) for cell in frame['Wm2Back']]
    frame['Wm2FrontAvg'] = [np.nanmean(cell) for cell in frame['Wm2Front']]
    frame['BifiRatio'] = frame['Wm2BackAvg'] / frame['Wm2FrontAvg']

    if reindex is True:
        # keep the original key as a column, then index by parsed timestamp
        frame['measdatetime'] = frame.index
        stamps = pd.to_datetime(frame['measdatetime'], format='%m_%d_%H')
        frame = frame.set_index(stamps)
        # NOTE(review): asfreq() reindexes to the hourly grid; no
        # interpolation call is made here.
        frame = frame.resample('H').asfreq()

    frame.to_csv(savefile)

Source File: test_multilevel.py From elasticintel with GNU General Public License v3.0

5 votes

def test_frame_dict_constructor_empty_series(self):
        s1 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]))
        s2 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
        s3 = Series()

        # it works!
        DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
        DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

Source File: test_merge.py From elasticintel with GNU General Public License v3.0

5 votes

def test_merge_nosort(self):
        # #2098, anything to do?

        from datetime import datetime

        d = {"var1": np.random.randint(0, 10, size=10),
             "var2": np.random.randint(0, 10, size=10),
             "var3": [datetime(2012, 1, 12), datetime(2011, 2, 4),
                      datetime(
                      2010, 2, 3), datetime(2012, 1, 12),
                      datetime(
                      2011, 2, 4), datetime(2012, 4, 3),
                      datetime(
                      2012, 3, 4), datetime(2008, 5, 1),
                      datetime(2010, 2, 3), datetime(2012, 2, 3)]}
        df = DataFrame.from_dict(d)
        var3 = df.var3.unique()
        var3.sort()
        new = DataFrame.from_dict({"var3": var3,
                                   "var8": np.random.random(7)})

        result = df.merge(new, on="var3", sort=False)
        exp = merge(df, new, on='var3', sort=False)
        assert_frame_equal(result, exp)

        assert (df.var3.unique() == result.var3.unique()).all()

Source File: test_multilevel.py From coffeegrindsize with MIT License

5 votes

def test_frame_dict_constructor_empty_series(self):
        s1 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]))
        s2 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
        s3 = Series()

        # it works!
        DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
        DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

Source File: test_reshape.py From coffeegrindsize with MIT License

5 votes

def test_get_dummies_dont_sparsify_all_columns(self, sparse):
        # GH18914
        df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]),
                                              ('Nation', ['AB', 'CD'])]))
        df = get_dummies(df, columns=['Nation'], sparse=sparse)
        df2 = df.reindex(columns=['GDP'])

        tm.assert_frame_equal(df[['GDP']], df2)

Source File: test_reshape.py From recruit with Apache License 2.0

5 votes

def test_get_dummies_dont_sparsify_all_columns(self, sparse):
        # GH18914
        df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]),
                                              ('Nation', ['AB', 'CD'])]))
        df = get_dummies(df, columns=['Nation'], sparse=sparse)
        df2 = df.reindex(columns=['GDP'])

        tm.assert_frame_equal(df[['GDP']], df2)

Source File: _posthocs.py From scikit-posthocs with BSD 3-Clause "New" or "Revised" License

5 votes

def __convert_to_block_df(a, y_col=None, group_col=None, block_col=None, melted=False):
    """Normalise block-design data to a frame with fixed column names.

    Parameters
    ----------
    a : DataFrame or array-like
        Input data. Anything that is not a DataFrame is passed through
        ``np.array`` and wrapped in a DataFrame with integer row and column
        labels.
    y_col, group_col, block_col : optional
        Column labels of the values, groups and blocks in ``a``. Required
        (all three) when ``melted`` is true; ignored otherwise.
    melted : bool, default False
        When false, ``a`` is treated as a blocks-by-groups table and melted
        to long format. When true, the three named columns are
        selected/renamed to ``'groups'``, ``'blocks'`` and ``'y'``.

    Returns
    -------
    tuple
        ``(x, 'y', 'groups', 'blocks')`` where ``x`` is the resulting
        DataFrame.

    Raises
    ------
    ValueError
        If ``melted`` is true and any of the three column arguments is None.
    """
    if melted and any(col is None for col in (block_col, group_col, y_col)):
        raise ValueError('`block_col`, `group_col`, `y_col` should be explicitly specified if using melted data')

    is_frame = isinstance(a, DataFrame)

    if melted:
        if is_frame:
            x = DataFrame.from_dict({'groups': a[group_col],
                                     'blocks': a[block_col],
                                     'y': a[y_col]})
        else:
            values = np.array(a)
            x = DataFrame(values,
                          index=np.arange(values.shape[0]),
                          columns=np.arange(values.shape[1]))
            x.rename(columns={group_col: 'groups', block_col: 'blocks', y_col: 'y'}, inplace=True)
    else:
        if is_frame:
            # deep copy so naming the axes below is done on the copy
            x = a.copy(deep=True)
        else:
            values = np.array(a)
            x = DataFrame(values,
                          index=np.arange(values.shape[0]),
                          columns=np.arange(values.shape[1]))

        # rows are blocks, columns are groups; melt to one value per row
        x.columns.name = 'groups'
        x.index.name = 'blocks'
        x = x.reset_index().melt(id_vars='blocks', var_name='groups', value_name='y')

    return x, 'y', 'groups', 'blocks'

Source File: simpletable.py From pyphot with MIT License

5 votes

def to_pandas(self, **kwargs):
        """ Construct a pandas dataframe

        The table is first exported with ``self.to_dict(keys=keys)`` and the
        resulting mapping is handed to ``pandas.DataFrame.from_dict``.

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export; forwarded to
            ``self.to_dict``
        **kwargs
            Any remaining keyword arguments are forwarded unchanged to
            ``pandas.DataFrame.from_dict``.

        Returns
        -------
        df : DataFrame

        Raises
        ------
        ImportError
            If pandas is not installed (a short message is printed first).
        """
        # Guard only the import, so the message below is printed for a
        # missing pandas and not for errors raised while exporting.
        try:
            from pandas import DataFrame
        except ImportError:
            print("Pandas import error")
            raise
        keys = kwargs.pop('keys', None)
        return DataFrame.from_dict(self.to_dict(keys=keys), **kwargs)

Source File: simpletable.py From pyphot with MIT License

5 votes

def to_pandas(self, **kwargs):
        """ Construct a pandas dataframe

        The table is first exported with ``self.to_dict(keys=keys)`` and the
        resulting mapping is handed to ``pandas.DataFrame.from_dict``.

        Parameters
        ----------
        keys: sequence, optional
            ordered subset of columns to export; forwarded to
            ``self.to_dict``
        **kwargs
            Any remaining keyword arguments are forwarded unchanged to
            ``pandas.DataFrame.from_dict``.

        Returns
        -------
        df : DataFrame

        Raises
        ------
        ImportError
            If pandas is not installed (a short message is printed first).
        """
        # Guard only the import, so the message below is printed for a
        # missing pandas and not for errors raised while exporting.
        try:
            from pandas import DataFrame
        except ImportError:
            print("Pandas import error")
            raise
        keys = kwargs.pop('keys', None)
        return DataFrame.from_dict(self.to_dict(keys=keys), **kwargs)

Source File: combine-sqm-tables.py From SqueezeMeta with GNU General Public License v3.0

5 votes

def write_feature_dict(sampleNames, featureDict, outName):
    """Write ``featureDict`` to ``outName`` as a tab-separated table.

    The outer keys of ``featureDict`` become columns. Missing values are
    replaced with 0, rows are sorted by index label, and the columns are
    selected and ordered by ``sampleNames``.
    """
    table = DataFrame.from_dict(featureDict)
    filled = table.fillna(0)
    ordered = filled.sort_index()[sampleNames]
    ordered.to_csv(outName, sep='\t')

Source File: test_multilevel.py From twitter-stock-recommendation with MIT License

5 votes

def test_frame_dict_constructor_empty_series(self):
        s1 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]))
        s2 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
        s3 = Series()

        # it works!
        DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
        DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

Source File: test_reshape.py From twitter-stock-recommendation with MIT License

5 votes

def test_get_dummies_dont_sparsify_all_columns(self, sparse):
        # GH18914
        df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]),
                                              ('Nation', ['AB', 'CD'])]))
        df = get_dummies(df, columns=['Nation'], sparse=sparse)
        df2 = df.reindex(columns=['GDP'])

        tm.assert_frame_equal(df[['GDP']], df2)

Source File: common.py From twitter-stock-recommendation with MIT License

5 votes

def test_scientific_no_exponent(self):
        # see gh-12215
        df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']),
                                              ('y', ['42e']),
                                              ('z', ['632E'])]))
        data = df.to_csv(index=False)
        for prec in self.float_precision_choices:
            df_roundtrip = self.read_csv(
                StringIO(data), float_precision=prec)
            tm.assert_frame_equal(df_roundtrip, df)

Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_scientific_no_exponent(all_parsers):
    # see gh-12215
    df = DataFrame.from_dict(OrderedDict([("w", ["2e"]), ("x", ["3E"]),
                                          ("y", ["42e"]),
                                          ("z", ["632E"])]))
    data = df.to_csv(index=False)
    parser = all_parsers

    for precision in parser.float_precision_choices:
        df_roundtrip = parser.read_csv(StringIO(data),
                                       float_precision=precision)
        tm.assert_frame_equal(df_roundtrip, df)

Source File: test_multilevel.py From recruit with Apache License 2.0

5 votes

def test_frame_dict_constructor_empty_series(self):
        s1 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]))
        s2 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
        s3 = Series()

        # it works!
        DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
        DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

Source File: test_common.py From recruit with Apache License 2.0

5 votes

def test_scientific_no_exponent(all_parsers):
    # see gh-12215
    df = DataFrame.from_dict(OrderedDict([("w", ["2e"]), ("x", ["3E"]),
                                          ("y", ["42e"]),
                                          ("z", ["632E"])]))
    data = df.to_csv(index=False)
    parser = all_parsers

    for precision in parser.float_precision_choices:
        df_roundtrip = parser.read_csv(StringIO(data),
                                       float_precision=precision)
        tm.assert_frame_equal(df_roundtrip, df)

Source File: test_multilevel.py From vnpy_crypto with MIT License

5 votes

def test_frame_dict_constructor_empty_series(self):
        s1 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]))
        s2 = Series([
            1, 2, 3, 4
        ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]))
        s3 = Series()

        # it works!
        DataFrame({'foo': s1, 'bar': s2, 'baz': s3})
        DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2})

Source File: test_reshape.py From vnpy_crypto with MIT License

5 votes

def test_get_dummies_dont_sparsify_all_columns(self, sparse):
        # GH18914
        df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]),
                                              ('Nation', ['AB', 'CD'])]))
        df = get_dummies(df, columns=['Nation'], sparse=sparse)
        df2 = df.reindex(columns=['GDP'])

        tm.assert_frame_equal(df[['GDP']], df2)

Source File: common.py From vnpy_crypto with MIT License

5 votes

def test_scientific_no_exponent(self):
        # see gh-12215
        df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']),
                                              ('y', ['42e']),
                                              ('z', ['632E'])]))
        data = df.to_csv(index=False)
        for prec in self.float_precision_choices:
            df_roundtrip = self.read_csv(
                StringIO(data), float_precision=prec)
            tm.assert_frame_equal(df_roundtrip, df)

Source File: test_merge.py From Computable with MIT License

5 votes

def test_merge_nosort(self):
        # #2098, anything to do?

        from datetime import datetime

        d = {"var1": np.random.randint(0, 10, size=10),
             "var2": np.random.randint(0, 10, size=10),
             "var3": [datetime(2012, 1, 12), datetime(2011, 2, 4),
                      datetime(
                      2010, 2, 3), datetime(2012, 1, 12),
                      datetime(
                      2011, 2, 4), datetime(2012, 4, 3),
                      datetime(
                      2012, 3, 4), datetime(2008, 5, 1),
                      datetime(2010, 2, 3), datetime(2012, 2, 3)]}
        df = DataFrame.from_dict(d)
        var3 = df.var3.unique()
        var3.sort()
        new = DataFrame.from_dict({"var3": var3,
                                   "var8": np.random.random(7)})

        result = df.merge(new, on="var3", sort=False)
        exp = merge(df, new, on='var3', sort=False)
        assert_frame_equal(result, exp)

        self.assert_((df.var3.unique() == result.var3.unique()).all())

Python pandas.DataFrame.from_dict() Examples