Python pandas.core.frame.DataFrame.from_records() Examples
The following are 27 code examples of pandas.core.frame.DataFrame.from_records().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and methods of the class pandas.core.frame.DataFrame, or try the search function.
Example #1
Source File: test_stata.py From Computable with MIT License | 6 votes |
def test_read_dta4(self):
    """Check that both ``dta4`` fixtures parse to the same labelled frame.

    ``self.dta4`` and ``self.dta4_13`` are read through ``self.read_dta`` and
    each result is compared with a frame built by ``DataFrame.from_records``.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]

    frame_default = self.read_dta(self.dta4)
    frame_v13 = self.read_dta(self.dta4_13)
    expected = DataFrame.from_records(rows, columns=column_names)

    # Both fixtures must match the same reference frame.
    for candidate in (frame_default, frame_v13):
        tm.assert_frame_equal(candidate, expected)
Example #2
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_dta12(self):
    """Compare the ``dta21_117`` fixture with a hand-built three-column frame.

    Dtypes are deliberately not compared (``check_dtype=False``).
    """
    records = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    actual = self.read_dta(self.dta21_117)
    reference = DataFrame.from_records(records, columns=['x', 'y', 'z'])
    tm.assert_frame_equal(actual, reference, check_dtype=False)
Example #3
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` failure modes for categorical value labels.

    Two cases are exercised on a single-column categorical frame:

    * labels that are too long must raise ``ValueError``;
    * categories mixing strings with a non-string value must produce
      exactly one recorded warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        with pytest.raises(ValueError):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # This export succeeds and really writes a file, so it gets its own
    # managed temporary path instead of reusing one whose cleanup scope
    # has already ended.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #4
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_categorical_writing(self, version):
    """Round-trip an all-categorical frame through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta`` and compared with the expected frame. The
    ``incompletely_labeled`` and ``unlabeled`` columns are expected back as
    strings; category metadata is not compared.
    """
    def categorize(frame):
        # Convert every column to categorical dtype, keeping column order.
        return pd.concat([frame[name].astype('category') for name in frame],
                         axis=1)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # these are all categoricals
    original = categorize(original)

    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = categorize(expected)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # Silence warnings
        with warnings.catch_warnings(record=True):
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            result = round_tripped.set_index('index')
            tm.assert_frame_equal(result, expected, check_categorical=False)
Example #5
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_read_dta18(self):
    """Validate the data, variable labels and data label of ``dta22_118``.

    The ``Bytes`` column of the parsed frame is cast to object and the
    expected ``Floats`` column to ``float32`` before the column-by-column
    comparison.
    """
    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }

    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    with StataReader(self.dta22_118) as reader:
        tm.assert_dict_equal(reader.variable_labels(), expected_labels)
        assert reader.data_label == u'This is a Ünicode data label'
Example #6
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_read_dta12(self):
    """Compare the ``dta21_117`` fixture with a hand-built three-column frame.

    Dtypes are deliberately not compared (``check_dtype=False``).
    """
    records = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    actual = self.read_dta(self.dta21_117)
    reference = DataFrame.from_records(records, columns=['x', 'y', 'z'])
    tm.assert_frame_equal(actual, reference, check_dtype=False)
Example #7
Source File: test_stata.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_read_dta4(self, file):
    """Check that a labelled fixture parses to the expected categorical frame.

    ``file`` is the name of the attribute on ``self`` that holds the fixture
    to read. Category metadata is not compared.
    """
    fixture = getattr(self, file)
    parsed = self.read_dta(fixture)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # these are all categoricals
    as_categories = [expected[name].astype('category') for name in expected]
    expected = pd.concat(as_categories, axis=1)

    # stata doesn't save .category metadata
    tm.assert_frame_equal(parsed, expected, check_categorical=False)
Example #8
Source File: test_stata.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` failure modes for categorical value labels.

    Two cases are exercised on a single-column categorical frame:

    * labels that are too long must raise ``ValueError``;
    * categories mixing strings with a non-string value must produce
      exactly one recorded warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        with pytest.raises(ValueError):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # This export succeeds and really writes a file, so it gets its own
    # managed temporary path instead of reusing one whose cleanup scope
    # has already ended.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #9
Source File: test_stata.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_categorical_writing(self):
    """Round-trip an all-categorical frame through ``to_stata``.

    The frame is written, read back with ``self.read_dta`` and compared with
    the expected frame. The ``incompletely_labeled`` and ``unlabeled``
    columns are expected back as strings; category metadata is not compared.
    """
    def categorize(frame):
        # Convert every column to categorical dtype, keeping column order.
        return pd.concat([frame[name].astype('category') for name in frame],
                         axis=1)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # these are all categoricals
    original = categorize(original)

    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = categorize(expected)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # Silence warnings
        with warnings.catch_warnings(record=True):
            original.to_stata(path)
            round_tripped = self.read_dta(path)
            result = round_tripped.set_index('index')
            tm.assert_frame_equal(result, expected, check_categorical=False)
Example #10
Source File: test_stata.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_dta18(self):
    """Validate the data, variable labels and data label of ``dta22_118``.

    The ``Bytes`` column of the parsed frame is cast to object and the
    expected ``Floats`` column to ``float32`` before the column-by-column
    comparison.
    """
    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }

    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    with StataReader(self.dta22_118) as reader:
        tm.assert_dict_equal(reader.variable_labels(), expected_labels)
        assert reader.data_label == u'This is a Ünicode data label'
Example #11
Source File: test_stata.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_dta12(self):
    """Compare the ``dta21_117`` fixture with a hand-built three-column frame.

    Dtypes are deliberately not compared (``check_dtype=False``).
    """
    records = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    actual = self.read_dta(self.dta21_117)
    reference = DataFrame.from_records(records, columns=['x', 'y', 'z'])
    tm.assert_frame_equal(actual, reference, check_dtype=False)
Example #12
Source File: test_stata.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_dta4(self, file):
    """Check that a labelled fixture parses to the expected categorical frame.

    ``file`` is the name of the attribute on ``self`` that holds the fixture
    to read. Category metadata is not compared.
    """
    fixture = getattr(self, file)
    parsed = self.read_dta(fixture)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # these are all categoricals
    as_categories = [expected[name].astype('category') for name in expected]
    expected = pd.concat(as_categories, axis=1)

    # stata doesn't save .category metadata
    tm.assert_frame_equal(parsed, expected, check_categorical=False)
Example #13
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` failure modes for categorical value labels.

    Two cases are exercised on a single-column categorical frame:

    * labels that are too long must raise ``ValueError`` with the
      combined-length message;
    * categories mixing strings with a non-string value must emit
      ``ValueLabelTypeMismatch``.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        msg = ("Stata value labels for a single variable must have"
               r" a combined length less than 32,000 characters\.")
        with pytest.raises(ValueError, match=msg):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # This export succeeds and really writes a file, so it gets its own
    # managed temporary path instead of reusing one whose cleanup scope
    # has already ended.
    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch):
            original.to_stata(path)
            # should get a warning for mixed content
Example #14
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_categorical_writing(self, version):
    """Round-trip an all-categorical frame through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta`` and compared with the expected frame. The
    ``incompletely_labeled`` and ``unlabeled`` columns are expected back as
    strings; category metadata is not compared.
    """
    def categorize(frame):
        # Convert every column to categorical dtype, keeping column order.
        return pd.concat([frame[name].astype('category') for name in frame],
                         axis=1)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # these are all categoricals
    original = categorize(original)

    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = categorize(expected)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, version=version)
        round_tripped = self.read_dta(path)
        result = round_tripped.set_index('index')
        tm.assert_frame_equal(result, expected, check_categorical=False)
Example #15
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_dta18(self):
    """Validate the data, variable labels and data label of ``dta22_118``.

    The ``Bytes`` column of the parsed frame is cast to object and the
    expected ``Floats`` column to ``float32`` before the column-by-column
    comparison.
    """
    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }

    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    with StataReader(self.dta22_118) as reader:
        tm.assert_dict_equal(reader.variable_labels(), expected_labels)
        assert reader.data_label == u'This is a Ünicode data label'
Example #16
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_read_dta4(self, file):
    """Check that a labelled fixture parses to the expected categorical frame.

    ``file`` is the name of the attribute on ``self`` that holds the fixture
    to read. Category metadata is not compared.
    """
    fixture = getattr(self, file)
    parsed = self.read_dta(fixture)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # these are all categoricals
    as_categories = [expected[name].astype('category') for name in expected]
    expected = pd.concat(as_categories, axis=1)

    # stata doesn't save .category metadata
    tm.assert_frame_equal(parsed, expected, check_categorical=False)
Example #17
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_dta4(self, file):
    """Check that a labelled fixture parses to the expected categorical frame.

    ``file`` is the name of the attribute on ``self`` that holds the fixture
    to read. Category metadata is not compared.
    """
    fixture = getattr(self, file)
    parsed = self.read_dta(fixture)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # these are all categoricals
    as_categories = [expected[name].astype('category') for name in expected]
    expected = pd.concat(as_categories, axis=1)

    # stata doesn't save .category metadata
    tm.assert_frame_equal(parsed, expected, check_categorical=False)
Example #18
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` failure modes for categorical value labels.

    Two cases are exercised on a single-column categorical frame:

    * labels that are too long must raise ``ValueError``;
    * categories mixing strings with a non-string value must produce
      exactly one recorded warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        with pytest.raises(ValueError):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # This export succeeds and really writes a file, so it gets its own
    # managed temporary path instead of reusing one whose cleanup scope
    # has already ended.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #19
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_categorical_writing(self, version):
    """Round-trip an all-categorical frame through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta`` and compared with the expected frame. The
    ``incompletely_labeled`` and ``unlabeled`` columns are expected back as
    strings; category metadata is not compared.
    """
    def categorize(frame):
        # Convert every column to categorical dtype, keeping column order.
        return pd.concat([frame[name].astype('category') for name in frame],
                         axis=1)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # these are all categoricals
    original = categorize(original)

    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = categorize(expected)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # Silence warnings
        with warnings.catch_warnings(record=True):
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            result = round_tripped.set_index('index')
            tm.assert_frame_equal(result, expected, check_categorical=False)
Example #20
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_read_dta18(self):
    """Validate the data, variable labels and data label of ``dta22_118``.

    The ``Bytes`` column of the parsed frame is cast to object and the
    expected ``Floats`` column to ``float32`` before the column-by-column
    comparison.
    """
    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }

    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    with StataReader(self.dta22_118) as reader:
        tm.assert_dict_equal(reader.variable_labels(), expected_labels)
        assert reader.data_label == u'This is a Ünicode data label'
Example #21
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_read_dta12(self):
    """Compare the ``dta21_117`` fixture with a hand-built three-column frame.

    Dtypes are deliberately not compared (``check_dtype=False``).
    """
    records = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    actual = self.read_dta(self.dta21_117)
    reference = DataFrame.from_records(records, columns=['x', 'y', 'z'])
    tm.assert_frame_equal(actual, reference, check_dtype=False)
Example #22
Source File: test_stata.py From vnpy_crypto with MIT License | 5 votes |
def test_read_dta4(self, file):
    """Check that a labelled fixture parses to the expected categorical frame.

    ``file`` is the name of the attribute on ``self`` that holds the fixture
    to read. Category metadata is not compared.
    """
    fixture = getattr(self, file)
    parsed = self.read_dta(fixture)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # these are all categoricals
    as_categories = [expected[name].astype('category') for name in expected]
    expected = pd.concat(as_categories, axis=1)

    # stata doesn't save .category metadata
    tm.assert_frame_equal(parsed, expected, check_categorical=False)
Example #23
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` failure modes for categorical value labels.

    Two cases are exercised on a single-column categorical frame:

    * labels that are too long must raise ``ValueError`` with the
      combined-length message;
    * categories mixing strings with a non-string value must emit
      ``ValueLabelTypeMismatch``.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        msg = ("Stata value labels for a single variable must have"
               r" a combined length less than 32,000 characters\.")
        with pytest.raises(ValueError, match=msg):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # This export succeeds and really writes a file, so it gets its own
    # managed temporary path instead of reusing one whose cleanup scope
    # has already ended.
    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch):
            original.to_stata(path)
            # should get a warning for mixed content
Example #24
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_categorical_writing(self, version):
    """Round-trip an all-categorical frame through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta`` and compared with the expected frame. The
    ``incompletely_labeled`` and ``unlabeled`` columns are expected back as
    strings; category metadata is not compared.
    """
    def categorize(frame):
        # Convert every column to categorical dtype, keeping column order.
        return pd.concat([frame[name].astype('category') for name in frame],
                         axis=1)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # these are all categoricals
    original = categorize(original)

    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = categorize(expected)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, version=version)
        round_tripped = self.read_dta(path)
        result = round_tripped.set_index('index')
        tm.assert_frame_equal(result, expected, check_categorical=False)
Example #25
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_read_dta18(self):
    """Validate the data, variable labels and data label of ``dta22_118``.

    The ``Bytes`` column of the parsed frame is cast to object and the
    expected ``Floats`` column to ``float32`` before the column-by-column
    comparison.
    """
    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }

    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    with StataReader(self.dta22_118) as reader:
        tm.assert_dict_equal(reader.variable_labels(), expected_labels)
        assert reader.data_label == u'This is a Ünicode data label'
Example #26
Source File: test_stata.py From recruit with Apache License 2.0 | 5 votes |
def test_read_dta12(self):
    """Compare the ``dta21_117`` fixture with a hand-built three-column frame.

    Dtypes are deliberately not compared (``check_dtype=False``).
    """
    records = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    actual = self.read_dta(self.dta21_117)
    reference = DataFrame.from_records(records, columns=['x', 'y', 'z'])
    tm.assert_frame_equal(actual, reference, check_dtype=False)
Example #27
Source File: test_stata.py From Computable with MIT License | 4 votes |
def test_read_dta2(self):
    """Read both ``dta2`` fixtures and check that exactly one warning fires.

    The test is skipped on interpreters older than 2.7. An expected frame
    of datetime columns is built with ``DataFrame.from_records``, but the
    frame comparison itself is currently disabled (see the note at the end).

    Raises:
        nose.SkipTest: when running on a Python version below 2.7.
    """
    # Use the structured version tuple; ``sys.version`` is a free-form
    # banner string (version plus build details), not a clean version number.
    if sys.version_info < (2, 7):
        raise nose.SkipTest('datetime interp under 2.6 is faulty')

    expected = DataFrame.from_records(
        [
            (
                datetime(2006, 11, 19, 23, 13, 20),
                1479596223000,
                datetime(2010, 1, 20),
                datetime(2010, 1, 8),
                datetime(2010, 1, 1),
                datetime(1974, 7, 1),
                datetime(2010, 1, 1),
                datetime(2010, 1, 1)
            ),
            (
                datetime(1959, 12, 31, 20, 3, 20),
                -1479590,
                datetime(1953, 10, 2),
                datetime(1948, 6, 10),
                datetime(1955, 1, 1),
                datetime(1955, 7, 1),
                datetime(1955, 1, 1),
                datetime(2, 1, 1)
            ),
            (
                pd.NaT,
                pd.NaT,
                pd.NaT,
                pd.NaT,
                pd.NaT,
                pd.NaT,
                pd.NaT,
                pd.NaT,
            )
        ],
        columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
                 'monthly_date', 'quarterly_date', 'half_yearly_date',
                 'yearly_date']
    )

    with warnings.catch_warnings(record=True) as w:
        parsed = self.read_dta(self.dta2)
        parsed_13 = self.read_dta(self.dta2_13)
        # should get a warning for that format.
        np.testing.assert_equal(len(w), 1)

    # NOTE: comparing ``parsed`` and ``parsed_13`` against ``expected`` with
    # tm.assert_frame_equal is intentionally disabled: it is a buggy test
    # because of the NaT comparison on certain platforms.