Python Examples of pandas.core.frame.DataFrame.from_records

Source File: test_stata.py From Computable with MIT License

6 votes

def test_read_dta4(self):
        """Read ``self.dta4`` and ``self.dta4_13`` and compare to one frame.

        Both files are loaded through ``self.read_dta`` and each result must
        equal the same frame built from literal records.
        """
        frames = [self.read_dta(self.dta4), self.read_dta(self.dta4_13)]

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)

        # Same expectation for both files, checked in the order they were read.
        for frame in frames:
            tm.assert_frame_equal(frame, expected)

Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_read_dta12(self):
        """Compare the frame read from ``self.dta21_117`` with literal rows."""
        rows = [
            [1, "abc", "abcdefghi"],
            [3, "cba", "qwertywertyqwerty"],
            [93, "", "strl"],
        ]
        actual = self.read_dta(self.dta21_117)
        wanted = DataFrame.from_records(rows, columns=['x', 'y', 'z'])

        # Column dtypes are not part of the comparison (check_dtype=False).
        tm.assert_frame_equal(actual, wanted, check_dtype=False)

Source File: test_stata.py From twitter-stock-recommendation with MIT License

5 votes

def test_categorical_warnings_and_errors(self):
        """Check the error and the warning paths of categorical ``to_stata``.

        A categorical column with four 10000-character labels must make
        ``to_stata`` raise ``ValueError``; a categorical column mixing string
        labels with an integer must record exactly one warning.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            pytest.raises(ValueError, original.to_stata, path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Write inside a fresh ensure_clean() block: the earlier ``path`` is
        # no longer managed once its ``with`` has exited, so writing to it
        # would leave the output file behind after the test.
        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                # should get a warning for mixed content
                assert len(w) == 1

Source File: test_stata.py From twitter-stock-recommendation with MIT License

5 votes

def test_categorical_writing(self, version):
        """Write an all-categorical frame with ``to_stata`` and read it back.

        ``version`` is forwarded to ``to_stata``. The frame read back, indexed
        by its 'index' column, is compared with an expected frame in which
        'incompletely_labeled' and 'unlabeled' were passed through ``str``.
        """
        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled', 'unlabeled']
        records = [
            ["one", "ten", "one", "one", "one", 1],
            ["two", "nine", "two", "two", "two", 2],
            ["three", "eight", "three", "three", "three", 3],
            ["four", "seven", 4, "four", "four", 4],
            ["five", "six", 5, np.nan, "five", 5],
            ["six", "five", 6, np.nan, "six", 6],
            ["seven", "four", 7, np.nan, "seven", 7],
            ["eight", "three", 8, np.nan, "eight", 8],
            ["nine", "two", 9, np.nan, "nine", 9],
            ["ten", "one", "ten", np.nan, "ten", 10],
        ]
        original = DataFrame.from_records(records, columns=column_names)
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[name].astype('category') for name in original], axis=1)

        # Stringify the two mixed/numeric columns before making the expected
        # frame categorical as well.
        for name in ('incompletely_labeled', 'unlabeled'):
            expected[name] = expected[name].apply(str)
        expected = pd.concat(
            [expected[name].astype('category') for name in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            # Silence warnings
            with warnings.catch_warnings(record=True):
                original.to_stata(path, version=version)
                round_tripped = self.read_dta(path)
                res = round_tripped.set_index('index')
                tm.assert_frame_equal(res, expected, check_categorical=False)

Source File: test_stata.py From twitter-stock-recommendation with MIT License

5 votes

def test_read_dta18(self):
        """Check the values, variable labels and data label of dta22_118.

        The file at ``self.dta22_118`` is read with ``self.read_dta`` and
        compared column by column with literal records; it is then reopened
        with ``StataReader`` to check its variable labels and data label.
        """
        frame = self.read_dta(self.dta22_118)
        frame["Bytes"] = frame["Bytes"].astype('O')

        column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                        'Ints', 'Floats', 'Bytes', 'Longs']
        rows = [
            ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
            ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
            ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
            ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
            ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected["Floats"] = expected["Floats"].astype(np.float32)

        # Column-by-column comparison, in the order of the parsed frame.
        for name in frame.columns:
            tm.assert_almost_equal(frame[name], expected[name])

        expected_labels = {
            u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
            u'Longs': u'long data',
            u'Things': u'Here are some things',
            u'Bytes': u'byte data',
            u'Ints': u'int data',
            u'Cities': u'Here are some cities',
            u'Floats': u'float data',
        }
        expected_data_label = u'This is a  Ünicode data label'

        with StataReader(self.dta22_118) as reader:
            tm.assert_dict_equal(reader.variable_labels(), expected_labels)

            assert reader.data_label == expected_data_label

Source File: test_stata.py From twitter-stock-recommendation with MIT License

5 votes

def test_read_dta12(self):
        """Compare the frame read from ``self.dta21_117`` with literal rows."""
        rows = [
            [1, "abc", "abcdefghi"],
            [3, "cba", "qwertywertyqwerty"],
            [93, "", "strl"],
        ]
        actual = self.read_dta(self.dta21_117)
        wanted = DataFrame.from_records(rows, columns=['x', 'y', 'z'])

        # Column dtypes are not part of the comparison (check_dtype=False).
        tm.assert_frame_equal(actual, wanted, check_dtype=False)

Source File: test_stata.py From twitter-stock-recommendation with MIT License

5 votes

def test_read_dta4(self, file):
        """Compare the file named by ``file`` with an all-categorical frame.

        ``file`` is the name of an attribute on ``self``; that attribute's
        value is what gets passed to ``self.read_dta``.
        """
        parsed = self.read_dta(getattr(self, file))

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        plain = DataFrame.from_records(rows, columns=column_names)

        # these are all categoricals
        expected = pd.concat(
            [plain[name].astype('category') for name in plain], axis=1)

        # stata doesn't save .category metadata
        tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls

Source File: test_stata.py From elasticintel with GNU General Public License v3.0

5 votes

def test_categorical_warnings_and_errors(self):
        """Check the error and the warning paths of categorical ``to_stata``.

        A categorical column with four 10000-character labels must make
        ``to_stata`` raise ``ValueError``; a categorical column mixing string
        labels with an integer must record exactly one warning.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            pytest.raises(ValueError, original.to_stata, path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Write inside a fresh ensure_clean() block: the earlier ``path`` is
        # no longer managed once its ``with`` has exited, so writing to it
        # would leave the output file behind after the test.
        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                # should get a warning for mixed content
                assert len(w) == 1

Source File: test_stata.py From elasticintel with GNU General Public License v3.0

5 votes

def test_categorical_writing(self):
        """Write an all-categorical frame with ``to_stata`` and read it back.

        The frame read back, indexed by its 'index' column, is compared with
        an expected frame in which 'incompletely_labeled' and 'unlabeled'
        were passed through ``str``.
        """
        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled', 'unlabeled']
        records = [
            ["one", "ten", "one", "one", "one", 1],
            ["two", "nine", "two", "two", "two", 2],
            ["three", "eight", "three", "three", "three", 3],
            ["four", "seven", 4, "four", "four", 4],
            ["five", "six", 5, np.nan, "five", 5],
            ["six", "five", 6, np.nan, "six", 6],
            ["seven", "four", 7, np.nan, "seven", 7],
            ["eight", "three", 8, np.nan, "eight", 8],
            ["nine", "two", 9, np.nan, "nine", 9],
            ["ten", "one", "ten", np.nan, "ten", 10],
        ]
        original = DataFrame.from_records(records, columns=column_names)
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[name].astype('category') for name in original], axis=1)

        # Stringify the two mixed/numeric columns before making the expected
        # frame categorical as well.
        for name in ('incompletely_labeled', 'unlabeled'):
            expected[name] = expected[name].apply(str)
        expected = pd.concat(
            [expected[name].astype('category') for name in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            # Silence warnings
            with warnings.catch_warnings(record=True):
                original.to_stata(path)
                round_tripped = self.read_dta(path)
                res = round_tripped.set_index('index')
                tm.assert_frame_equal(res, expected, check_categorical=False)

Source File: test_stata.py From elasticintel with GNU General Public License v3.0

5 votes

def test_read_dta18(self):
        """Check the values, variable labels and data label of dta22_118.

        The file at ``self.dta22_118`` is read with ``self.read_dta`` and
        compared column by column with literal records; it is then reopened
        with ``StataReader`` to check its variable labels and data label.
        """
        frame = self.read_dta(self.dta22_118)
        frame["Bytes"] = frame["Bytes"].astype('O')

        column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                        'Ints', 'Floats', 'Bytes', 'Longs']
        rows = [
            ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
            ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
            ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
            ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
            ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected["Floats"] = expected["Floats"].astype(np.float32)

        # Column-by-column comparison, in the order of the parsed frame.
        for name in frame.columns:
            tm.assert_almost_equal(frame[name], expected[name])

        expected_labels = {
            u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
            u'Longs': u'long data',
            u'Things': u'Here are some things',
            u'Bytes': u'byte data',
            u'Ints': u'int data',
            u'Cities': u'Here are some cities',
            u'Floats': u'float data',
        }
        expected_data_label = u'This is a  Ünicode data label'

        with StataReader(self.dta22_118) as reader:
            tm.assert_dict_equal(reader.variable_labels(), expected_labels)

            assert reader.data_label == expected_data_label

Source File: test_stata.py From elasticintel with GNU General Public License v3.0

5 votes

def test_read_dta12(self):
        """Compare the frame read from ``self.dta21_117`` with literal rows."""
        rows = [
            [1, "abc", "abcdefghi"],
            [3, "cba", "qwertywertyqwerty"],
            [93, "", "strl"],
        ]
        actual = self.read_dta(self.dta21_117)
        wanted = DataFrame.from_records(rows, columns=['x', 'y', 'z'])

        # Column dtypes are not part of the comparison (check_dtype=False).
        tm.assert_frame_equal(actual, wanted, check_dtype=False)

Source File: test_stata.py From elasticintel with GNU General Public License v3.0

5 votes

def test_read_dta4(self, file):
        """Compare the file named by ``file`` with an all-categorical frame.

        ``file`` is the name of an attribute on ``self``; that attribute's
        value is what gets passed to ``self.read_dta``.
        """
        parsed = self.read_dta(getattr(self, file))

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        plain = DataFrame.from_records(rows, columns=column_names)

        # these are all categoricals
        expected = pd.concat(
            [plain[name].astype('category') for name in plain], axis=1)

        # stata doesn't save .category metadata
        tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls

Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_categorical_warnings_and_errors(self):
        """Check the error and the warning paths of categorical ``to_stata``.

        A categorical column with four 10000-character labels must make
        ``to_stata`` raise ``ValueError`` with the message matched below; a
        categorical column mixing string labels with an integer must produce
        ``pd.io.stata.ValueLabelTypeMismatch``.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            msg = ("Stata value labels for a single variable must have"
                   r" a combined length less than 32,000 characters\.")
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Write inside a fresh ensure_clean() block: the earlier ``path`` is
        # no longer managed once its ``with`` has exited, so writing to it
        # would leave the output file behind after the test.
        expected_warning = pd.io.stata.ValueLabelTypeMismatch
        with tm.ensure_clean() as path:
            # should get a warning for mixed content
            with tm.assert_produces_warning(expected_warning):
                original.to_stata(path)

Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_categorical_writing(self, version):
        """Write an all-categorical frame with ``to_stata`` and read it back.

        ``version`` is forwarded to ``to_stata``. The frame read back, indexed
        by its 'index' column, is compared with an expected frame in which
        'incompletely_labeled' and 'unlabeled' were passed through ``str``.
        """
        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled', 'unlabeled']
        records = [
            ["one", "ten", "one", "one", "one", 1],
            ["two", "nine", "two", "two", "two", 2],
            ["three", "eight", "three", "three", "three", 3],
            ["four", "seven", 4, "four", "four", 4],
            ["five", "six", 5, np.nan, "five", 5],
            ["six", "five", 6, np.nan, "six", 6],
            ["seven", "four", 7, np.nan, "seven", 7],
            ["eight", "three", 8, np.nan, "eight", 8],
            ["nine", "two", 9, np.nan, "nine", 9],
            ["ten", "one", "ten", np.nan, "ten", 10],
        ]
        original = DataFrame.from_records(records, columns=column_names)
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[name].astype('category') for name in original], axis=1)

        # Stringify the two mixed/numeric columns before making the expected
        # frame categorical as well.
        for name in ('incompletely_labeled', 'unlabeled'):
            expected[name] = expected[name].apply(str)
        expected = pd.concat(
            [expected[name].astype('category') for name in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            res = round_tripped.set_index('index')
            tm.assert_frame_equal(res, expected, check_categorical=False)

Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_read_dta18(self):
        """Check the values, variable labels and data label of dta22_118.

        The file at ``self.dta22_118`` is read with ``self.read_dta`` and
        compared column by column with literal records; it is then reopened
        with ``StataReader`` to check its variable labels and data label.
        """
        frame = self.read_dta(self.dta22_118)
        frame["Bytes"] = frame["Bytes"].astype('O')

        column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                        'Ints', 'Floats', 'Bytes', 'Longs']
        rows = [
            ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
            ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
            ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
            ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
            ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected["Floats"] = expected["Floats"].astype(np.float32)

        # Column-by-column comparison, in the order of the parsed frame.
        for name in frame.columns:
            tm.assert_almost_equal(frame[name], expected[name])

        expected_labels = {
            u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
            u'Longs': u'long data',
            u'Things': u'Here are some things',
            u'Bytes': u'byte data',
            u'Ints': u'int data',
            u'Cities': u'Here are some cities',
            u'Floats': u'float data',
        }
        expected_data_label = u'This is a  Ünicode data label'

        with StataReader(self.dta22_118) as reader:
            tm.assert_dict_equal(reader.variable_labels(), expected_labels)

            assert reader.data_label == expected_data_label

Source File: test_stata.py From recruit with Apache License 2.0

5 votes

def test_read_dta4(self, file):
        """Compare the file named by ``file`` with an all-categorical frame.

        ``file`` is the name of an attribute on ``self``; that attribute's
        value is what gets passed to ``self.read_dta``.
        """
        parsed = self.read_dta(getattr(self, file))

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        plain = DataFrame.from_records(rows, columns=column_names)

        # these are all categoricals
        expected = pd.concat(
            [plain[name].astype('category') for name in plain], axis=1)

        # stata doesn't save .category metadata
        tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls

Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_read_dta4(self, file):
        """Compare the file named by ``file`` with an all-categorical frame.

        ``file`` is the name of an attribute on ``self``; that attribute's
        value is what gets passed to ``self.read_dta``.
        """
        parsed = self.read_dta(getattr(self, file))

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        plain = DataFrame.from_records(rows, columns=column_names)

        # these are all categoricals
        expected = pd.concat(
            [plain[name].astype('category') for name in plain], axis=1)

        # stata doesn't save .category metadata
        tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls

Source File: test_stata.py From vnpy_crypto with MIT License

5 votes

def test_categorical_warnings_and_errors(self):
        """Check the error and the warning paths of categorical ``to_stata``.

        A categorical column with four 10000-character labels must make
        ``to_stata`` raise ``ValueError``; a categorical column mixing string
        labels with an integer must record exactly one warning.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            pytest.raises(ValueError, original.to_stata, path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Write inside a fresh ensure_clean() block: the earlier ``path`` is
        # no longer managed once its ``with`` has exited, so writing to it
        # would leave the output file behind after the test.
        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                # should get a warning for mixed content
                assert len(w) == 1

Source File: test_stata.py From vnpy_crypto with MIT License

5 votes

def test_categorical_writing(self, version):
        """Write an all-categorical frame with ``to_stata`` and read it back.

        ``version`` is forwarded to ``to_stata``. The frame read back, indexed
        by its 'index' column, is compared with an expected frame in which
        'incompletely_labeled' and 'unlabeled' were passed through ``str``.
        """
        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled', 'unlabeled']
        records = [
            ["one", "ten", "one", "one", "one", 1],
            ["two", "nine", "two", "two", "two", 2],
            ["three", "eight", "three", "three", "three", 3],
            ["four", "seven", 4, "four", "four", 4],
            ["five", "six", 5, np.nan, "five", 5],
            ["six", "five", 6, np.nan, "six", 6],
            ["seven", "four", 7, np.nan, "seven", 7],
            ["eight", "three", 8, np.nan, "eight", 8],
            ["nine", "two", 9, np.nan, "nine", 9],
            ["ten", "one", "ten", np.nan, "ten", 10],
        ]
        original = DataFrame.from_records(records, columns=column_names)
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[name].astype('category') for name in original], axis=1)

        # Stringify the two mixed/numeric columns before making the expected
        # frame categorical as well.
        for name in ('incompletely_labeled', 'unlabeled'):
            expected[name] = expected[name].apply(str)
        expected = pd.concat(
            [expected[name].astype('category') for name in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            # Silence warnings
            with warnings.catch_warnings(record=True):
                original.to_stata(path, version=version)
                round_tripped = self.read_dta(path)
                res = round_tripped.set_index('index')
                tm.assert_frame_equal(res, expected, check_categorical=False)

Source File: test_stata.py From vnpy_crypto with MIT License

5 votes

def test_read_dta18(self):
        """Check the values, variable labels and data label of dta22_118.

        The file at ``self.dta22_118`` is read with ``self.read_dta`` and
        compared column by column with literal records; it is then reopened
        with ``StataReader`` to check its variable labels and data label.
        """
        frame = self.read_dta(self.dta22_118)
        frame["Bytes"] = frame["Bytes"].astype('O')

        column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                        'Ints', 'Floats', 'Bytes', 'Longs']
        rows = [
            ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
            ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
            ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
            ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
            ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected["Floats"] = expected["Floats"].astype(np.float32)

        # Column-by-column comparison, in the order of the parsed frame.
        for name in frame.columns:
            tm.assert_almost_equal(frame[name], expected[name])

        expected_labels = {
            u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
            u'Longs': u'long data',
            u'Things': u'Here are some things',
            u'Bytes': u'byte data',
            u'Ints': u'int data',
            u'Cities': u'Here are some cities',
            u'Floats': u'float data',
        }
        expected_data_label = u'This is a  Ünicode data label'

        with StataReader(self.dta22_118) as reader:
            tm.assert_dict_equal(reader.variable_labels(), expected_labels)

            assert reader.data_label == expected_data_label

Source File: test_stata.py From vnpy_crypto with MIT License

5 votes

def test_read_dta12(self):
        """Compare the frame read from ``self.dta21_117`` with literal rows."""
        rows = [
            [1, "abc", "abcdefghi"],
            [3, "cba", "qwertywertyqwerty"],
            [93, "", "strl"],
        ]
        actual = self.read_dta(self.dta21_117)
        wanted = DataFrame.from_records(rows, columns=['x', 'y', 'z'])

        # Column dtypes are not part of the comparison (check_dtype=False).
        tm.assert_frame_equal(actual, wanted, check_dtype=False)

Source File: test_stata.py From vnpy_crypto with MIT License

5 votes

def test_read_dta4(self, file):
        """Compare the file named by ``file`` with an all-categorical frame.

        ``file`` is the name of an attribute on ``self``; that attribute's
        value is what gets passed to ``self.read_dta``.
        """
        parsed = self.read_dta(getattr(self, file))

        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled']
        rows = [
            ["one", "ten", "one", "one", "one"],
            ["two", "nine", "two", "two", "two"],
            ["three", "eight", "three", "three", "three"],
            ["four", "seven", 4, "four", "four"],
            ["five", "six", 5, np.nan, "five"],
            ["six", "five", 6, np.nan, "six"],
            ["seven", "four", 7, np.nan, "seven"],
            ["eight", "three", 8, np.nan, "eight"],
            ["nine", "two", 9, np.nan, "nine"],
            ["ten", "one", "ten", np.nan, "ten"],
        ]
        plain = DataFrame.from_records(rows, columns=column_names)

        # these are all categoricals
        expected = pd.concat(
            [plain[name].astype('category') for name in plain], axis=1)

        # stata doesn't save .category metadata
        tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls

Source File: test_stata.py From recruit with Apache License 2.0

5 votes

def test_categorical_warnings_and_errors(self):
        """Check the error and the warning paths of categorical ``to_stata``.

        A categorical column with four 10000-character labels must make
        ``to_stata`` raise ``ValueError`` with the message matched below; a
        categorical column mixing string labels with an integer must produce
        ``pd.io.stata.ValueLabelTypeMismatch``.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            msg = ("Stata value labels for a single variable must have"
                   r" a combined length less than 32,000 characters\.")
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path)

        # Warning for non-string labels
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        # Write inside a fresh ensure_clean() block: the earlier ``path`` is
        # no longer managed once its ``with`` has exited, so writing to it
        # would leave the output file behind after the test.
        expected_warning = pd.io.stata.ValueLabelTypeMismatch
        with tm.ensure_clean() as path:
            # should get a warning for mixed content
            with tm.assert_produces_warning(expected_warning):
                original.to_stata(path)

Source File: test_stata.py From recruit with Apache License 2.0

5 votes

def test_categorical_writing(self, version):
        """Write an all-categorical frame with ``to_stata`` and read it back.

        ``version`` is forwarded to ``to_stata``. The frame read back, indexed
        by its 'index' column, is compared with an expected frame in which
        'incompletely_labeled' and 'unlabeled' were passed through ``str``.
        """
        column_names = ['fully_labeled', 'fully_labeled2',
                        'incompletely_labeled', 'labeled_with_missings',
                        'float_labelled', 'unlabeled']
        records = [
            ["one", "ten", "one", "one", "one", 1],
            ["two", "nine", "two", "two", "two", 2],
            ["three", "eight", "three", "three", "three", 3],
            ["four", "seven", 4, "four", "four", 4],
            ["five", "six", 5, np.nan, "five", 5],
            ["six", "five", 6, np.nan, "six", 6],
            ["seven", "four", 7, np.nan, "seven", 7],
            ["eight", "three", 8, np.nan, "eight", 8],
            ["nine", "two", 9, np.nan, "nine", 9],
            ["ten", "one", "ten", np.nan, "ten", 10],
        ]
        original = DataFrame.from_records(records, columns=column_names)
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[name].astype('category') for name in original], axis=1)

        # Stringify the two mixed/numeric columns before making the expected
        # frame categorical as well.
        for name in ('incompletely_labeled', 'unlabeled'):
            expected[name] = expected[name].apply(str)
        expected = pd.concat(
            [expected[name].astype('category') for name in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            res = round_tripped.set_index('index')
            tm.assert_frame_equal(res, expected, check_categorical=False)

Source File: test_stata.py From recruit with Apache License 2.0

5 votes

def test_read_dta18(self):
        """Check the values, variable labels and data label of dta22_118.

        The file at ``self.dta22_118`` is read with ``self.read_dta`` and
        compared column by column with literal records; it is then reopened
        with ``StataReader`` to check its variable labels and data label.
        """
        frame = self.read_dta(self.dta22_118)
        frame["Bytes"] = frame["Bytes"].astype('O')

        column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                        'Ints', 'Floats', 'Bytes', 'Longs']
        rows = [
            ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
            ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
            ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
            ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
            ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
        ]
        expected = DataFrame.from_records(rows, columns=column_names)
        expected["Floats"] = expected["Floats"].astype(np.float32)

        # Column-by-column comparison, in the order of the parsed frame.
        for name in frame.columns:
            tm.assert_almost_equal(frame[name], expected[name])

        expected_labels = {
            u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
            u'Longs': u'long data',
            u'Things': u'Here are some things',
            u'Bytes': u'byte data',
            u'Ints': u'int data',
            u'Cities': u'Here are some cities',
            u'Floats': u'float data',
        }
        expected_data_label = u'This is a  Ünicode data label'

        with StataReader(self.dta22_118) as reader:
            tm.assert_dict_equal(reader.variable_labels(), expected_labels)

            assert reader.data_label == expected_data_label

Source File: test_stata.py From recruit with Apache License 2.0

5 votes

def test_read_dta12(self):
        """Compare the frame read from ``self.dta21_117`` with literal rows."""
        rows = [
            [1, "abc", "abcdefghi"],
            [3, "cba", "qwertywertyqwerty"],
            [93, "", "strl"],
        ]
        actual = self.read_dta(self.dta21_117)
        wanted = DataFrame.from_records(rows, columns=['x', 'y', 'z'])

        # Column dtypes are not part of the comparison (check_dtype=False).
        tm.assert_frame_equal(actual, wanted, check_dtype=False)

Source File: test_stata.py From Computable with MIT License

4 votes

def test_read_dta2(self):
        """Read ``self.dta2`` and ``self.dta2_13`` and count recorded warnings.

        The test is skipped on interpreters older than 2.7. The two reads
        together must record exactly one warning. The frame comparisons at
        the end are disabled, so ``expected`` and the parsed frames are built
        but not compared.
        """
        # Compare the interpreter's version tuple directly rather than
        # parsing the free-form ``sys.version`` string with LooseVersion.
        if sys.version_info < (2, 7):
            raise nose.SkipTest('datetime interp under 2.6 is faulty')

        expected = DataFrame.from_records(
            [
                (
                    datetime(2006, 11, 19, 23, 13, 20),
                    1479596223000,
                    datetime(2010, 1, 20),
                    datetime(2010, 1, 8),
                    datetime(2010, 1, 1),
                    datetime(1974, 7, 1),
                    datetime(2010, 1, 1),
                    datetime(2010, 1, 1)
                ),
                (
                    datetime(1959, 12, 31, 20, 3, 20),
                    -1479590,
                    datetime(1953, 10, 2),
                    datetime(1948, 6, 10),
                    datetime(1955, 1, 1),
                    datetime(1955, 7, 1),
                    datetime(1955, 1, 1),
                    datetime(2, 1, 1)
                ),
                (
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                    pd.NaT,
                )
            ],
            columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
                     'monthly_date', 'quarterly_date', 'half_yearly_date',
                     'yearly_date']
        )

        # The warning filters are left at their defaults on purpose: the
        # assertion below expects a single recorded warning for two reads.
        with warnings.catch_warnings(record=True) as w:
            parsed = self.read_dta(self.dta2)
            parsed_13 = self.read_dta(self.dta2_13)
            np.testing.assert_equal(
                len(w), 1)  # should get a warning for that format.

        # buggy test because of the NaT comparison on certain platforms
        #
        #tm.assert_frame_equal(parsed, expected)
        #tm.assert_frame_equal(parsed_13, expected)

Python pandas.core.frame.DataFrame.from_records() Examples