Python pandas.util.testing.ensure_clean() Examples

The following are 30 code examples of pandas.util.testing.ensure_clean(), taken from open-source projects. The line above each example names the original project and source file. You may also want to check out all available functions/classes of the module pandas.util.testing.
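Quick usage sketch (not taken from any of the projects below, just a minimal illustration of the pattern the examples share): tm.ensure_clean() is a context manager that yields a temporary file path and removes the file, if one was created, when the block exits, even if the body raises. The optional filename argument is used as a suffix for the temporary path, which is handy when an extension such as .csv or .gz matters.

import pandas as pd
import pandas.util.testing as tm

df = pd.DataFrame({'a': [1, 2, 3]})

# The path does not exist yet; whatever gets written there is cleaned up on exit.
with tm.ensure_clean('example.csv') as path:
    df.to_csv(path, index=False)
    roundtrip = pd.read_csv(path)

tm.assert_frame_equal(df, roundtrip)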
Example #1
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_escapechar(self):
        df = DataFrame({'col': ['a"a', '"bb"']})
        expected = '''\
"","col"
"0","a\\"a"
"1","\\"bb\\""
'''

        with tm.ensure_clean('test.csv') as path:  # QUOTE_ALL
            df.to_csv(path, quoting=1, doublequote=False, escapechar='\\')
            with open(path, 'r') as f:
                assert f.read() == expected

        df = DataFrame({'col': ['a,a', ',bb,']})
        expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=3, escapechar='\\')  # QUOTE_NONE
            with open(path, 'r') as f:
                assert f.read() == expected 
Example #2
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #3
Source File: test_stata.py    From recruit with Apache License 2.0
def test_encoding(self, version):

        # GH 4626, proper encoding handling
        raw = read_stata(self.dta_encoding)
        with tm.assert_produces_warning(FutureWarning):
            encoded = read_stata(self.dta_encoding, encoding='latin-1')
        result = encoded.kreis1849[0]

        expected = raw.kreis1849[0]
        assert result == expected
        assert isinstance(result, compat.string_types)

        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(FutureWarning):
                encoded.to_stata(path, write_index=False, version=version,
                                 encoding='latin-1')
            reread_encoded = read_stata(path)
            tm.assert_frame_equal(encoded, reread_encoded) 
Example #4
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_reread_dta14(self, file, parsed_114, version):
        file = getattr(self, file)
        parsed = self.read_dta(file)
        parsed.index.name = 'index'

        expected = self.read_csv(self.csv14)
        cols = ['byte_', 'int_', 'long_', 'float_', 'double_']
        for col in cols:
            expected[col] = expected[col]._convert(datetime=True, numeric=True)
        expected['float_'] = expected['float_'].astype(np.float32)
        expected['date_td'] = pd.to_datetime(
            expected['date_td'], errors='coerce')

        tm.assert_frame_equal(parsed_114, parsed)

        with tm.ensure_clean() as path:
            parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(
                written_and_read_again.set_index('index'), parsed_114) 
Example #5
Source File: test_stata.py    From recruit with Apache License 2.0
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #6
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_dta11(self):
        original = DataFrame([(1, 2, 3, 4)],
                             columns=['good', compat.u('b\u00E4d'), '8number',
                                      'astringwithmorethan32characters______'])
        formatted = DataFrame([(1, 2, 3, 4)],
                              columns=['good', 'b_d', '_8number',
                                       'astringwithmorethan32characters_'])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(pd.io.stata.InvalidColumnName):
                original.to_stata(path, None)

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(
                written_and_read_again.set_index('index'), formatted) 
Example #7
Source File: test_compression.py    From recruit with Apache License 2.0
def test_compression(parser_and_data, compression_only, buffer, filename):
    parser, data, expected = parser_and_data
    compress_type = compression_only

    ext = "gz" if compress_type == "gzip" else compress_type
    filename = filename if filename is None else filename.format(ext=ext)

    if filename and buffer:
        pytest.skip("Cannot deduce compression from "
                    "buffer of compressed data.")

    with tm.ensure_clean(filename=filename) as path:
        tm.write_to_compressed(compress_type, path, data)
        compression = "infer" if filename else compress_type

        if buffer:
            with open(path, "rb") as f:
                result = parser.read_csv(f, compression=compression)
        else:
            result = parser.read_csv(path, compression=compression)

        tm.assert_frame_equal(result, expected) 
Example #8
Source File: test_stata.py    From recruit with Apache License 2.0
def test_date_export_formats(self):
        columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
        conversions = {c: c for c in columns}
        data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns)
        original = DataFrame([data], columns=columns)
        original.index.name = 'index'
        expected_values = [datetime(2006, 11, 20, 23, 13, 20),  # Time
                           datetime(2006, 11, 20),  # Day
                           datetime(2006, 11, 19),  # Week
                           datetime(2006, 11, 1),  # Month
                           datetime(2006, 10, 1),  # Quarter year
                           datetime(2006, 7, 1),  # Half year
                           datetime(2006, 1, 1)]  # Year

        expected = DataFrame([expected_values], columns=columns)
        expected.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path, conversions)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  expected) 
Example #9
Source File: test_stata.py    From recruit with Apache License 2.0
def test_bool_uint(self, byteorder, version):
        s0 = Series([0, 1, True], dtype=np.bool)
        s1 = Series([0, 1, 100], dtype=np.uint8)
        s2 = Series([0, 1, 255], dtype=np.uint8)
        s3 = Series([0, 1, 2 ** 15 - 100], dtype=np.uint16)
        s4 = Series([0, 1, 2 ** 16 - 1], dtype=np.uint16)
        s5 = Series([0, 1, 2 ** 31 - 100], dtype=np.uint32)
        s6 = Series([0, 1, 2 ** 32 - 1], dtype=np.uint32)

        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3,
                              's4': s4, 's5': s5, 's6': s6})
        original.index.name = 'index'
        expected = original.copy()
        expected_types = (np.int8, np.int8, np.int16, np.int16, np.int32,
                          np.int32, np.float64)
        for c, t in zip(expected.columns, expected_types):
            expected[c] = expected[c].astype(t)

        with tm.ensure_clean() as path:
            original.to_stata(path, byteorder=byteorder, version=version)
            written_and_read_again = self.read_dta(path)
            written_and_read_again = written_and_read_again.set_index('index')
            tm.assert_frame_equal(written_and_read_again, expected) 
Example #10
Source File: test_stata.py    From recruit with Apache License 2.0
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #11
Source File: test_common.py    From recruit with Apache License 2.0
def test_read_csv_memory_growth_chunksize(all_parsers):
    # see gh-24805
    #
    # Let's just make sure that we don't crash
    # as we iteratively process all chunks.
    parser = all_parsers

    with tm.ensure_clean() as path:
        with open(path, "w") as f:
            for i in range(1000):
                f.write(str(i) + "\n")

        result = parser.read_csv(path, chunksize=20)

        for _ in result:
            pass 
Example #12
Source File: test_stata.py    From recruit with Apache License 2.0
def test_invalid_variable_labels(self, version):
        original = pd.DataFrame({'a': [1, 2, 3, 4],
                                 'b': [1.0, 3.0, 27.0, 81.0],
                                 'c': ['Atlanta', 'Birmingham',
                                       'Cincinnati', 'Detroit']})
        original.index.name = 'index'
        variable_labels = {'a': 'very long' * 10,
                           'b': 'City Exponent',
                           'c': 'City'}
        with tm.ensure_clean() as path:
            msg = "Variable labels must be 80 characters or fewer"
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path,
                                  variable_labels=variable_labels,
                                  version=version)

        variable_labels['a'] = u'invalid character Œ'
        with tm.ensure_clean() as path:
            msg = ("Variable labels must contain only characters that can be"
                   " encoded in Latin-1")
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path,
                                  variable_labels=variable_labels,
                                  version=version) 
Example #13
Source File: test_stata.py    From recruit with Apache License 2.0
def test_out_of_range_double(self):
        # GH 14618
        df = DataFrame({'ColumnOk': [0.0,
                                     np.finfo(np.double).eps,
                                     4.49423283715579e+307],
                        'ColumnTooBig': [0.0,
                                         np.finfo(np.double).eps,
                                         np.finfo(np.double).max]})
        msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
               r" outside the range supported by Stata \(.+\)")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                df.to_stata(path)

        df.loc[2, 'ColumnTooBig'] = np.inf
        msg = ("Column ColumnTooBig has a maximum value of infinity which"
               " is outside the range supported by Stata")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                df.to_stata(path) 
Example #14
Source File: test_python_parser_only.py    From recruit with Apache License 2.0
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
    # see gh-6607
    parser = python_parser_only

    with open(csv1, "rb") as f:
        data = f.read()

    data = data.replace(b",", b"::")
    expected = parser.read_csv(csv1)

    module = pytest.importorskip(compression)
    klass = getattr(module, klass)

    with tm.ensure_clean() as path:
        tmp = klass(path, mode="wb")
        tmp.write(data)
        tmp.close()

        result = parser.read_csv(path, sep="::",
                                 compression=compression)
        tm.assert_frame_equal(result, expected) 
Example #15
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_doublequote(self):
        df = DataFrame({'col': ['a"a', '"bb"']})
        expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1, doublequote=True)  # QUOTE_ALL
            with open(path, 'r') as f:
                assert f.read() == expected

        from _csv import Error
        with tm.ensure_clean('test.csv') as path:
            with pytest.raises(Error, match='escapechar'):
                df.to_csv(path, doublequote=False)  # no escapechar set 
Example #16
Source File: test_to_latex.py    From recruit with Apache License 2.0
def test_to_latex_filename(self, frame):
        with tm.ensure_clean('test.tex') as path:
            frame.to_latex(path)

            with open(path, 'r') as f:
                assert frame.to_latex() == f.read()

        # test with utf-8 and encoding option (GH 7061)
        df = DataFrame([[u'au\xdfgangen']])
        with tm.ensure_clean('test.tex') as path:
            df.to_latex(path, encoding='utf-8')
            with codecs.open(path, 'r', encoding='utf-8') as f:
                assert df.to_latex() == f.read()

        # test with utf-8 without encoding option
        if compat.PY3:  # python3: pandas default encoding is utf-8
            with tm.ensure_clean('test.tex') as path:
                df.to_latex(path)
                with codecs.open(path, 'r', encoding='utf-8') as f:
                    assert df.to_latex() == f.read()
        else:
            # python2 default encoding is ascii, so an error should be raised
            with tm.ensure_clean('test.tex') as path:
                with pytest.raises(UnicodeEncodeError):
                    df.to_latex(path) 
Example #17
Source File: test_stata.py    From recruit with Apache License 2.0
def test_strl_latin1(self):
        # GH 23573, correct GSO data to reflect correct size
        output = DataFrame([[u'pandas'] * 2, [u'þâÑÐŧ'] * 2],
                           columns=['var_str', 'var_strl'])

        with tm.ensure_clean() as path:
            output.to_stata(path, version=117, convert_strl=['var_strl'])
            with open(path, 'rb') as reread:
                content = reread.read()
                expected = u'þâÑÐŧ'
                assert expected.encode('latin-1') in content
                assert expected.encode('utf-8') in content
                gsos = content.split(b'strls')[1][1:-2]
                for gso in gsos.split(b'GSO')[1:]:
                    val = gso.split(b'\x00')[-2]
                    size = gso[gso.find(b'\x82') + 1]
                    if not PY3:
                        size = ord(size)
                    assert len(val) == size - 1 
Example #18
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file_with_newline_py3(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x",
                         "y",
                         "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w', newline='') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == bytes(expected, 'utf-8') 
Example #19
Source File: test_stata.py    From recruit with Apache License 2.0
def test_all_none_exception(self, version):
        output = [
            {'none': 'none',
             'number': 0},
            {'none': None,
             'number': 1}
        ]
        output = pd.DataFrame(output)
        output.loc[:, 'none'] = None
        with tm.ensure_clean() as path:
            msg = (r"Column `none` cannot be exported\.\n\n"
                   "Only string-like object arrays containing all strings or a"
                   r" mix of strings and None can be exported\. Object arrays"
                   r" containing only null values are prohibited\. Other"
                   " object typescannot be exported and must first be"
                   r" converted to one of the supported types\.")
            with pytest.raises(ValueError, match=msg):
                output.to_stata(path, version=version) 
Example #20
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file_with_newline_py2(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x",
                         "y",
                         "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'wb') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == expected 
Example #21
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_fwf_compression(compression_only, infer):
    data = """1111111111
    2222222222
    3333333333""".strip()

    compression = compression_only
    extension = "gz" if compression == "gzip" else compression

    kwargs = dict(widths=[5, 5], names=["one", "two"])
    expected = read_fwf(StringIO(data), **kwargs)

    if compat.PY3:
        data = bytes(data, encoding="utf-8")

    with tm.ensure_clean(filename="tmp." + extension) as path:
        tm.write_to_compressed(compression, path, data)

        if infer is not None:
            kwargs["compression"] = "infer" if infer else compression

        result = read_fwf(path, **kwargs)
        tm.assert_frame_equal(result, expected) 
Example #22
Source File: test_stata.py    From recruit with Apache License 2.0
def test_out_of_range_float(self):
        original = DataFrame({'ColumnOk': [0.0,
                                           np.finfo(np.float32).eps,
                                           np.finfo(np.float32).max / 10.0],
                              'ColumnTooBig': [0.0,
                                               np.finfo(np.float32).eps,
                                               np.finfo(np.float32).max]})
        original.index.name = 'index'
        for col in original:
            original[col] = original[col].astype(np.float32)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            reread = read_stata(path)
            original['ColumnTooBig'] = original['ColumnTooBig'].astype(
                np.float64)
            tm.assert_frame_equal(original,
                                  reread.set_index('index'))

        original.loc[2, 'ColumnTooBig'] = np.inf
        msg = ("Column ColumnTooBig has a maximum value of infinity which"
               " is outside the range supported by Stata")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #23
Source File: test_stata.py    From recruit with Apache License 2.0
def test_unsupported_datetype(self):
        dates = [dt.datetime(1999, 12, 31, 12, 12, 12, 12000),
                 dt.datetime(2012, 12, 21, 12, 21, 12, 21000),
                 dt.datetime(1776, 7, 4, 7, 4, 7, 4000)]
        original = pd.DataFrame({'nums': [1.0, 2.0, 3.0],
                                 'strs': ['apple', 'banana', 'cherry'],
                                 'dates': dates})

        msg = "Format %tC not implemented"
        with pytest.raises(NotImplementedError, match=msg):
            with tm.ensure_clean() as path:
                original.to_stata(path, convert_dates={'dates': 'tC'})

        dates = pd.date_range('1-1-1990', periods=3, tz='Asia/Hong_Kong')
        original = pd.DataFrame({'nums': [1.0, 2.0, 3.0],
                                 'strs': ['apple', 'banana', 'cherry'],
                                 'dates': dates})
        with pytest.raises(NotImplementedError):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #24
Source File: test_c_parser_only.py    From recruit with Apache License 2.0
def test_file_binary_mode(c_parser_only):
    # see gh-23779
    parser = c_parser_only
    expected = DataFrame([[1, 2, 3], [4, 5, 6]])

    with tm.ensure_clean() as path:
        with open(path, "w") as f:
            f.write("1,2,3\n4,5,6")

        with open(path, "rb") as f:
            result = parser.read_csv(f, header=None)
            tm.assert_frame_equal(result, expected) 
Example #25
Source File: test_compression.py    From recruit with Apache License 2.0
def test_write_unsupported_compression_type():
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported") 
Example #26
Source File: test_c_parser_only.py    From recruit with Apache License 2.0
def test_unsupported_dtype(c_parser_only, match, kwargs):
    parser = c_parser_only
    df = DataFrame(np.random.rand(5, 2), columns=list(
        "AB"), index=["1A", "1B", "1C", "1D", "1E"])

    with tm.ensure_clean("__unsupported_dtype__.csv") as path:
        df.to_csv(path)

        with pytest.raises(TypeError, match=match):
            parser.read_csv(path, index_col=0, **kwargs) 
Example #27
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_compression(self, compression_only,
                                read_infer, to_infer):
        # see gh-15008
        compression = compression_only

        if compression == "zip":
            pytest.skip("{compression} is not supported "
                        "for to_csv".format(compression=compression))

        # We'll complete file extension subsequently.
        filename = "test."

        if compression == "gzip":
            filename += "gz"
        else:
            # xz --> .xz
            # bz2 --> .bz2
            filename += compression

        df = DataFrame({"A": [1]})

        to_compression = "infer" if to_infer else compression
        read_compression = "infer" if read_infer else compression

        with tm.ensure_clean(filename) as path:
            df.to_csv(path, compression=to_compression)
            result = pd.read_csv(path, index_col=0,
                                 compression=read_compression)
            tm.assert_frame_equal(result, df) 
Example #28
Source File: test_excel.py    From recruit with Apache License 2.0
def test_read_one_empty_col_no_header(self, ext, header, expected):
        # xref gh-12292
        filename = "no_header"
        df = pd.DataFrame(
            [["", 1, 100],
             ["", 2, 200],
             ["", 3, 300],
             ["", 4, 400]]
        )

        with ensure_clean(ext) as path:
            df.to_excel(path, filename, index=False, header=False)
            result = read_excel(path, filename, usecols=[0], header=header)

        tm.assert_frame_equal(result, expected) 
Example #29
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_string_array_utf8(self):
        # GH 10813
        str_array = [{'names': ['foo', 'bar']}, {'names': ['baz', 'qux']}]
        df = pd.DataFrame(str_array)
        expected_utf8 = '''\
,names
0,"[u'foo', u'bar']"
1,"[u'baz', u'qux']"
'''
        with tm.ensure_clean('unicode_test.csv') as path:
            df.to_csv(path, encoding='utf-8')
            with open(path, 'r') as f:
                assert f.read() == expected_utf8 
Example #30
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file(self):
        # GH 21696
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected = '''\
manual header
x
y
z
'''
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)
            with open(path, 'r') as f:
                assert f.read() == expected