Python pandas.util.testing.ensure_clean() Examples

The following are 30 code examples of pandas.util.testing.ensure_clean(), taken from open-source projects. The line above each example names the original project and source file. You may also want to check out all available functions/classes of the module pandas.util.testing.
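Quick usage sketch (not taken from any of the projects below, just a minimal illustration of the pattern the examples share): tm.ensure_clean() is a context manager that yields a temporary file path and removes the file, if one was created, when the block exits, even if the body raises. The optional filename argument is used as a suffix for the temporary path, which is handy when an extension such as .csv or .gz matters.

import pandas as pd
import pandas.util.testing as tm

df = pd.DataFrame({'a': [1, 2, 3]})

# The path does not exist yet; whatever gets written there is cleaned up on exit.
with tm.ensure_clean('example.csv') as path:
    df.to_csv(path, index=False)
    roundtrip = pd.read_csv(path)

tm.assert_frame_equal(df, roundtrip)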
Example #1
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_escapechar(self):
        df = DataFrame({'col': ['a"a', '"bb"']})
        expected = '''\
"","col"
"0","a\\"a"
"1","\\"bb\\""
'''

        with tm.ensure_clean('test.csv') as path:  # QUOTE_ALL
            df.to_csv(path, quoting=1, doublequote=False, escapechar='\\')
            with open(path, 'r') as f:
                assert f.read() == expected

        df = DataFrame({'col': ['a,a', ',bb,']})
        expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=3, escapechar='\\')  # QUOTE_NONE
            with open(path, 'r') as f:
                assert f.read() == expected 
Example #2
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #3
Source File: test_stata.py    From recruit with Apache License 2.0
def test_encoding(self, version):

        # GH 4626, proper encoding handling
        raw = read_stata(self.dta_encoding)
        with tm.assert_produces_warning(FutureWarning):
            encoded = read_stata(self.dta_encoding, encoding='latin-1')
        result = encoded.kreis1849[0]

        expected = raw.kreis1849[0]
        assert result == expected
        assert isinstance(result, compat.string_types)

        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(FutureWarning):
                encoded.to_stata(path, write_index=False, version=version,
                                 encoding='latin-1')
            reread_encoded = read_stata(path)
            tm.assert_frame_equal(encoded, reread_encoded) 
Example #4
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_reread_dta14(self, file, parsed_114, version):
        file = getattr(self, file)
        parsed = self.read_dta(file)
        parsed.index.name = 'index'

        expected = self.read_csv(self.csv14)
        cols = ['byte_', 'int_', 'long_', 'float_', 'double_']
        for col in cols:
            expected[col] = expected[col]._convert(datetime=True, numeric=True)
        expected['float_'] = expected['float_'].astype(np.float32)
        expected['date_td'] = pd.to_datetime(
            expected['date_td'], errors='coerce')

        tm.assert_frame_equal(parsed_114, parsed)

        with tm.ensure_clean() as path:
            parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(
                written_and_read_again.set_index('index'), parsed_114) 
Example #5
Source File: test_stata.py    From recruit with Apache License 2.0
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #6
Source File: test_stata.py    From recruit with Apache License 2.0
def test_read_write_dta11(self):
        original = DataFrame([(1, 2, 3, 4)],
                             columns=['good', compat.u('b\u00E4d'), '8number',
                                      'astringwithmorethan32characters______'])
        formatted = DataFrame([(1, 2, 3, 4)],
                              columns=['good', 'b_d', '_8number',
                                       'astringwithmorethan32characters_'])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(pd.io.stata.InvalidColumnName):
                original.to_stata(path, None)

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(
                written_and_read_again.set_index('index'), formatted) 
Example #7
Source File: test_compression.py    From recruit with Apache License 2.0
def test_compression(parser_and_data, compression_only, buffer, filename):
    parser, data, expected = parser_and_data
    compress_type = compression_only

    ext = "gz" if compress_type == "gzip" else compress_type
    filename = filename if filename is None else filename.format(ext=ext)

    if filename and buffer:
        pytest.skip("Cannot deduce compression from "
                    "buffer of compressed data.")

    with tm.ensure_clean(filename=filename) as path:
        tm.write_to_compressed(compress_type, path, data)
        compression = "infer" if filename else compress_type

        if buffer:
            with open(path, "rb") as f:
                result = parser.read_csv(f, compression=compression)
        else:
            result = parser.read_csv(path, compression=compression)

        tm.assert_frame_equal(result, expected) 
Example #8
Source File: test_stata.py    From recruit with Apache License 2.0
def test_date_export_formats(self):
        columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
        conversions = {c: c for c in columns}
        data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns)
        original = DataFrame([data], columns=columns)
        original.index.name = 'index'
        expected_values = [datetime(2006, 11, 20, 23, 13, 20),  # Time
                           datetime(2006, 11, 20),  # Day
                           datetime(2006, 11, 19),  # Week
                           datetime(2006, 11, 1),  # Month
                           datetime(2006, 10, 1),  # Quarter year
                           datetime(2006, 7, 1),  # Half year
                           datetime(2006, 1, 1)]  # Year

        expected = DataFrame([expected_values], columns=columns)
        expected.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path, conversions)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  expected) 
Example #9
Source File: test_stata.py    From recruit with Apache License 2.0
def test_bool_uint(self, byteorder, version):
        s0 = Series([0, 1, True], dtype=np.bool)
        s1 = Series([0, 1, 100], dtype=np.uint8)
        s2 = Series([0, 1, 255], dtype=np.uint8)
        s3 = Series([0, 1, 2 ** 15 - 100], dtype=np.uint16)
        s4 = Series([0, 1, 2 ** 16 - 1], dtype=np.uint16)
        s5 = Series([0, 1, 2 ** 31 - 100], dtype=np.uint32)
        s6 = Series([0, 1, 2 ** 32 - 1], dtype=np.uint32)

        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3,
                              's4': s4, 's5': s5, 's6': s6})
        original.index.name = 'index'
        expected = original.copy()
        expected_types = (np.int8, np.int8, np.int16, np.int16, np.int32,
                          np.int32, np.float64)
        for c, t in zip(expected.columns, expected_types):
            expected[c] = expected[c].astype(t)

        with tm.ensure_clean() as path:
            original.to_stata(path, byteorder=byteorder, version=version)
            written_and_read_again = self.read_dta(path)
            written_and_read_again = written_and_read_again.set_index('index')
            tm.assert_frame_equal(written_and_read_again, expected) 
Example #10
Source File: test_stata.py    From recruit with Apache License 2.0
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #11
Source File: test_common.py    From recruit with Apache License 2.0
def test_read_csv_memory_growth_chunksize(all_parsers):
    # see gh-24805
    #
    # Let's just make sure that we don't crash
    # as we iteratively process all chunks.
    parser = all_parsers

    with tm.ensure_clean() as path:
        with open(path, "w") as f:
            for i in range(1000):
                f.write(str(i) + "\n")

        result = parser.read_csv(path, chunksize=20)

        for _ in result:
            pass 
Example #12
Source File: test_stata.py    From recruit with Apache License 2.0
def test_invalid_variable_labels(self, version):
        original = pd.DataFrame({'a': [1, 2, 3, 4],
                                 'b': [1.0, 3.0, 27.0, 81.0],
                                 'c': ['Atlanta', 'Birmingham',
                                       'Cincinnati', 'Detroit']})
        original.index.name = 'index'
        variable_labels = {'a': 'very long' * 10,
                           'b': 'City Exponent',
                           'c': 'City'}
        with tm.ensure_clean() as path:
            msg = "Variable labels must be 80 characters or fewer"
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path,
                                  variable_labels=variable_labels,
                                  version=version)

        variable_labels['a'] = u'invalid character Œ'
        with tm.ensure_clean() as path:
            msg = ("Variable labels must contain only characters that can be"
                   " encoded in Latin-1")
            with pytest.raises(ValueError, match=msg):
                original.to_stata(path,
                                  variable_labels=variable_labels,
                                  version=version) 
Example #13
Source File: test_stata.py    From recruit with Apache License 2.0
def test_out_of_range_double(self):
        # GH 14618
        df = DataFrame({'ColumnOk': [0.0,
                                     np.finfo(np.double).eps,
                                     4.49423283715579e+307],
                        'ColumnTooBig': [0.0,
                                         np.finfo(np.double).eps,
                                         np.finfo(np.double).max]})
        msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
               r" outside the range supported by Stata \(.+\)")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                df.to_stata(path)

        df.loc[2, 'ColumnTooBig'] = np.inf
        msg = ("Column ColumnTooBig has a maximum value of infinity which"
               " is outside the range supported by Stata")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                df.to_stata(path) 
Example #14
Source File: test_python_parser_only.py    From recruit with Apache License 2.0
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
    # see gh-6607
    parser = python_parser_only

    with open(csv1, "rb") as f:
        data = f.read()

    data = data.replace(b",", b"::")
    expected = parser.read_csv(csv1)

    module = pytest.importorskip(compression)
    klass = getattr(module, klass)

    with tm.ensure_clean() as path:
        tmp = klass(path, mode="wb")
        tmp.write(data)
        tmp.close()

        result = parser.read_csv(path, sep="::",
                                 compression=compression)
        tm.assert_frame_equal(result, expected) 
Example #15
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_doublequote(self):
        df = DataFrame({'col': ['a"a', '"bb"']})
        expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1, doublequote=True)  # QUOTE_ALL
            with open(path, 'r') as f:
                assert f.read() == expected

        from _csv import Error
        with tm.ensure_clean('test.csv') as path:
            with pytest.raises(Error, match='escapechar'):
                df.to_csv(path, doublequote=False)  # no escapechar set 
Example #16
Source File: test_to_latex.py    From recruit with Apache License 2.0
def test_to_latex_filename(self, frame):
        with tm.ensure_clean('test.tex') as path:
            frame.to_latex(path)

            with open(path, 'r') as f:
                assert frame.to_latex() == f.read()

        # test with utf-8 and encoding option (GH 7061)
        df = DataFrame([[u'au\xdfgangen']])
        with tm.ensure_clean('test.tex') as path:
            df.to_latex(path, encoding='utf-8')
            with codecs.open(path, 'r', encoding='utf-8') as f:
                assert df.to_latex() == f.read()

        # test with utf-8 without encoding option
        if compat.PY3:  # python3: pandas default encoding is utf-8
            with tm.ensure_clean('test.tex') as path:
                df.to_latex(path)
                with codecs.open(path, 'r', encoding='utf-8') as f:
                    assert df.to_latex() == f.read()
        else:
            # python2 default encoding is ascii, so an error should be raised
            with tm.ensure_clean('test.tex') as path:
                with pytest.raises(UnicodeEncodeError):
                    df.to_latex(path) 
Example #17
Source File: test_stata.py    From recruit with Apache License 2.0
def test_strl_latin1(self):
        # GH 23573, correct GSO data to reflect correct size
        output = DataFrame([[u'pandas'] * 2, [u'þâÑÐŧ'] * 2],
                           columns=['var_str', 'var_strl'])

        with tm.ensure_clean() as path:
            output.to_stata(path, version=117, convert_strl=['var_strl'])
            with open(path, 'rb') as reread:
                content = reread.read()
                expected = u'þâÑÐŧ'
                assert expected.encode('latin-1') in content
                assert expected.encode('utf-8') in content
                gsos = content.split(b'strls')[1][1:-2]
                for gso in gsos.split(b'GSO')[1:]:
                    val = gso.split(b'\x00')[-2]
                    size = gso[gso.find(b'\x82') + 1]
                    if not PY3:
                        size = ord(size)
                    assert len(val) == size - 1 
Example #18
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file_with_newline_py3(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x",
                         "y",
                         "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w', newline='') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == bytes(expected, 'utf-8') 
Example #19
Source File: test_stata.py    From recruit with Apache License 2.0
def test_all_none_exception(self, version):
        output = [
            {'none': 'none',
             'number': 0},
            {'none': None,
             'number': 1}
        ]
        output = pd.DataFrame(output)
        output.loc[:, 'none'] = None
        with tm.ensure_clean() as path:
            msg = (r"Column `none` cannot be exported\.\n\n"
                   "Only string-like object arrays containing all strings or a"
                   r" mix of strings and None can be exported\. Object arrays"
                   r" containing only null values are prohibited\. Other"
                   " object typescannot be exported and must first be"
                   r" converted to one of the supported types\.")
            with pytest.raises(ValueError, match=msg):
                output.to_stata(path, version=version) 
Example #20
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file_with_newline_py2(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x",
                         "y",
                         "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'wb') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == expected 
Example #21
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_fwf_compression(compression_only, infer):
    data = """1111111111
    2222222222
    3333333333""".strip()

    compression = compression_only
    extension = "gz" if compression == "gzip" else compression

    kwargs = dict(widths=[5, 5], names=["one", "two"])
    expected = read_fwf(StringIO(data), **kwargs)

    if compat.PY3:
        data = bytes(data, encoding="utf-8")

    with tm.ensure_clean(filename="tmp." + extension) as path:
        tm.write_to_compressed(compression, path, data)

        if infer is not None:
            kwargs["compression"] = "infer" if infer else compression

        result = read_fwf(path, **kwargs)
        tm.assert_frame_equal(result, expected) 
Example #22
Source File: test_stata.py    From recruit with Apache License 2.0
def test_out_of_range_float(self):
        original = DataFrame({'ColumnOk': [0.0,
                                           np.finfo(np.float32).eps,
                                           np.finfo(np.float32).max / 10.0],
                              'ColumnTooBig': [0.0,
                                               np.finfo(np.float32).eps,
                                               np.finfo(np.float32).max]})
        original.index.name = 'index'
        for col in original:
            original[col] = original[col].astype(np.float32)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            reread = read_stata(path)
            original['ColumnTooBig'] = original['ColumnTooBig'].astype(
                np.float64)
            tm.assert_frame_equal(original,
                                  reread.set_index('index'))

        original.loc[2, 'ColumnTooBig'] = np.inf
        msg = ("Column ColumnTooBig has a maximum value of infinity which"
               " is outside the range supported by Stata")
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #23
Source File: test_stata.py    From recruit with Apache License 2.0
def test_unsupported_datetype(self):
        dates = [dt.datetime(1999, 12, 31, 12, 12, 12, 12000),
                 dt.datetime(2012, 12, 21, 12, 21, 12, 21000),
                 dt.datetime(1776, 7, 4, 7, 4, 7, 4000)]
        original = pd.DataFrame({'nums': [1.0, 2.0, 3.0],
                                 'strs': ['apple', 'banana', 'cherry'],
                                 'dates': dates})

        msg = "Format %tC not implemented"
        with pytest.raises(NotImplementedError, match=msg):
            with tm.ensure_clean() as path:
                original.to_stata(path, convert_dates={'dates': 'tC'})

        dates = pd.date_range('1-1-1990', periods=3, tz='Asia/Hong_Kong')
        original = pd.DataFrame({'nums': [1.0, 2.0, 3.0],
                                 'strs': ['apple', 'banana', 'cherry'],
                                 'dates': dates})
        with pytest.raises(NotImplementedError):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #24
Source File: test_c_parser_only.py    From recruit with Apache License 2.0
def test_file_binary_mode(c_parser_only):
    # see gh-23779
    parser = c_parser_only
    expected = DataFrame([[1, 2, 3], [4, 5, 6]])

    with tm.ensure_clean() as path:
        with open(path, "w") as f:
            f.write("1,2,3\n4,5,6")

        with open(path, "rb") as f:
            result = parser.read_csv(f, header=None)
            tm.assert_frame_equal(result, expected) 
Example #25
Source File: test_compression.py    From recruit with Apache License 2.0
def test_write_unsupported_compression_type():
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported") 
Example #26
Source File: test_c_parser_only.py    From recruit with Apache License 2.0
def test_unsupported_dtype(c_parser_only, match, kwargs):
    parser = c_parser_only
    df = DataFrame(np.random.rand(5, 2), columns=list(
        "AB"), index=["1A", "1B", "1C", "1D", "1E"])

    with tm.ensure_clean("__unsupported_dtype__.csv") as path:
        df.to_csv(path)

        with pytest.raises(TypeError, match=match):
            parser.read_csv(path, index_col=0, **kwargs) 
Example #27
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_compression(self, compression_only,
                                read_infer, to_infer):
        # see gh-15008
        compression = compression_only

        if compression == "zip":
            pytest.skip("{compression} is not supported "
                        "for to_csv".format(compression=compression))

        # We'll complete file extension subsequently.
        filename = "test."

        if compression == "gzip":
            filename += "gz"
        else:
            # xz --> .xz
            # bz2 --> .bz2
            filename += compression

        df = DataFrame({"A": [1]})

        to_compression = "infer" if to_infer else compression
        read_compression = "infer" if read_infer else compression

        with tm.ensure_clean(filename) as path:
            df.to_csv(path, compression=to_compression)
            result = pd.read_csv(path, index_col=0,
                                 compression=read_compression)
            tm.assert_frame_equal(result, df) 
Example #28
Source File: test_excel.py    From recruit with Apache License 2.0
def test_read_one_empty_col_no_header(self, ext, header, expected):
        # xref gh-12292
        filename = "no_header"
        df = pd.DataFrame(
            [["", 1, 100],
             ["", 2, 200],
             ["", 3, 300],
             ["", 4, 400]]
        )

        with ensure_clean(ext) as path:
            df.to_excel(path, filename, index=False, header=False)
            result = read_excel(path, filename, usecols=[0], header=header)

        tm.assert_frame_equal(result, expected) 
Example #29
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_string_array_utf8(self):
        # GH 10813
        str_array = [{'names': ['foo', 'bar']}, {'names': ['baz', 'qux']}]
        df = pd.DataFrame(str_array)
        expected_utf8 = '''\
,names
0,"[u'foo', u'bar']"
1,"[u'baz', u'qux']"
'''
        with tm.ensure_clean('unicode_test.csv') as path:
            df.to_csv(path, encoding='utf-8')
            with open(path, 'r') as f:
                assert f.read() == expected_utf8 
Example #30
Source File: test_to_csv.py    From recruit with Apache License 2.0
def test_to_csv_write_to_open_file(self):
        # GH 21696
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected = '''\
manual header
x
y
z
'''
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)
            with open(path, 'r') as f:
                assert f.read() == expected