Python pandas.util.testing.ensure_clean() Examples
The following are 30 code examples of pandas.util.testing.ensure_clean().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.util.testing, or try the search function.
Example #1
Source File: test_to_csv.py From recruit with Apache License 2.0 | 6 votes |
def test_to_csv_escapechar(self):
    """Check that ``to_csv`` applies ``escapechar`` when not double-quoting.

    Two frames are written to a temporary file and the raw file text is
    compared with the expected CSV: once with every field quoted and
    embedded quotes escaped, once with no quoting and embedded commas
    escaped.
    """
    import csv

    df = DataFrame({'col': ['a"a', '"bb"']})
    # Each line ends with an explicit "\n" so the expected text does not
    # depend on how the source file's own line breaks are preserved.
    expected = (
        '"","col"\n'
        '"0","a\\"a"\n'
        '"1","\\"bb\\""\n'
    )

    with tm.ensure_clean('test.csv') as path:
        df.to_csv(path, quoting=csv.QUOTE_ALL, doublequote=False,
                  escapechar='\\')
        with open(path, 'r') as f:
            assert f.read() == expected

    df = DataFrame({'col': ['a,a', ',bb,']})
    expected = (
        ',col\n'
        '0,a\\,a\n'
        '1,\\,bb\\,\n'
    )

    with tm.ensure_clean('test.csv') as path:
        df.to_csv(path, quoting=csv.QUOTE_NONE, escapechar='\\')
        with open(path, 'r') as f:
            assert f.read() == expected
Example #2
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_read_write_dta10(self, version):
    """Round-trip a one-row frame of mixed dtypes through ``to_stata``.

    The ``datetime`` column is written with the ``tc`` conversion, and the
    index type is excluded from the final comparison.
    """
    column_names = ['string', 'object', 'integer', 'floating', 'datetime']
    row = ["string", "object", 1, 1.1, np.datetime64('2003-12-25')]

    original = DataFrame(data=[row], columns=column_names)
    original["object"] = Series(original["object"], dtype=object)
    original.index.name = 'index'
    original.index = original.index.astype(np.int32)
    original['integer'] = original['integer'].astype(np.int32)

    with tm.ensure_clean() as path:
        original.to_stata(path, {'datetime': 'tc'}, version=version)
        roundtripped = self.read_dta(path).set_index('index')
        # original.index is np.int32, read index is np.int64
        tm.assert_frame_equal(roundtripped, original,
                              check_index_type=False)
Example #3
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_encoding(self, version):
    """Exercise the ``encoding='latin-1'`` argument on read and write.

    Both the read and the write with an explicit encoding are expected to
    emit a ``FutureWarning``; the data must match a plain read and survive
    a write/read round trip.
    """
    # GH 4626, proper encoding handling
    raw = read_stata(self.dta_encoding)
    with tm.assert_produces_warning(FutureWarning):
        encoded = read_stata(self.dta_encoding, encoding='latin-1')

    expected = raw.kreis1849[0]
    result = encoded.kreis1849[0]
    assert result == expected
    assert isinstance(result, compat.string_types)

    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(FutureWarning):
            encoded.to_stata(path, write_index=False, version=version,
                             encoding='latin-1')
        roundtripped = read_stata(path)
        tm.assert_frame_equal(encoded, roundtripped)
Example #4
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_read_write_reread_dta14(self, file, parsed_114, version):
    """Compare a parsed ``.dta`` fixture with ``parsed_114`` and round-trip it.

    ``file`` is the name of an attribute on ``self`` holding the path of
    the Stata file to read.
    """
    dta_path = getattr(self, file)
    parsed = self.read_dta(dta_path)
    parsed.index.name = 'index'

    # NOTE(review): ``expected`` is built from the CSV fixture but is never
    # compared against anything below -- confirm whether that is intended.
    expected = self.read_csv(self.csv14)
    for col in ('byte_', 'int_', 'long_', 'float_', 'double_'):
        expected[col] = expected[col]._convert(datetime=True, numeric=True)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['date_td'] = pd.to_datetime(expected['date_td'],
                                         errors='coerce')

    tm.assert_frame_equal(parsed_114, parsed)

    with tm.ensure_clean() as path:
        parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
        roundtripped = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(roundtripped, parsed_114)
Example #5
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_large_value_conversion(self):
    """Check the dtypes read back after writing large integer values.

    Writing is expected to emit ``PossiblePrecisionLoss``; columns ``s1``,
    ``s2`` and ``s3`` are expected to come back as int16, int32 and
    float64 respectively, while ``s0`` is unchanged.
    """
    original = DataFrame({
        's0': Series([1, 99], dtype=np.int8),
        's1': Series([1, 127], dtype=np.int8),
        's2': Series([1, 2 ** 15 - 1], dtype=np.int16),
        's3': Series([1, 2 ** 63 - 1], dtype=np.int64),
    })
    original.index.name = 'index'

    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(PossiblePrecisionLoss):
            original.to_stata(path)

        roundtripped = self.read_dta(path)

        modified = original.copy()
        upcasts = (('s1', np.int16), ('s2', np.int32), ('s3', np.float64))
        for column, dtype in upcasts:
            modified[column] = Series(modified[column], dtype=dtype)

        tm.assert_frame_equal(roundtripped.set_index('index'), modified)
Example #6
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_read_write_dta11(self):
    """Check the column names read back after writing invalid Stata names.

    Writing is expected to emit ``InvalidColumnName``; the frame read back
    must carry the sanitised names listed in ``formatted``.
    """
    values = [(1, 2, 3, 4)]
    original = DataFrame(
        values,
        columns=['good', compat.u('b\u00E4d'), '8number',
                 'astringwithmorethan32characters______'])

    formatted = DataFrame(
        values,
        columns=['good', 'b_d', '_8number',
                 'astringwithmorethan32characters_'])
    formatted.index.name = 'index'
    formatted = formatted.astype(np.int32)

    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(pd.io.stata.InvalidColumnName):
            original.to_stata(path, None)

        roundtripped = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(roundtripped, formatted)
Example #7
Source File: test_compression.py From recruit with Apache License 2.0 | 6 votes |
def test_compression(parser_and_data, compression_only, buffer, filename):
    """Read compressed CSV data back via a path or an open binary handle.

    When a file name is given, the compression is passed as ``"infer"``;
    otherwise the compression type is passed explicitly. The combination
    of a file name and a buffer is skipped.
    """
    parser, data, expected = parser_and_data
    compress_type = compression_only

    ext = "gz" if compress_type == "gzip" else compress_type
    if filename is not None:
        filename = filename.format(ext=ext)

    if filename and buffer:
        pytest.skip("Cannot deduce compression from "
                    "buffer of compressed data.")

    with tm.ensure_clean(filename=filename) as path:
        tm.write_to_compressed(compress_type, path, data)
        compression = "infer" if filename else compress_type

        if not buffer:
            result = parser.read_csv(path, compression=compression)
        else:
            with open(path, "rb") as handle:
                result = parser.read_csv(handle, compression=compression)

        tm.assert_frame_equal(result, expected)
Example #8
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_date_export_formats(self):
    """Write one timestamp under each Stata date format and read it back.

    Each column is named after the conversion applied to it, and the value
    read back is compared with the expected value for that format.
    """
    columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
    conversions = dict(zip(columns, columns))

    stamp = datetime(2006, 11, 20, 23, 13, 20)
    original = DataFrame([[stamp] * len(columns)], columns=columns)
    original.index.name = 'index'

    expected_values = [
        datetime(2006, 11, 20, 23, 13, 20),  # Time
        datetime(2006, 11, 20),  # Day
        datetime(2006, 11, 19),  # Week
        datetime(2006, 11, 1),  # Month
        datetime(2006, 10, 1),  # Quarter year
        datetime(2006, 7, 1),  # Half year
        datetime(2006, 1, 1),  # Year
    ]
    expected = DataFrame([expected_values], columns=columns)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, conversions)
        roundtripped = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(roundtripped, expected)
Example #9
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_bool_uint(self, byteorder, version):
    """Check the dtypes read back after writing bool and unsigned columns.

    Each column of the frame read back is compared against the original
    column cast to the corresponding entry of ``expected_types``.
    """
    # ``np.bool_`` is used rather than the ``np.bool`` alias, which was
    # deprecated and later removed from NumPy.
    s0 = Series([0, 1, True], dtype=np.bool_)
    s1 = Series([0, 1, 100], dtype=np.uint8)
    s2 = Series([0, 1, 255], dtype=np.uint8)
    s3 = Series([0, 1, 2 ** 15 - 100], dtype=np.uint16)
    s4 = Series([0, 1, 2 ** 16 - 1], dtype=np.uint16)
    s5 = Series([0, 1, 2 ** 31 - 100], dtype=np.uint32)
    s6 = Series([0, 1, 2 ** 32 - 1], dtype=np.uint32)

    original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3,
                          's4': s4, 's5': s5, 's6': s6})
    original.index.name = 'index'

    expected = original.copy()
    expected_types = (np.int8, np.int8, np.int16, np.int16, np.int32,
                      np.int32, np.float64)
    for c, t in zip(expected.columns, expected_types):
        expected[c] = expected[c].astype(t)

    with tm.ensure_clean() as path:
        original.to_stata(path, byteorder=byteorder, version=version)
        written_and_read_again = self.read_dta(path)
        written_and_read_again = written_and_read_again.set_index('index')
        tm.assert_frame_equal(written_and_read_again, expected)
Example #10
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_minimal_size_col(self):
    """Check that string columns are stored at their own string length.

    Each column is named ``s<length>``; the number in the name must equal
    both the width in the column's format and its entry in ``typlist``.
    """
    str_lens = (1, 100, 244)
    s = {
        's' + str(str_len): Series(['a' * str_len,
                                    'b' * str_len,
                                    'c' * str_len])
        for str_len in str_lens
    }
    original = DataFrame(s)

    with tm.ensure_clean() as path:
        original.to_stata(path, write_index=False)

        with StataReader(path) as sr:
            typlist = sr.typlist
            variables = sr.varlist
            formats = sr.fmtlist
            for variable, fmt, typ in zip(variables, formats, typlist):
                width = int(variable[1:])
                assert width == int(fmt[1:-1])
                assert width == typ
Example #11
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_read_csv_memory_growth_chunksize(all_parsers):
    """Iterate over every chunk of a 1000-line file read with a chunksize."""
    # see gh-24805
    #
    # Let's just make sure that we don't crash
    # as we iteratively process all chunks.
    parser = all_parsers

    with tm.ensure_clean() as path:
        with open(path, "w") as f:
            f.writelines(str(i) + "\n" for i in range(1000))

        chunks = parser.read_csv(path, chunksize=20)

        for _ in chunks:
            pass
Example #12
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_invalid_variable_labels(self, version):
    """Check that ``to_stata`` rejects invalid variable labels.

    A ``ValueError`` is expected first for a label built from a 90
    character string, then for a label containing the character ``Œ``.
    """
    original = pd.DataFrame({
        'a': [1, 2, 3, 4],
        'b': [1.0, 3.0, 27.0, 81.0],
        'c': ['Atlanta', 'Birmingham', 'Cincinnati', 'Detroit'],
    })
    original.index.name = 'index'

    variable_labels = {'a': 'very long' * 10,
                       'b': 'City Exponent',
                       'c': 'City'}

    msg = "Variable labels must be 80 characters or fewer"
    with tm.ensure_clean() as path, pytest.raises(ValueError, match=msg):
        original.to_stata(path,
                          variable_labels=variable_labels,
                          version=version)

    variable_labels['a'] = u'invalid character Œ'
    msg = ("Variable labels must contain only characters that can be"
           " encoded in Latin-1")
    with tm.ensure_clean() as path, pytest.raises(ValueError, match=msg):
        original.to_stata(path,
                          variable_labels=variable_labels,
                          version=version)
Example #13
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_out_of_range_double(self):
    """Check that ``to_stata`` rejects too-large and infinite doubles.

    A ``ValueError`` is expected when ``ColumnTooBig`` holds the largest
    finite double, and again after that value is replaced by infinity.
    """
    # GH 14618
    double_info = np.finfo(np.double)
    df = DataFrame({
        'ColumnOk': [0.0, double_info.eps, 4.49423283715579e+307],
        'ColumnTooBig': [0.0, double_info.eps, double_info.max],
    })

    msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
           r" outside the range supported by Stata \(.+\)")
    with pytest.raises(ValueError, match=msg), tm.ensure_clean() as path:
        df.to_stata(path)

    df.loc[2, 'ColumnTooBig'] = np.inf
    msg = ("Column ColumnTooBig has a maximum value of infinity which"
           " is outside the range supported by Stata")
    with pytest.raises(ValueError, match=msg), tm.ensure_clean() as path:
        df.to_stata(path)
Example #14
Source File: test_python_parser_only.py From recruit with Apache License 2.0 | 6 votes |
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
    """Read a compressed file whose separator is the multi-character ``::``.

    The contents of ``csv1`` have every comma replaced by ``::`` and are
    written through the file class ``klass`` looked up on the
    ``compression`` module. The parsed result must equal a plain read of
    ``csv1``. The test is skipped when the module cannot be imported.
    """
    from contextlib import closing

    # see gh-6607
    parser = python_parser_only

    with open(csv1, "rb") as f:
        data = f.read()

    data = data.replace(b",", b"::")
    expected = parser.read_csv(csv1)

    module = pytest.importorskip(compression)
    klass = getattr(module, klass)

    with tm.ensure_clean() as path:
        # ``closing`` guarantees the handle is closed even if the write
        # raises, and only requires that the object has a ``close`` method.
        with closing(klass(path, mode="wb")) as tmp:
            tmp.write(data)

        result = parser.read_csv(path, sep="::",
                                 compression=compression)
        tm.assert_frame_equal(result, expected)
Example #15
Source File: test_to_csv.py From recruit with Apache License 2.0 | 6 votes |
def test_to_csv_doublequote(self):
    """Check ``to_csv`` quoting with and without ``doublequote``.

    With ``doublequote=True`` embedded quotes must be doubled in the
    output. With ``doublequote=False`` and no ``escapechar``, writing is
    expected to raise ``csv.Error`` with a message mentioning
    ``escapechar``.
    """
    # Public names from ``csv`` replace the numeric quoting constant and
    # the import from the private ``_csv`` module.
    import csv

    df = DataFrame({'col': ['a"a', '"bb"']})
    # Each line ends with an explicit "\n" so the expected text does not
    # depend on how the source file's own line breaks are preserved.
    expected = (
        '"","col"\n'
        '"0","a""a"\n'
        '"1","""bb"""\n'
    )

    with tm.ensure_clean('test.csv') as path:
        df.to_csv(path, quoting=csv.QUOTE_ALL, doublequote=True)
        with open(path, 'r') as f:
            assert f.read() == expected

    with tm.ensure_clean('test.csv') as path:
        with pytest.raises(csv.Error, match='escapechar'):
            df.to_csv(path, doublequote=False)  # no escapechar set
Example #16
Source File: test_to_latex.py From recruit with Apache License 2.0 | 6 votes |
def test_to_latex_filename(self, frame):
    """Check that ``to_latex`` written to a path matches the returned string.

    Covers a plain frame, a non-ASCII frame written with an explicit
    ``utf-8`` encoding, and the same frame written without an encoding
    (which must succeed on Python 3 and raise on Python 2).
    """
    with tm.ensure_clean('test.tex') as path:
        frame.to_latex(path)

        with open(path, 'r') as f:
            assert frame.to_latex() == f.read()

    # test with utf-8 and encoding option (GH 7061)
    df = DataFrame([[u'au\xdfgangen']])
    with tm.ensure_clean('test.tex') as path:
        df.to_latex(path, encoding='utf-8')
        with codecs.open(path, 'r', encoding='utf-8') as f:
            assert df.to_latex() == f.read()

    # test with utf-8 without encoding option
    if not compat.PY3:
        # python2 default encoding is ascii, so an error should be raised
        with tm.ensure_clean('test.tex') as path:
            with pytest.raises(UnicodeEncodeError):
                df.to_latex(path)
    else:
        # python3: pandas default encoding is utf-8
        with tm.ensure_clean('test.tex') as path:
            df.to_latex(path)
            with codecs.open(path, 'r', encoding='utf-8') as f:
                assert df.to_latex() == f.read()
Example #17
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_strl_latin1(self):
    """Check the size recorded for each GSO entry of a strL column.

    A frame holding a non-ASCII string is written with ``version=117`` and
    ``var_strl`` converted to strL. The raw file must contain the string in
    both its Latin-1 and UTF-8 encodings, and for every GSO entry the
    stored value must be one byte shorter than the recorded size.
    """
    # GH 23573, correct GSO data to reflect correct size
    # The string is "þâÑÐÅ§", spelled with escapes so the code points
    # survive any re-encoding of this source file. Every character must be
    # encodable in Latin-1 because the value is encoded with that codec
    # below.
    # NOTE(review): the previous literal ended in U+0167, which Latin-1
    # cannot encode; it looked like a mis-decoded "Å§" -- confirm against
    # the upstream test.
    expected = u'\xfe\xe2\xd1\xd0\xc5\xa7'
    output = DataFrame([[u'pandas'] * 2, [expected] * 2],
                       columns=['var_str', 'var_strl'])

    with tm.ensure_clean() as path:
        output.to_stata(path, version=117, convert_strl=['var_strl'])
        with open(path, 'rb') as reread:
            content = reread.read()

    assert expected.encode('latin-1') in content
    assert expected.encode('utf-8') in content

    gsos = content.split(b'strls')[1][1:-2]
    for gso in gsos.split(b'GSO')[1:]:
        val = gso.split(b'\x00')[-2]
        size = gso[gso.find(b'\x82') + 1]
        if not PY3:
            # Indexing a Python 2 ``str`` yields a one-character string.
            size = ord(size)
        assert len(val) == size - 1
Example #18
Source File: test_to_csv.py From recruit with Apache License 2.0 | 6 votes |
def test_to_csv_write_to_open_file_with_newline_py3(self):
    """Append CSV rows to a handle opened with ``newline=''``.

    A header line is written by hand, then the frame is written to the
    same handle; the file's bytes must equal the UTF-8 encoding of the
    expected text.
    """
    # see gh-21696
    # see gh-20353
    df = pd.DataFrame({'a': ['x', 'y', 'z']})
    body = tm.convert_rows_list_to_csv_str(["x", "y", "z"])
    expected = "manual header\n" + body

    with tm.ensure_clean('test.txt') as path:
        with open(path, 'w', newline='') as f:
            f.write('manual header\n')
            df.to_csv(f, header=None, index=None)

        with open(path, 'rb') as f:
            assert f.read() == expected.encode('utf-8')
Example #19
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_all_none_exception(self, version):
    """Check that ``to_stata`` rejects an object column holding only ``None``."""
    records = [
        {'none': 'none', 'number': 0},
        {'none': None, 'number': 1},
    ]
    output = pd.DataFrame(records)
    output.loc[:, 'none'] = None

    # The pattern is kept verbatim, including the missing space in
    # "typescannot".
    msg = (r"Column `none` cannot be exported\.\n\n"
           "Only string-like object arrays containing all strings or a"
           r" mix of strings and None can be exported\. Object arrays"
           r" containing only null values are prohibited\. Other"
           " object typescannot be exported and must first be"
           r" converted to one of the supported types\.")

    with tm.ensure_clean() as path:
        with pytest.raises(ValueError, match=msg):
            output.to_stata(path, version=version)
Example #20
Source File: test_to_csv.py From recruit with Apache License 2.0 | 6 votes |
def test_to_csv_write_to_open_file_with_newline_py2(self):
    """Append CSV rows to a handle opened in binary write mode.

    A header line is written by hand, then the frame is written to the
    same handle; the file contents read in binary mode must equal the
    expected text.
    """
    # see gh-21696
    # see gh-20353
    df = pd.DataFrame({'a': ['x', 'y', 'z']})
    body = tm.convert_rows_list_to_csv_str(["x", "y", "z"])
    expected = "manual header\n" + body

    with tm.ensure_clean('test.txt') as path:
        with open(path, 'wb') as f:
            f.write('manual header\n')
            df.to_csv(f, header=None, index=None)

        with open(path, 'rb') as f:
            written = f.read()
            assert written == expected
Example #21
Source File: test_read_fwf.py From recruit with Apache License 2.0 | 6 votes |
def test_fwf_compression(compression_only, infer):
    """Read compressed fixed-width data and compare with an uncompressed read.

    ``infer`` selects how the compression is passed: ``None`` leaves the
    keyword out, a true value passes ``"infer"``, and a false value passes
    the compression type itself.
    """
    # Three rows of ten characters each. The line breaks are written as
    # explicit "\n" so the row structure does not depend on how the source
    # file's own line breaks are preserved.
    data = "1111111111\n2222222222\n3333333333"

    compression = compression_only
    extension = "gz" if compression == "gzip" else compression

    kwargs = dict(widths=[5, 5], names=["one", "two"])
    expected = read_fwf(StringIO(data), **kwargs)

    if compat.PY3:
        data = bytes(data, encoding="utf-8")

    with tm.ensure_clean(filename="tmp." + extension) as path:
        tm.write_to_compressed(compression, path, data)

        if infer is not None:
            kwargs["compression"] = "infer" if infer else compression

        result = read_fwf(path, **kwargs)
        tm.assert_frame_equal(result, expected)
Example #22
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_out_of_range_float(self):
    """Check how ``to_stata`` handles float32 values at the type's maximum.

    The column holding the float32 maximum is expected to be read back as
    float64. After that value is replaced by infinity, writing must raise
    ``ValueError``.
    """
    float_info = np.finfo(np.float32)
    original = DataFrame({
        'ColumnOk': [0.0, float_info.eps, float_info.max / 10.0],
        'ColumnTooBig': [0.0, float_info.eps, float_info.max],
    })
    original.index.name = 'index'
    for col in original:
        original[col] = original[col].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        original['ColumnTooBig'] = original['ColumnTooBig'].astype(
            np.float64)
        tm.assert_frame_equal(original, reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    msg = ("Column ColumnTooBig has a maximum value of infinity which"
           " is outside the range supported by Stata")
    with pytest.raises(ValueError, match=msg), tm.ensure_clean() as path:
        original.to_stata(path)
Example #23
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_unsupported_datetype(self):
    """Check that ``to_stata`` raises ``NotImplementedError`` for two cases.

    First for the ``tC`` date conversion, then for a frame whose date
    column is timezone-aware.
    """
    def build_frame(dates):
        # Same three-column frame for both cases; only the dates differ.
        return pd.DataFrame({'nums': [1.0, 2.0, 3.0],
                             'strs': ['apple', 'banana', 'cherry'],
                             'dates': dates})

    original = build_frame([dt.datetime(1999, 12, 31, 12, 12, 12, 12000),
                            dt.datetime(2012, 12, 21, 12, 21, 12, 21000),
                            dt.datetime(1776, 7, 4, 7, 4, 7, 4000)])

    msg = "Format %tC not implemented"
    with pytest.raises(NotImplementedError, match=msg), \
            tm.ensure_clean() as path:
        original.to_stata(path, convert_dates={'dates': 'tC'})

    original = build_frame(
        pd.date_range('1-1-1990', periods=3, tz='Asia/Hong_Kong'))
    with pytest.raises(NotImplementedError), tm.ensure_clean() as path:
        original.to_stata(path)
Example #24
Source File: test_c_parser_only.py From recruit with Apache License 2.0 | 5 votes |
def test_file_binary_mode(c_parser_only):
    """Parse a CSV file through a handle opened in binary mode."""
    # see gh-23779
    parser = c_parser_only
    expected = DataFrame([[1, 2, 3], [4, 5, 6]])

    with tm.ensure_clean() as path:
        with open(path, "w") as text_handle:
            text_handle.write("1,2,3\n4,5,6")

        with open(path, "rb") as binary_handle:
            result = parser.read_csv(binary_handle, header=None)
            tm.assert_frame_equal(result, expected)
Example #25
Source File: test_compression.py From recruit with Apache License 2.0 | 5 votes |
def test_write_unsupported_compression_type():
    """Check that ``to_json`` raises ``ValueError`` for an unknown compression."""
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    msg = "Unrecognized compression type: unsupported"

    with tm.ensure_clean() as path, pytest.raises(ValueError, match=msg):
        df.to_json(path, compression="unsupported")
Example #26
Source File: test_c_parser_only.py From recruit with Apache License 2.0 | 5 votes |
def test_unsupported_dtype(c_parser_only, match, kwargs):
    """Check that ``read_csv`` raises ``TypeError`` for the given ``kwargs``.

    A random 5x2 frame is written to CSV and read back with ``kwargs``;
    the error message must match ``match``.
    """
    parser = c_parser_only
    row_labels = ["1A", "1B", "1C", "1D", "1E"]
    df = DataFrame(np.random.rand(5, 2), columns=list("AB"),
                   index=row_labels)

    with tm.ensure_clean("__unsupported_dtype__.csv") as path:
        df.to_csv(path)

        with pytest.raises(TypeError, match=match):
            parser.read_csv(path, index_col=0, **kwargs)
Example #27
Source File: test_to_csv.py From recruit with Apache License 2.0 | 5 votes |
def test_to_csv_compression(self, compression_only, read_infer, to_infer):
    """Round-trip a one-cell frame through compressed CSV.

    ``to_infer`` and ``read_infer`` independently choose between passing
    ``"infer"`` and passing the compression type when writing and reading.
    The ``zip`` compression type is skipped.
    """
    # see gh-15008
    compression = compression_only

    if compression == "zip":
        pytest.skip("{compression} is not supported "
                    "for to_csv".format(compression=compression))

    # gzip --> .gz
    # xz --> .xz
    # bz2 --> .bz2
    extension = "gz" if compression == "gzip" else compression
    filename = "test." + extension

    df = DataFrame({"A": [1]})

    to_compression = "infer" if to_infer else compression
    read_compression = "infer" if read_infer else compression

    with tm.ensure_clean(filename) as path:
        df.to_csv(path, compression=to_compression)
        result = pd.read_csv(path, index_col=0,
                             compression=read_compression)
        tm.assert_frame_equal(result, df)
Example #28
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_read_one_empty_col_no_header(self, ext, header, expected):
    """Read back only the first, empty-string column of a headerless sheet."""
    # xref gh-12292
    sheet_name = "no_header"
    rows = [["", i, i * 100] for i in range(1, 5)]
    df = pd.DataFrame(rows)

    with ensure_clean(ext) as path:
        df.to_excel(path, sheet_name, index=False, header=False)
        result = read_excel(path, sheet_name, usecols=[0], header=header)
        tm.assert_frame_equal(result, expected)
Example #29
Source File: test_to_csv.py From recruit with Apache License 2.0 | 5 votes |
def test_to_csv_string_array_utf8(self):
    """Check the CSV text written for a column of string lists.

    The frame is written with ``encoding='utf-8'`` and the file text is
    compared with the expected output.
    """
    # GH 10813
    str_array = [{'names': ['foo', 'bar']},
                 {'names': ['baz', 'qux']}]
    df = pd.DataFrame(str_array)
    # Each line ends with an explicit "\n" so the expected text does not
    # depend on how the source file's own line breaks are preserved.
    # NOTE(review): the ``u'...'`` prefixes look like Python 2 list reprs;
    # confirm which interpreter this expectation targets.
    expected_utf8 = (
        ',names\n'
        '0,"[u\'foo\', u\'bar\']"\n'
        '1,"[u\'baz\', u\'qux\']"\n'
    )

    with tm.ensure_clean('unicode_test.csv') as path:
        df.to_csv(path, encoding='utf-8')
        with open(path, 'r') as f:
            assert f.read() == expected_utf8
Example #30
Source File: test_to_csv.py From recruit with Apache License 2.0 | 5 votes |
def test_to_csv_write_to_open_file(self):
    """Append CSV rows to an already-open text handle.

    A header line is written by hand, then the frame is written to the
    same handle without header or index; the file text must be the header
    followed by one value per line.
    """
    # GH 21696
    df = pd.DataFrame({'a': ['x', 'y', 'z']})
    # Each line ends with an explicit "\n" so the expected text does not
    # depend on how the source file's own line breaks are preserved.
    expected = (
        'manual header\n'
        'x\n'
        'y\n'
        'z\n'
    )

    with tm.ensure_clean('test.txt') as path:
        with open(path, 'w') as f:
            f.write('manual header\n')
            df.to_csv(f, header=None, index=None)

        with open(path, 'r') as f:
            assert f.read() == expected