Python csv.field_size_limit() Examples
The following are 30 code examples of csv.field_size_limit().
The original project and source file for each example are noted in the heading above it.
You may also want to check out the other functions and classes available in the
csv module.
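As a quick orientation before the project examples (this snippet is not taken from any of the projects below): calling csv.field_size_limit() with no argument returns the parser's current maximum field size, and calling it with an integer installs a new limit and returns the previous one.

import csv

old_limit = csv.field_size_limit()       # read the current limit (default is 131072)
csv.field_size_limit(10 * 1024 * 1024)   # allow fields up to 10 MiB; returns the old limit
print(csv.field_size_limit())            # 10485760
csv.field_size_limit(old_limit)          # restore the previous limit when finished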
Example #1
Source File: test_csv.py From oss-ftp with MIT License

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #2
Source File: test_csv.py From gcblue with BSD 3-Clause "New" or "Revised" License

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #3
Source File: lookups_util.py From Splunking-Crime with GNU Affero General Public License v3.0

def load_lookup_file_from_disk(file_path):
    """
    parse the lookup file from the given path and return the result

    Args:
        file_path (string): the path to the lookup file

    Returns:
        lookup_data (dict): result from the csv parser
    """
    if not file_exists(file_path):
        raise RuntimeError('Not valid filepath: {}'.format(file_path))

    try:
        with open(file_path, mode='r') as f:
            reader = csv.DictReader(f)
            csv.field_size_limit(CSV_FILESIZE_LIMIT)
            lookup_data = reader.next()
    except Exception as e:
        raise RuntimeError('Error reading model file: %s, %s' % (file_path, str(e)))

    return lookup_data
Example #4
Source File: resources.py From SummaryRank with MIT License

def _load_sentence_file(self, filename):
    csv.field_size_limit(sys.maxsize)
    opener = gzip.open if filename.endswith('.gz') else open
    entities = dict()
    with opener(filename) as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            qid = row[0].strip()
            sentence_json = row[1].strip()
            if sentence_json:
                payload = json.loads(sentence_json)
                annotations = payload['annotations']
                sentence_entities = [x['id'] for x in annotations]
                sentence_entities = [str(x) for x in sentence_entities]
                entities[qid] = sentence_entities
            else:
                entities[qid] = []
    return entities
Example #5
Source File: bulkloader.py From python-compat-runtime with Apache License 2.0

def Records(self):
    """Reads the CSV data file and generates row records.

    Yields:
      Lists of strings

    Raises:
      ResumeError: If the progress database and data file indicate a
        different number of rows.
    """
    csv_file = self.openfile(self.csv_filename, 'rb')
    reader = self.create_csv_reader(csv_file, skipinitialspace=True)
    try:
        for record in reader:
            yield record
    except csv.Error, e:
        if e.args and e.args[0].startswith('field larger than field limit'):
            raise FieldSizeLimitError(csv.field_size_limit())
        else:
            raise
Example #6
Source File: test_csv.py From ironpython3 with Apache License 2.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #7
Source File: test_csv.py From Fluid-Designer with GNU General Public License v3.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #8
Source File: index.py From brotab with MIT License

def index(sqlite_filename, tsv_filename):
    logger.info('Reading tsv file %s', tsv_filename)
    # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
    # https://github.com/balta2ar/brotab/issues/25
    # It should work on Python 3 and Python 2, on any CPU / OS.
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
    with open(tsv_filename, encoding='utf-8') as tsv_file:
        lines = [tuple(line) for line in csv.reader(tsv_file, delimiter='\t',
                                                    quoting=csv.QUOTE_NONE)]

    logger.info(
        'Creating sqlite DB filename %s from tsv %s (%s lines)',
        sqlite_filename, tsv_filename, len(lines))
    conn = sqlite3.connect(sqlite_filename)
    cursor = conn.cursor()
    with suppress(sqlite3.OperationalError):
        cursor.execute('drop table tabs;')
    cursor.execute(
        'create virtual table tabs using fts5('
        '    tab_id, title, url, body, tokenize="porter unicode61");')
    cursor.executemany('insert into tabs values (?, ?, ?, ?)', lines)
    conn.commit()
    conn.close()
Example #9
Source File: test_csv.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #10
Source File: test_csv.py From BinderFilter with MIT License

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #11
Source File: test_csv.py From medicare-demo with Apache License 2.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #12
Source File: training.py From caml-mimic with MIT License

def init(args):
    """
    Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(sys.maxsize)

    #load vocab and other lookups
    desc_embed = args.lmbda > 0
    print("loading lookups...")
    dicts = datasets.load_lookups(args, desc_embed=desc_embed)

    model = tools.pick_model(args, dicts)
    print(model)

    if not args.test_model:
        optimizer = optim.Adam(model.parameters(), weight_decay=args.weight_decay, lr=args.lr)
    else:
        optimizer = None

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts
Example #13
Source File: test_csv.py From ironpython2 with Apache License 2.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #14
Source File: load_data.py From WeSTClass with Apache License 2.0

def read_file(data_dir, with_evaluation):
    data = []
    target = []
    with open(join(data_dir, 'dataset.csv'), 'rt', encoding='utf-8') as csvfile:
        csv.field_size_limit(500 * 1024 * 1024)
        reader = csv.reader(csvfile)
        for row in reader:
            if data_dir == './agnews':
                doc = row[1] + '. ' + row[2]
                data.append(doc)
                target.append(int(row[0]) - 1)
            elif data_dir == './yelp':
                data.append(row[1])
                target.append(int(row[0]) - 1)
    if with_evaluation:
        y = np.asarray(target)
        assert len(data) == len(y)
        assert set(range(len(np.unique(y)))) == set(np.unique(y))
    else:
        y = None
    return data, y
Example #15
Source File: bulkloader.py From browserscope with Apache License 2.0

def Records(self):
    """Reads the CSV data file and generates row records.

    Yields:
      Lists of strings

    Raises:
      ResumeError: If the progress database and data file indicate a
        different number of rows.
    """
    csv_file = self.openfile(self.csv_filename, 'rb')
    reader = self.create_csv_reader(csv_file, skipinitialspace=True)
    try:
        for record in reader:
            yield record
    except csv.Error, e:
        if e.args and e.args[0].startswith('field larger than field limit'):
            raise FieldSizeLimitError(csv.field_size_limit())
        else:
            raise
Example #16
Source File: test_csv.py From CTFCrackTools-V2 with GNU General Public License v3.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #17
Source File: dataset.py From TopicNet with MIT License

def _increase_csv_field_max_size():
    """Makes document entry in dataset as big as possible

    References
    ----------
    https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
    """
    max_int = sys.maxsize

    while True:
        try:
            csv.field_size_limit(max_int)
            break
        except OverflowError:
            max_int = int(max_int / 10)
Example #18
Source File: test_csv.py From CTFCrackTools with GNU General Public License v3.0

def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
Example #19
Source File: repdocs.py From dblp with MIT License

def read_lcc_author_repdocs(self):
    """Read and return an iterator over the author repdoc corpus, which
    excludes the authors not in the LCC.
    """
    author_repdoc_file, _, lcc_idmap_file = self.input()
    with lcc_idmap_file.open() as lcc_idmap_f:
        lcc_author_df = pd.read_csv(lcc_idmap_f, header=0, usecols=(0,))

    lcc_author_ids = lcc_author_df['author_id'].values
    csv.field_size_limit(sys.maxint)
    records = util.iter_csv_fwrapper(author_repdoc_file)
    return (doc.split('|') for author_id, doc in records
            if int(author_id) in lcc_author_ids)
Example #20
Source File: sync.py From tap-s3-csv with GNU Affero General Public License v3.0

def sync_table_file(config, s3_path, table_spec, stream):
    LOGGER.info('Syncing file "%s".', s3_path)

    bucket = config['bucket']
    table_name = table_spec['table_name']

    s3_file_handle = s3.get_file_handle(config, s3_path)

    # We observed data who's field size exceeded the default maximum of
    # 131072. We believe the primary consequence of the following setting
    # is that a malformed, wide CSV would potentially parse into a single
    # large field rather than giving this error, but we also think the
    # chances of that are very small and at any rate the source data would
    # need to be fixed. The other consequence of this could be larger
    # memory consumption but that's acceptable as well.
    csv.field_size_limit(sys.maxsize)
    iterator = singer_encodings_csv.get_row_iterator(
        s3_file_handle._raw_stream, table_spec)  #pylint:disable=protected-access

    records_synced = 0

    for row in iterator:
        custom_columns = {
            s3.SDC_SOURCE_BUCKET_COLUMN: bucket,
            s3.SDC_SOURCE_FILE_COLUMN: s3_path,
            # index zero, +1 for header row
            s3.SDC_SOURCE_LINENO_COLUMN: records_synced + 2
        }
        rec = {**row, **custom_columns}

        with Transformer() as transformer:
            to_write = transformer.transform(rec, stream['schema'], metadata.to_map(stream['metadata']))

        singer.write_record(table_name, to_write)
        records_synced += 1

    return records_synced
Example #21
Source File: bulkload_deprecated.py From python-compat-runtime with Apache License 2.0

def Load(self, kind, data):
    """Parses CSV data, uses a Loader to convert to entities, and stores them.

    On error, fails fast. Returns a "bad request" HTTP response code and
    includes the traceback in the output.

    Args:
      kind: a string containing the entity kind that this loader handles
      data: a string containing the CSV data to load

    Returns:
      tuple (response code, output) where:
        response code: integer HTTP response code to return
        output: string containing the HTTP response body
    """
    data = data.encode('utf-8')
    Validate(kind, basestring)
    Validate(data, basestring)
    output = []

    try:
        loader = Loader.RegisteredLoaders()[kind]
    except KeyError:
        output.append('Error: no Loader defined for kind %s.' % kind)
        return (httplib.BAD_REQUEST, ''.join(output))

    buffer = StringIO.StringIO(data)
    reader = csv.reader(buffer, skipinitialspace=True)

    try:
        csv.field_size_limit(800000)
    except AttributeError:
        pass

    return self.LoadEntities(self.IterRows(reader), loader)
Example #22
Source File: bulkloader.py From python-compat-runtime with Apache License 2.0

def __init__(self, limit):
    self.message = """
A field in your CSV input file has exceeded the current limit of %d.

You can raise this limit by adding the following lines to your config file:

import csv
csv.field_size_limit(new_limit)

where new_limit is number larger than the size in bytes of the largest
field in your CSV.
""" % limit
    Error.__init__(self, self.message)
Example #23
Source File: bulkload_client.py From python-compat-runtime with Apache License 2.0

def ContentGenerator(csv_file,
                     batch_size,
                     create_csv_reader=csv.reader,
                     create_csv_writer=csv.writer):
    """Retrieves CSV data up to a batch size at a time.

    Args:
      csv_file: A file-like object for reading CSV data.
      batch_size: Maximum number of CSV rows to yield on each iteration.
      create_csv_reader, create_csv_writer: Used for dependency injection.

    Yields:
      Tuple (entity_count, csv_content) where:
        entity_count: Number of entities contained in the csv_content. Will be
          less than or equal to the batch_size and greater than 0.
        csv_content: String containing the CSV content containing the next
          entity_count entities.
    """
    try:
        csv.field_size_limit(800000)
    except AttributeError:
        pass

    reader = create_csv_reader(csv_file, skipinitialspace=True)
    exhausted = False
    while not exhausted:
        rows_written = 0
        content = StringIO.StringIO()
        writer = create_csv_writer(content)
        try:
            for i in xrange(batch_size):
                row = reader.next()
                writer.writerow(row)
                rows_written += 1
        except StopIteration:
            exhausted = True
        if rows_written > 0:
            yield rows_written, content.getvalue()
Example #24
Source File: csv_utils.py From bitcoin-etl with MIT License

def set_max_field_size_limit():
    max_int = sys.maxsize
    decrement = True
    while decrement:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        decrement = False
        try:
            csv.field_size_limit(max_int)
        except OverflowError:
            max_int = int(max_int / 10)
            decrement = True
Example #25
Source File: utils.py From text with BSD 3-Clause "New" or "Revised" License

def unicode_csv_reader(unicode_csv_data, **kwargs):
    r"""Since the standard csv library does not handle unicode in Python 2, we need a wrapper.
    Borrowed and slightly modified from the Python docs:
    https://docs.python.org/2/library/csv.html#csv-examples

    Arguments:
        unicode_csv_data: unicode csv data (see example below)

    Examples:
        >>> from torchtext.utils import unicode_csv_reader
        >>> import io
        >>> with io.open(data_path, encoding="utf8") as f:
        >>>     reader = unicode_csv_reader(f)
    """
    # Fix field larger than field limit error
    maxInt = sys.maxsize
    while True:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(maxInt)
            break
        except OverflowError:
            maxInt = int(maxInt / 10)
    csv.field_size_limit(maxInt)

    for line in csv.reader(unicode_csv_data, **kwargs):
        yield line
Example #26
Source File: utils.py From audio with BSD 2-Clause "Simplified" License

def unicode_csv_reader(unicode_csv_data: TextIOWrapper, **kwargs: Any) -> Any:
    r"""Since the standard csv library does not handle unicode in Python 2, we need a wrapper.
    Borrowed and slightly modified from the Python docs:
    https://docs.python.org/2/library/csv.html#csv-examples

    Args:
        unicode_csv_data (TextIOWrapper): unicode csv data (see example below)

    Examples:
        >>> from torchaudio.datasets.utils import unicode_csv_reader
        >>> import io
        >>> with io.open(data_path, encoding="utf8") as f:
        >>>     reader = unicode_csv_reader(f)
    """
    # Fix field larger than field limit error
    maxInt = sys.maxsize
    while True:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(maxInt)
            break
        except OverflowError:
            maxInt = int(maxInt / 10)
    csv.field_size_limit(maxInt)

    for line in csv.reader(unicode_csv_data, **kwargs):
        yield line
Example #27
Source File: import_geonames.py From EpiTator with Apache License 2.0

def read_geonames_csv():
    print("Downloading geoname data from: " + GEONAMES_ZIP_URL)
    try:
        url = request.urlopen(GEONAMES_ZIP_URL)
    except URLError:
        print("If you are operating behind a firewall, try setting the HTTP_PROXY/HTTPS_PROXY environment variables.")
        raise
    zipfile = ZipFile(BytesIO(url.read()))
    print("Download complete")
    # Loading geonames data may cause errors without setting csv.field_size_limit:
    if sys.platform == "win32":
        max_c_long_on_windows = (2**32 / 2) - 1
        csv.field_size_limit(max_c_long_on_windows)
    else:
        csv.field_size_limit(sys.maxint if six.PY2 else six.MAXSIZE)
    with zipfile.open('allCountries.txt') as f:
        reader = unicodecsv.DictReader(f,
                                       fieldnames=[k for k, v in geonames_field_mappings],
                                       encoding='utf-8',
                                       delimiter='\t',
                                       quoting=csv.QUOTE_NONE)
        for d in reader:
            d['population'] = parse_number(d['population'], 0)
            d['latitude'] = parse_number(d['latitude'], 0)
            d['longitude'] = parse_number(d['longitude'], 0)
            if len(d['alternatenames']) > 0:
                d['alternatenames'] = d['alternatenames'].split(',')
            else:
                d['alternatenames'] = []
            yield d
Example #28
Source File: test_reader.py From batch-scoring with BSD 3-Clause "New" or "Revised" License

def really_big_fields_enabled(self):
    old_limit = csv.field_size_limit()
    csv.field_size_limit(2 ** 28)
    yield
    csv.field_size_limit(old_limit)
Example #29
Source File: __init__.py From starthinker with Apache License 2.0

def csv_to_rows(csv_string):
    if csv_string:
        csv.field_size_limit(sys.maxsize)

        if isinstance(csv_string, str):
            csv_string = StringIO(csv_string)

        for row in csv.reader(csv_string, delimiter=',', quotechar='"',
                              quoting=csv.QUOTE_MINIMAL, skipinitialspace=True,
                              escapechar='\\'):
            yield row
Example #30
Source File: Sets2Sets.py From Sets2Sets with Apache License 2.0

def generate_dictionary_BA(path, files, attributes_list):
    # path = '../Minnemudac/'
    # files = ['Coborn_history_order.csv','Coborn_future_order.csv']
    # files = ['BA_history_order.csv', 'BA_future_order.csv']
    # attributes_list = ['MATERIAL_NUMBER']
    dictionary_table = {}
    counter_table = {}

    for attr in attributes_list:
        dictionary = {}
        dictionary_table[attr] = dictionary
        counter_table[attr] = 0

    csv.field_size_limit(sys.maxsize)

    for filename in files:
        count = 0
        with open(path + filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in reader:
                if count == 0:
                    count += 1
                    continue
                key = attributes_list[0]
                if row[2] not in dictionary_table[key]:
                    dictionary_table[key][row[2]] = counter_table[key]
                    counter_table[key] = counter_table[key] + 1
                count += 1

    print(counter_table)

    total = 0
    for key in counter_table.keys():
        total = total + counter_table[key]

    print('# dimensions of final vector: ' + str(total) + ' | ' + str(count - 1))

    return dictionary_table, total, counter_table