Python unicodecsv.DictReader() Examples
The following are 30 code examples of unicodecsv.DictReader(), taken from open-source projects. The project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the unicodecsv module.
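Before the project examples, a minimal usage sketch may help for orientation: unicodecsv mirrors the standard-library csv API, but it consumes byte streams and decodes each field for you, so files are opened in binary mode and an encoding is passed to the reader. The file name and column names below (people.csv, name, age) are hypothetical, and Python 3 syntax is assumed.

# Minimal usage sketch (hypothetical file and columns).
import unicodecsv

with open('people.csv', 'rb') as f:       # binary mode: unicodecsv decodes for you
    reader = unicodecsv.DictReader(f, encoding='utf-8')
    for row in reader:                    # each row is a dict keyed by the header row
        print(row['name'], row['age'])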
Example #1
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_short(self):
    fd, name = tempfile.mkstemp()
    fileobj = os.fdopen(fd, "w+b")
    try:
        fileobj.write(b"1,2,abc,4,5,6\r\n1,2,abc\r\n")
        fileobj.seek(0)
        reader = csv.DictReader(fileobj,
                                fieldnames="1 2 3 4 5 6".split(),
                                restval="DEFAULT")
        self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                        "4": '4', "5": '5', "6": '6'})
        self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                        "4": 'DEFAULT', "5": 'DEFAULT',
                                        "6": 'DEFAULT'})
    finally:
        fileobj.close()
        os.unlink(name)
Example #2
Source File: make_grades_persistent.py From edx2bigquery with GNU General Public License v2.0
def cleanup_rows_from_grade_persistent(csvfn, tempfn, field_to_fix="passed_timestamp"):
    """
    Removes the null values from grades_persistentcoursegrade.csv.gz.
    The function also fixes course ids by changing them from their
    edX URL format to their usual format. For instance,
    course-v1:MITx+STL.162x+2T2017 should be MITx/STL.162x/2T2017.

    This operation permanently modifies the CSV.

    :param csvfn: The path of the csv.gz to be modified
    :param tempfn: The path of the temporary csv.gz
    :type csvfn: str
    :type tempfn: str
    """
    with gzip.open(csvfn, "r") as open_csv:
        csv_dict = csv.DictReader(open_csv)
        with gzip.open(tempfn, "w+") as write_csv_file:
            write_csv = csv.DictWriter(write_csv_file, fieldnames=csv_dict.fieldnames)
            write_csv.writeheader()
            for row in csv_dict:
                row_dict = remove_nulls_from_row(row, field_to_fix)
                row_dict = fix_course_ids(row_dict)
                write_csv.writerow(row_dict)
    os.rename(tempfn, csvfn)
Example #3
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_dict_no_fieldnames(self):
    fd, name = tempfile.mkstemp()
    fileobj = os.fdopen(fd, "w+b")
    try:
        fileobj.write(b"f1,f2,f3\r\n1,2,abc\r\n")
        fileobj.seek(0)
        reader = csv.DictReader(fileobj)
        self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
        self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})
    finally:
        fileobj.close()
        os.unlink(name)

# Two test cases to make sure existing ways of implicitly setting
# fieldnames continue to work. Both arise from discussion in issue3436.
Example #4
Source File: print_batch.py From adversarial-squad with MIT License
def pred_human_eval():
    all_preds = collections.defaultdict(list)
    with open(OPTS.filename) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            all_preds[row['Input.qid']].append(row['Answer.response'])
    preds = {}
    for qid in all_preds:
        if OPTS.ensemble:
            for a in all_preds[qid]:
                count = sum(1 for pred in all_preds[qid] if a == pred)
                if count > 1:
                    preds[qid] = a
                    break
            else:
                preds[qid] = random.sample(all_preds[qid], 1)[0]
        else:
            preds[qid] = random.sample(all_preds[qid], 1)[0]
    print json.dumps(preds)
Example #5
Source File: test_functional.py From doorman with MIT License
def test_node_csv_download(self, node, testapp):
    import unicodecsv as csv
    node.enrolled_on = dt.datetime.utcnow()
    node.last_checkin = dt.datetime.utcnow()
    node.last_ip = '1.1.1.1'
    node.node_info = {'hardware_vendor': "Honest Achmed's Computer Supply"}
    node.save()

    resp = testapp.get(url_for('manage.nodes_csv'))

    assert resp.headers['Content-Type'] == 'text/csv; charset=utf-8'
    assert resp.headers['Content-Disposition'] == 'attachment; filename=nodes.csv'

    reader = csv.DictReader(io.BytesIO(resp.body))
    row = next(reader)

    assert row['Display Name'] == node.display_name
    assert row['Host Identifier'] == node.host_identifier
    assert row['Enrolled On'] == str(node.enrolled_on)
    assert row['Last Check-In'] == str(node.last_checkin)
    assert row['Last Ip Address'] == node.last_ip
    assert row['Is Active'] == 'True'
    assert row['Make'] == node.node_info['hardware_vendor']
Example #6
Source File: print_batch.py From adversarial-squad with MIT License
def dump_verify():
    with open(OPTS.filename) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if OPTS.worker and row['WorkerId'] != OPTS.worker: continue
            if row['AssignmentStatus'] == 'Rejected': continue
            print 'HIT %s' % row['HITId']
            print 'WorkerId: %s' % row['WorkerId']
            print 'Time: %s s' % row['WorkTimeInSeconds']
            qids = row['Input.qids'].split('\t')
            questions = row['Input.questions'].split('\t')
            sents = row['Answer.sents'].split('\t')
            responses = row['Answer.responses'].split('\t')
            for qid, q, s_str, response_str in zip(
                    qids, questions, sents, responses):
                print (' Example %s' % qid)
                print (' Question %s' % q)
                s_list = s_str.split('|')
                a_list = response_str.split('|')
                for s, a in zip(s_list, a_list):
                    print (' Sentence: %s' % sent_format(s)).encode('utf-8')
                    print (' Is good? %s' % colored(a, 'cyan'))
Example #7
Source File: print_batch.py From adversarial-squad with MIT License
def dump_grammar():
    with open(OPTS.filename) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if OPTS.worker and row['WorkerId'] != OPTS.worker: continue
            if row['AssignmentStatus'] == 'Rejected': continue
            print 'HIT %s' % row['HITId']
            print 'WorkerId: %s' % row['WorkerId']
            print 'Time: %s s' % row['WorkTimeInSeconds']
            input_qids = row['Input.qids'].split('\t')
            input_sents = row['Input.sents'].split('\t')
            ans_is_good = row['Answer.is-good'].split('\t')
            ans_responses = row['Answer.responses'].split('\t')
            for qid, s, is_good, response in zip(input_qids, input_sents,
                                                 ans_is_good, ans_responses):
                print (' Example %s' % qid)
                print (' Sentence: %s' % s).encode('utf-8')
                print (' Is good? %s' % is_good)
                print (' Response: %s' % colored(response, 'cyan')).encode('utf-8')
Example #8
Source File: print_batch.py From adversarial-squad with MIT License
def stats_grammar():
    # Read data
    worker_to_is_good = collections.defaultdict(list)
    worker_to_times = collections.defaultdict(list)
    with open(OPTS.filename) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if row['AssignmentStatus'] == 'Rejected': continue
            worker_id = row['WorkerId']
            ans_is_good = row['Answer.is-good'].split('\t')
            time = float(row['WorkTimeInSeconds'])
            worker_to_is_good[worker_id].extend(ans_is_good)
            worker_to_times[worker_id].append(time)
    # Aggregate by worker
    print '%d total workers' % len(worker_to_times)
    worker_stats = {}
    for worker_id in worker_to_times:
        times = sorted(worker_to_times[worker_id])
        t_median = times[len(times)/2]
        t_mean = sum(times) / float(len(times))
        is_good_list = worker_to_is_good[worker_id]
        num_qs = len(is_good_list)
        frac_good = sum(1.0 for x in is_good_list if x == 'yes') / num_qs
        worker_stats[worker_id] = (t_median, t_mean, num_qs, frac_good)
    # Print
    sorted_ids = sorted(list(worker_stats), key=lambda x: worker_stats[x][3])
    for worker_id in sorted_ids:
        t_median, t_mean, num_qs, frac_good = worker_stats[worker_id]
        print 'Worker %s: t_median %.1f, t_mean %.1f, %d questions, %.1f%% good' % (
            worker_id, t_median, t_mean, num_qs, 100.0 * frac_good)
Example #9
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_long_with_rest_no_fieldnames(self):
    fd, name = tempfile.mkstemp()
    fileobj = os.fdopen(fd, "w+b")
    try:
        fileobj.write(b"f1,f2\r\n1,2,abc,4,5,6\r\n")
        fileobj.seek(0)
        reader = csv.DictReader(fileobj, restkey="_rest")
        self.assertEqual(reader.fieldnames, ["f1", "f2"])
        self.assertEqual(next(reader), {"f1": '1', "f2": '2',
                                        "_rest": ["abc", "4", "5", "6"]})
    finally:
        fileobj.close()
        os.unlink(name)
Example #10
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_multi(self):
    sample = [
        b'2147483648,43.0e12,17,abc,def\r\n',
        b'147483648,43.0e2,17,abc,def\r\n',
        b'47483648,43.0,170,abc,def\r\n'
    ]
    reader = csv.DictReader(sample, fieldnames="i1 float i2 s1 s2".split())
    self.assertEqual(next(reader), {"i1": '2147483648', "float": '43.0e12',
                                    "i2": '17', "s1": 'abc', "s2": 'def'})
Example #11
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_with_blanks(self):
    reader = csv.DictReader([b"1,2,abc,4,5,6\r\n", b"\r\n", b"1,2,abc,4,5,6\r\n"],
                            fieldnames="1 2 3 4 5 6".split())
    self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                    "4": '4', "5": '5', "6": '6'})
    self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                    "4": '4', "5": '5', "6": '6'})
Example #12
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_empty_file(self):
    csv.DictReader(BytesIO())
Example #13
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_decode_error_dictreader(self):
    """Make sure the error-handling mode is obeyed on DictReaders."""
    file = EncodedFile(BytesIO(u'name,height,weight\nLöwis,2,3'.encode('iso-8859-1')),
                       data_encoding='iso-8859-1')
    reader = csv.DictReader(file, encoding='ascii', errors='ignore')
    self.assertEqual(list(reader)[0]['name'], 'Lwis')
Example #14
Source File: utils.py From edx-enterprise with GNU Affero General Public License v3.0
def validate_csv(file_stream, expected_columns=None):
    """
    Validate csv file for encoding and expected header.

    Args:
        file_stream: input file
        expected_columns: list of column names that are expected to be present in csv

    Returns:
        reader: an iterable for csv data if csv passes the validation

    Raises:
        ValidationError
    """
    try:
        reader = unicodecsv.DictReader(file_stream, encoding="utf-8")
        reader_fieldnames = reader.fieldnames
    except (unicodecsv.Error, UnicodeDecodeError):
        raise ValidationError(ValidationMessages.INVALID_ENCODING)

    if expected_columns and set(expected_columns) - set(reader_fieldnames):
        raise ValidationError(ValidationMessages.MISSING_EXPECTED_COLUMNS.format(
            expected_columns=", ".join(expected_columns),
            actual_columns=", ".join(reader.fieldnames)
        ))
    return reader
Example #15
Source File: gen_csv_verify.py From adversarial-squad with MIT License
def read_sentences():
    id_to_sents = collections.defaultdict(list)
    with open(OPTS.batch_file) as f:
        reader = csv.DictReader(f)
        for row in reader:
            input_qids = row['Input.qids'].split('\t')
            input_sents = row['Input.sents'].split('\t')
            ans_is_good = row['Answer.is-good'].split('\t')
            ans_responses = row['Answer.responses'].split('\t')
            for qid, s, is_good, response in zip(input_qids, input_sents,
                                                 ans_is_good, ans_responses):
                if is_good == 'yes':
                    response = s
                if response not in id_to_sents[qid]:
                    id_to_sents[qid].append(response)
    return id_to_sents
Example #16
Source File: test.py From pyRevit with GNU General Public License v3.0
def test_read_dict_fieldnames_chain(self):
    import itertools
    fd, name = tempfile.mkstemp()
    f = os.fdopen(fd, "w+b")
    try:
        f.write(b"f1,f2,f3\r\n1,2,abc\r\n")
        f.seek(0)
        reader = csv.DictReader(f)
        first = next(reader)
        for row in itertools.chain([first], reader):
            self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
            self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})
    finally:
        f.close()
        os.unlink(name)
Example #17
Source File: print_batch.py From adversarial-squad with MIT License
def stats_verify():
    # Read data
    worker_to_is_good = collections.defaultdict(list)
    worker_to_times = collections.defaultdict(list)
    with open(OPTS.filename) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if row['AssignmentStatus'] == 'Rejected': continue
            worker_id = row['WorkerId']
            ans_is_good = [x for s in row['Answer.responses'].split('\t')
                           for x in s.split('|')]
            time = float(row['WorkTimeInSeconds'])
            worker_to_is_good[worker_id].extend(ans_is_good)
            worker_to_times[worker_id].append(time)
    # Aggregate by worker
    print '%d total workers' % len(worker_to_times)
    worker_stats = {}
    for worker_id in worker_to_times:
        times = sorted(worker_to_times[worker_id])
        t_median = times[len(times)/2]
        t_mean = sum(times) / float(len(times))
        is_good_list = worker_to_is_good[worker_id]
        num_qs = len(is_good_list)
        frac_good = sum(1.0 for x in is_good_list if x == 'yes') / num_qs
        worker_stats[worker_id] = (t_median, t_mean, num_qs, frac_good)
    # Print
    sorted_ids = sorted(list(worker_stats), key=lambda x: worker_stats[x][3])
    for worker_id in sorted_ids:
        t_median, t_mean, num_qs, frac_good = worker_stats[worker_id]
        print 'Worker %s: t_median %.1f, t_mean %.1f, %d questions, %.1f%% good' % (
            worker_id, t_median, t_mean, num_qs, 100.0 * frac_good)
Example #18
Source File: sos_parser.py From openelections-data-or with MIT License
def fetch_state_senate_races():
    f = open('state_senate_2016.csv', 'r')
    reader = unicodecsv.DictReader(f, encoding='utf-8')
    return [row for row in reader]
Example #19
Source File: sos_parser.py From openelections-data-or with MIT License
def fetch_state_house_races():
    f = open('state_house_2016.csv', 'r')
    reader = unicodecsv.DictReader(f, encoding='utf-8')
    return [row for row in reader]
Example #20
Source File: standardizer.py From 990-xml-reader with MIT License
def _make_groups(self):
    group_filepath = os.path.join(METADATA_DIRECTORY, 'groups.csv')
    with open(group_filepath, 'r') as reader_fh:
        reader = csv.DictReader(reader_fh)
        for row in reader:
            self.groups[row['xpath']] = row
    return True
Example #21
Source File: standardizer.py From 990-xml-reader with MIT License
def _make_variables(self):
    variable_filepath = os.path.join(METADATA_DIRECTORY, 'variables.csv')
    with open(variable_filepath, 'r') as variable_fh:
        reader = csv.DictReader(variable_fh)
        for row in reader:
            vardict = {}
            for col in self.variable_columns:
                vardict[col] = row[col]
            self.variables[row['xpath']] = vardict
    return True
Example #22
Source File: standardizer.py From 990-xml-reader with MIT License
def _make_schedule_parts(self):
    part_filepath = os.path.join(METADATA_DIRECTORY, 'schedule_parts.csv')
    with open(part_filepath, 'r') as reader_fh:
        reader = csv.DictReader(reader_fh)
        for row in reader:
            self.schedule_parts[row['parent_sked_part']] = {
                'name': row['part_name'],
                'ordering': row['ordering'],
                'parent_sked': row['parent_sked'],
                'parent_sked_part': row['parent_sked_part'],
                'is_shell': row['is_shell']
            }
    return True
Example #23
Source File: standardizer.py From 990-xml-reader with MIT License
def _make_line_numbers(self):
    filepath = os.path.join(METADATA_DIRECTORY, 'line_numbers.csv')
    with open(filepath, 'r') as reader_fh:
        reader = csv.DictReader(reader_fh)
        for row in reader:
            try:
                self.line_numbers[row['xpath']]
                self.line_numbers[row['xpath']].append(row)
            except KeyError:
                self.line_numbers[row['xpath']] = [row]
Example #24
Source File: standardizer.py From 990-xml-reader with MIT License
def _make_descriptions(self):
    filepath = os.path.join(METADATA_DIRECTORY, 'descriptions.csv')
    with open(filepath, 'r') as reader_fh:
        reader = csv.DictReader(reader_fh)
        for row in reader:
            try:
                self.descriptions[row['xpath']]
                self.descriptions[row['xpath']].append(row)
            except KeyError:
                self.descriptions[row['xpath']] = [row]
Example #25
Source File: import_geonames.py From EpiTator with Apache License 2.0
def read_geonames_csv():
    print("Downloading geoname data from: " + GEONAMES_ZIP_URL)
    try:
        url = request.urlopen(GEONAMES_ZIP_URL)
    except URLError:
        print("If you are operating behind a firewall, try setting the HTTP_PROXY/HTTPS_PROXY environment variables.")
        raise
    zipfile = ZipFile(BytesIO(url.read()))
    print("Download complete")
    # Loading geonames data may cause errors without setting csv.field_size_limit:
    if sys.platform == "win32":
        max_c_long_on_windows = (2**32 / 2) - 1
        csv.field_size_limit(max_c_long_on_windows)
    else:
        csv.field_size_limit(sys.maxint if six.PY2 else six.MAXSIZE)
    with zipfile.open('allCountries.txt') as f:
        reader = unicodecsv.DictReader(f,
                                       fieldnames=[k for k, v in geonames_field_mappings],
                                       encoding='utf-8',
                                       delimiter='\t',
                                       quoting=csv.QUOTE_NONE)
        for d in reader:
            d['population'] = parse_number(d['population'], 0)
            d['latitude'] = parse_number(d['latitude'], 0)
            d['longitude'] = parse_number(d['longitude'], 0)
            if len(d['alternatenames']) > 0:
                d['alternatenames'] = d['alternatenames'].split(',')
            else:
                d['alternatenames'] = []
            yield d
Example #26
Source File: load_course_sql.py From edx2bigquery with GNU General Public License v2.0
def rephrase_studentmodule_opaque_keys(fn_sm):
    '''
    Generate rephrased studentmodule, with opaque key entries for module_id and course_id
    translated into traditional format.
    '''
    fn_sm = path(fn_sm)
    orig_sm_fn = '%s/studentmodule_orig.csv.gz' % (fn_sm.dirname())
    cmd = 'cp %s %s' % (fn_sm, orig_sm_fn)
    print " Running %s" % cmd
    sys.stdout.flush()
    os.system(cmd)
    ofp = openfile(fn_sm, 'w')
    smfp = openfile(orig_sm_fn)
    cdr = csv.DictReader(smfp)
    first = True
    for entry in cdr:
        if first:
            odw = csv.DictWriter(ofp, fieldnames=cdr.fieldnames)
            odw.writeheader()
            first = False
        fix_opaque_keys(entry, 'module_id')
        fix_opaque_keys(entry, 'course_id')
        odw.writerow(entry)
    ofp.close()
    print "Rephrased %s -> %s to convert opaque keys syntax to standard module_id and course_id format" % (orig_sm_fn, fn_sm)
    sys.stdout.flush()

#-----------------------------------------------------------------------------
Example #27
Source File: make_person_course.py From edx2bigquery with GNU General Public License v2.0
def load_csv(self, fn, key, schema=None, multi=False, fields=None, keymap=None,
             useCourseDir=True):
    '''
    load csv file into memory, storing into dict with specified field (key) as the key.
    if multi, then each dict value is a list, with one or more values per key.

    if fields, load only those specified fields.
    '''
    data = OrderedDict()
    if keymap is None:
        keymap = lambda x: x
    for line in csv.DictReader(self.openfile(fn, useCourseDir=useCourseDir)):
        try:
            the_id = keymap(line[key])
        except Exception as err:
            self.log("oops, failed to do keymap, key=%s, line=%s" % (line[key], line))
            raise
        if fields:
            newline = {x: line[x] for x in fields}
            line = newline
        if multi:
            if the_id in data:
                data[the_id].append(line)
            else:
                data[the_id] = [line]
        else:
            data[the_id] = line
    return data
Example #28
Source File: test_hgvs_variantmapper_gcp.py From hgvs with Apache License 2.0
def gxp_file_reader(fn):
    rdr = csv.DictReader(open(fn, "r"), delimiter=str("\t"))
    for rec in rdr:
        if rec["id"].startswith("#"):
            continue
        yield rec
Example #29
Source File: test_hgvs_grammar_full.py From hgvs with Apache License 2.0
def test_parser_test_completeness(self):
    """ensure that all rules in grammar have tests"""
    grammar_rule_re = re.compile(r"^(\w+)")
    grammar_fn = pkg_resources.resource_filename("hgvs", "_data/hgvs.pymeta")
    with open(grammar_fn, "r") as f:
        grammar_rules = set(r.group(1) for r in filter(None, map(grammar_rule_re.match, f)))

    with open(self._test_fn, "r") as f:
        reader = csv.DictReader(f, delimiter=str("\t"))
        test_rules = set(row["Func"] for row in reader)

    untested_rules = grammar_rules - test_rules

    self.assertTrue(len(untested_rules) == 0, "untested rules: {}".format(untested_rules))
Example #30
Source File: test_hgvs_grammar_full.py From hgvs with Apache License 2.0
def test_parser_grammar(self):
    with open(self._test_fn, "r") as f:
        reader = csv.DictReader(f, delimiter=str("\t"))

        fail_cases = []

        for row in reader:
            if row["Func"].startswith("#"):
                continue

            # setup input
            inputs = self._split_inputs(row["Test"], row["InType"])
            expected_results = self._split_inputs(row["Expected"], row["InType"]) if row["Expected"] else inputs
            expected_map = dict(zip(inputs, expected_results))

            # step through each item and check
            is_valid = True if row["Valid"].lower() == "true" else False
            for key in expected_map:
                expected_result = six.text_type(expected_map[key]).replace("u'", "'")
                function_to_test = getattr(self.p._grammar(key), row["Func"])
                row_str = u"{}\t{}\t{}\t{}\t{}".format(row["Func"], key, row["Valid"], "one", expected_result)
                try:
                    actual_result = six.text_type(function_to_test()).replace("u'", "'")
                    if not is_valid or (expected_result != actual_result):
                        print("expected: {} actual:{}".format(expected_result, actual_result))
                        fail_cases.append(row_str)
                except Exception as e:
                    if is_valid:
                        print("expected: {} Exception: {}".format(expected_result, e))
                        fail_cases.append(row_str)

    # everything should have passed - report whatever failed
    self.assertTrue(len(fail_cases) == 0, pprint.pprint(fail_cases))