Python unicodecsv.DictWriter() Examples

The following are 30 code examples of unicodecsv.DictWriter(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module unicodecsv, or try the search function.
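Before the examples, here is a minimal sketch of the typical pattern (the file name and data are illustrative): unicodecsv mirrors the standard-library csv module but encodes output itself, so the file is opened in binary mode and an encoding is passed to the writer.

import unicodecsv

rows = [{u'name': u'Beyoncé', u'city': u'Houston'}]

# binary mode: unicodecsv encodes the text and writes bytes
with open('people.csv', 'wb') as f:
    writer = unicodecsv.DictWriter(f, fieldnames=[u'name', u'city'],
                                   encoding='utf-8')
    writer.writeheader()
    writer.writerows(rows)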
Example #1
Source File: cli.py    From pdfplumber with MIT License
def to_csv(pdf, types, encoding):
    objs = []
    fields = set()
    for t in types:
        new_objs = getattr(pdf, t + "s")
        if len(new_objs):
            objs += new_objs
            fields = fields.union(set(new_objs[0].keys()))

    first_columns = [
        "object_type", "page_number",
        "x0", "x1", "y0", "y1",
        "doctop", "top", "bottom",
        "width", "height"
    ]

    cols = first_columns + list(sorted(set(fields) - set(first_columns)))
    stdout = (sys.stdout.buffer if sys.version_info[0] >= 3 else sys.stdout)
    w = unicodecsv.DictWriter(stdout,
        fieldnames=cols, encoding=encoding)
    w.writeheader()
    w.writerows(objs) 
Example #2
Source File: bqutil.py    From edx2bigquery with GNU General Public License v2.0
def convert_data_dict_to_csv(tdata, extra_fields=None):
    '''
    Convert dict format data from get_table_data into CSV file content, as a string.

    If extra_fields is not None, then add data from extra_fields to each row.  
    This can be used, e.g. for adding course_id to a table missing that field.
    '''
    import unicodecsv as csv
    from StringIO import StringIO

    sfp = StringIO()
    extra_fields = extra_fields or {}
    fields = extra_fields.keys()
    fields += tdata['field_names']
    dw = csv.DictWriter(sfp, fieldnames=fields)
    dw.writeheader()
    for row in tdata['data']:
        row.update(extra_fields)
        dw.writerow(row)
    return sfp.getvalue() 
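The snippet above is Python 2 only: StringIO.StringIO no longer exists, and dict.keys() stopped returning a list. A Python 3 friendly variant, assuming the same tdata shape ({'field_names': [...], 'data': [...]}), buffers the bytes unicodecsv emits in io.BytesIO:

from io import BytesIO
import unicodecsv as csv

def convert_data_dict_to_csv_py3(tdata, extra_fields=None):
    # BytesIO, not StringIO: unicodecsv writes encoded bytes
    sfp = BytesIO()
    extra_fields = extra_fields or {}
    fields = list(extra_fields.keys()) + tdata['field_names']
    dw = csv.DictWriter(sfp, fieldnames=fields)
    dw.writeheader()
    for row in tdata['data']:
        row.update(extra_fields)
        dw.writerow(row)
    return sfp.getvalue()  # bytes rather than str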
Example #3
Source File: make_grades_persistent.py    From edx2bigquery with GNU General Public License v2.0
def cleanup_rows_from_grade_persistent(csvfn, tempfn, field_to_fix="passed_timestamp"):
    """
    Removes the null values from grades_persistentcoursegrade.csv.gz.
    The function also fixes course ids by changing them from their
    edX URL format to their usual format. For instance,
    course-v1:MITx+STL.162x+2T2017 should be MITx/STL.162x/2T2017.

    This operation permanently modifies the CSV.

    :param csvfn: The path of the csv.gz to be modified
    :param tempfn: The path of the temporary csv.gz
    :type csvfn: str
    :type tempfn: str
    """
    with gzip.open(csvfn, "r") as open_csv:
        csv_dict = csv.DictReader(open_csv)
        with gzip.open(tempfn, "w+") as write_csv_file:
            write_csv = csv.DictWriter(write_csv_file, fieldnames=csv_dict.fieldnames)
            write_csv.writeheader()
            for row in csv_dict:
                row_dict = remove_nulls_from_row(row, field_to_fix)
                row_dict = fix_course_ids(row_dict)
                write_csv.writerow(row_dict)
    os.rename(tempfn, csvfn) 
Example #4
Source File: put_repo_requests_in_db.py    From oadoi with MIT License
def save_repo_request_rows(rows):

    with open('out.csv','wb') as f:

        w = csv.DictWriter(f, fieldnames=RepoRequest.list_fieldnames(), encoding='utf-8-sig')

        for row in rows[1:]:  # skip header row
            my_repo_request = RepoRequest()
            my_repo_request.set_id_seed(row[0])
            column_num = 0
            for fieldname in RepoRequest.list_fieldnames():
                if fieldname != "id":
                    setattr(my_repo_request, fieldname, row[column_num])
                    column_num += 1

            w.writerow(my_repo_request.to_dict())
            print u"adding repo request {}".format(my_repo_request)
            db.session.merge(my_repo_request)

        safe_commit(db) 
Example #5
Source File: csv_adapter.py    From salesforce-bulk with MIT License
def next(self):
        row = next(self.source)

        self.buffer.truncate(0)
        self.buffer.seek(0)

        if not self.csv:
            self.csv = csv.DictWriter(self.buffer, list(row.keys()), quoting=csv.QUOTE_NONNUMERIC)
            self.add_header = True
        if self.add_header:
            if hasattr(self.csv, 'writeheader'):
                self.csv.writeheader()
            else:
                self.csv.writerow(dict((fn, fn) for fn in self.csv.fieldnames))
            self.add_header = False

        self.csv.writerow(row)
        self.buffer.seek(0)
        return self.buffer.read() 
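For context, this next() lives inside an adapter class (CsvDictsAdapter in salesforce-bulk's csv_adapter.py) that turns an iterator of dicts into an iterator of CSV-encoded byte chunks, with the header folded into the first chunk. A hypothetical usage sketch, assuming the package-level import path:

from salesforce_bulk import CsvDictsAdapter  # assumed import path

rows = iter([{'Name': 'Acme'}, {'Name': 'Globex'}])
for chunk in CsvDictsAdapter(rows):
    print(chunk)  # b'"Name"\r\n"Acme"\r\n', then b'"Globex"\r\n'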
Example #6
Source File: output_writer.py    From Python-Digital-Forensics-Cookbook with MIT License
def unicode_csv_dict_writer_py2(data, header, output_directory, name=None):
    try:
        import unicodecsv
    except ImportError:
        print("[+] Install unicodecsv module before executing this"
              " function")
        sys.exit(1)

    if name is None:
        name = "output.csv"

    print("[+] Writing {} to {}".format(name, output_directory))
    with open(os.path.join(output_directory, name), "wb") as csvfile:
        writer = unicodecsv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()

        writer.writerows(data) 
Example #7
Source File: utilcsv.py    From Python-Digital-Forensics-Cookbook with MIT License
def unicode_csv_dict_writer(data, header, output_directory, name=None):
    try:
        import unicodecsv
    except ImportError:
        print("[+] Install unicodecsv module before executing this function")
        sys.exit(1)

    if name is None:
        name = "output.csv"

    print("[+] Writing {} to {}".format(name, output_directory))
    with open(os.path.join(output_directory, name), "wb") as csvfile:
        writer = unicodecsv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()

        writer.writerows(data) 
Example #8
Source File: test.py    From pyRevit with GNU General Public License v3.0
def test_encode_error_dictwriter(self):
        fd = BytesIO()
        dw = csv.DictWriter(fd, ['col1'],
                            encoding='cp1252', errors='xmlcharrefreplace')
        dw.writerow({'col1': chr(2604)})
        self.assertEqual(fd.getvalue(), b'&#2604;\r\n') 
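As the test shows, unicodecsv's writers accept the same encoding and errors arguments as str.encode, so any codec error handler can be applied to characters the target encoding cannot represent. A small sketch with 'replace':

from io import BytesIO
import unicodecsv as csv

buf = BytesIO()
w = csv.DictWriter(buf, ['col1'], encoding='ascii', errors='replace')
w.writerow({'col1': u'café'})
print(buf.getvalue())  # b'caf?\r\n' -- the unencodable character is replaced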
Example #9
Source File: load_course_sql.py    From edx2bigquery with GNU General Public License v2.0
def rephrase_studentmodule_opaque_keys(fn_sm):
    '''
    Generate rephrased studentmodule, with opaque key entries for module_id and course_id translated
    into traditional format.
    '''
    fn_sm = path(fn_sm)
    orig_sm_fn = '%s/studentmodule_orig.csv.gz' % (fn_sm.dirname())
    cmd = 'cp %s %s' % (fn_sm, orig_sm_fn)
    print "  Running %s" % cmd
    sys.stdout.flush()
    os.system(cmd)
    ofp = openfile(fn_sm, 'w')
    smfp = openfile(orig_sm_fn)
    cdr = csv.DictReader(smfp)
    first = True
    for entry in cdr:
        if first:
            odw = csv.DictWriter(ofp, fieldnames=cdr.fieldnames)
            odw.writeheader()
            first = False
        fix_opaque_keys(entry, 'module_id')
        fix_opaque_keys(entry, 'course_id')
        odw.writerow(entry)
    ofp.close()
    print "Rephrased %s -> %s to convert opaque keys syntax to standard module_id and course_id format" % (orig_sm_fn, fn_sm)
    sys.stdout.flush()

#----------------------------------------------------------------------------- 
Example #10
Source File: dumptool.py    From jsoncsv with Apache License 2.0
def write_headers(self):
        self.csv_writer = csv.DictWriter(self.fout, self._headers)
        self.csv_writer.writeheader() 
Example #11
Source File: csv_writer.py    From Learning-Python-for-Forensics-Second-Edition with MIT License
def csv_writer(output_data, headers, output_dir, output_name):
	"""
	The csv_writer function uses the csv DictWriter module to
	write the list of dictionaries. The	DictWriter can take
	a fieldnames argument, as a list, which represents the
	desired order of columns.
	:param output_data: The list of dictionaries containing
	embedded metadata.
	:param headers: A list of keys in the dictionary that
	represent the desired order of columns in the output.
	:param output_dir: The folder to write the output CSV to.
	:param output_name: The name of the output CSV.
	:return:
	"""
	msg = 'Writing ' + output_name + ' CSV output.'
	print('[+]', msg)
	logging.info(msg)
	
	out_file = os.path.join(output_dir, output_name)
	
	if sys.version_info[0] == 2:
		csvfile = open(out_file, "wb")
	elif sys.version_info[0] == 3:
		csvfile = open(out_file, "w", newline='',
		encoding='utf-8')
	
	with csvfile:
		# We use DictWriter instead of Writer to write
		# dictionaries to CSV.
		writer = csv.DictWriter(csvfile, fieldnames=headers)

		# Writerheader writes the header based on the supplied
		# headers object
		writer.writeheader()
		for dictionary in output_data:
			if dictionary:
				writer.writerow(dictionary) 
Example #12
Source File: file_lister_peewee.py    From Learning-Python-for-Forensics-Second-Edition with MIT License
def write_csv(source, custodian_model):
    """
    The write_csv function generates a CSV report from the Files
        table
    :param source: The output file path
    :param custodian_model: Peewee model instance for the
        custodian
    :return: None
    """
    query = Files.select().where(
        Files.custodian == custodian_model.id).dicts()
    logger.info('Writing CSV report')

    cols = [u'id', u'custodian', u'file_name', u'file_path',
            u'extension', u'file_size', u'ctime', u'mtime',
            u'atime', u'mode', u'inode']

    with open(source, 'wb') as csv_file:
        csv_writer = csv.DictWriter(csv_file, cols)
        csv_writer.writeheader()
        for counter, row in enumerate(query):
            csv_writer.writerow(row)
            if counter % 10000 == 0:
                logger.debug('{:,} lines written'.format(counter))
        logger.debug('{:,} lines written'.format(counter))

    logger.info('CSV Report completed: ' + source) 
Example #13
Source File: pysysinfo.py    From Learning-Python-for-Forensics-Second-Edition with MIT License
def csv_writer(data, outdir, name, headers, **kwargs):
	"""
	The csv_writer function writes WMI or process information
	to a CSV output file.
	:param data: The dictionary or list containing the data to
	write to the CSV file.
	:param outdir: The directory to write the CSV report to.
	:param name: the name of the output CSV file.
	:param headers: the CSV column headers.
	:return: Nothing.
	"""
	out_file = os.path.join(outdir, name)

	if sys.version_info[0] == 2:
		csvfile = open(out_file, "wb")
	elif sys.version_info[0] == 3:
		csvfile = open(out_file, "w", newline='',
		encoding='utf-8')

	if 'type' in kwargs:
		with csvfile:
			csvwriter = csv.DictWriter(csvfile, headers,
			extrasaction='ignore')
			csvwriter.writeheader()
			csvwriter.writerows([v for v in data.values()])

	else:
		with csvfile:
			csvwriter = csv.writer(csvfile)
			csvwriter.writerow(headers)
			for row in data:
				csvwriter.writerow([row[x] for x in headers]) 
Example #14
Source File: csv_writer.py    From forseti-security with Apache License 2.0
def write_csv(resource_name, data, write_header=False):
    """Start the csv writing flow.

    Args:
        resource_name (str): The resource name.
        data (iterable): An iterable of data to be written to csv.
        write_header (bool): If True, write the header in the csv file.

    Yields:
       object: The CSV temporary file pointer.

    Raises:
        CSVFileError: If there was an error writing the CSV file.
    """
    csv_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        writer = csv.DictWriter(csv_file,
                                extrasaction='ignore',
                                fieldnames=CSV_FIELDNAME_MAP[resource_name])
        if write_header:
            writer.writeheader()

        for row in data:
            # Not ready to send these data via CSV attachment as they break
            # across multiple columns.
            row.pop('inventory_data', None)
            writer.writerow(normalize_nested_dicts(row))

        # This must be closed before returned for loading.
        csv_file.close()
        yield csv_file

        # Remove the csv file after loading.
        os.remove(csv_file.name)
    except (OSError, csv.Error) as e:
        raise CSVFileError(resource_name, e) 
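Note that write_csv is a generator: it yields the temporary file and only deletes it once the consumer resumes the generator, so a caller iterates rather than calling it like a plain function. A hypothetical caller (upload_to_gcs is illustrative):

for csv_file in write_csv('projects', rows, write_header=True):
    # the temp file exists here; it is removed when the loop resumes the generator
    upload_to_gcs(csv_file.name)  # hypothetical consumer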
Example #15
Source File: file_system_neo4j_csv_loader.py    From amundsendatabuilder with Apache License 2.0
def __init__(self):
        # type: () -> None
        self._node_file_mapping = {}  # type: Dict[Any, DictWriter]
        self._relation_file_mapping = {}  # type: Dict[Any, DictWriter]
        self._closer = Closer() 
Example #16
Source File: exports.py    From EasY_HaCk with Apache License 2.0
def csvify(rows):
    '''Expects a list of dictionaries and returns a CSV response.'''
    if not rows:
        csv_str = ''
    else:
        s = BytesIO()
        keys = rows[0].keys()
        dw = csv.DictWriter(s, keys)
        dw.writeheader()
        dw.writerows([dict(r) for r in rows])
        csv_str = s.getvalue()
    return Response(csv_str, mimetype='text/csv') 
Example #17
Source File: log_csv.py    From core with MIT License
def download_large_csv(params):
    """
    Script to download large csv files to avoid uwsgi worker running out of memory.
    """
    entries = int(params['limit'])
    params['csv'] = "true"
    params['bin'] = "true"
    params['limit'] = "100000"

    csv_file = open('accesslog.csv', 'w+')
    writer = csv.DictWriter(csv_file, ACCESS_LOG_FIELDS)
    writer.writeheader()
    unicode_err_count = 0
    while entries > 0:
        print "{} entries left".format(entries)
        params['limit'] = str(min(entries, 100000))
        report = AccessLogReport(params)
        rep = report.build()
        end_date = str(rep[-1]['timestamp'])
        for doc in rep[:-1]:
            entries = entries - 1
            writer.writerow(doc)

        if len(rep) == 1:
            entries = 0
            writer.writerow(rep[0])
        if len(rep) < int(params['limit']) - 1:
            entries = 0
        csv_file.flush()
        params['end_date'] = end_date

    print "Encountered unicode errors and skipped {} entries".format(unicode_err_count)
    csv_file.close() 
Example #18
Source File: reporthandler.py    From core with MIT License
def build_csv(self, filepath):
        csv_file = open(filepath, 'w+')
        writer = csv.DictWriter(csv_file, ACCESS_LOG_FIELDS)
        writer.writeheader()

        for doc in self.build():

            # Format timestamp as ISO UTC
            doc['timestamp'] = pytz.timezone('UTC').localize(doc['timestamp']).isoformat()

            # mongo_dict flattens dictionaries using a dot notation
            writer.writerow(util.mongo_dict(doc))

        # Need to close and reopen file to flush buffer into file
        csv_file.close() 
Example #19
Source File: test.py    From pyRevit with GNU General Public License v3.0
def test_write_unicode_header_dict(self):
        fd, name = tempfile.mkstemp()
        fileobj = open(name, 'w+b')
        try:
            writer = csv.DictWriter(fileobj, fieldnames=[u"ñ", u"ö"])
            writer.writeheader()
            fileobj.seek(0)
            self.assertEqual(fileobj.readline().decode('utf-8'), u"ñ,ö\r\n")
        finally:
            fileobj.close()
            os.unlink(name) 
Example #20
Source File: test.py    From pyRevit with GNU General Public License v3.0
def test_write_simple_dict(self):
        fd, name = tempfile.mkstemp()
        fileobj = open(name, 'w+b')
        try:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            writer.writeheader()
            fileobj.seek(0)
            self.assertEqual(fileobj.readline(), b"f1,f2,f3\r\n")
            writer.writerow({"f1": 10, "f3": "abc"})
            fileobj.seek(0)
            fileobj.readline()  # header
            self.assertEqual(fileobj.read(), b"10,,abc\r\n")
        finally:
            fileobj.close()
            os.unlink(name) 
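The empty field for the missing 'f2' key above comes from DictWriter's restval parameter (default ''); extra keys raise ValueError unless extrasaction='ignore' is passed, as in Example #14. unicodecsv appears to follow the standard-library behavior here; a quick sketch:

from io import BytesIO
import unicodecsv as csv

buf = BytesIO()
w = csv.DictWriter(buf, fieldnames=['f1', 'f2'], restval='N/A',
                   extrasaction='ignore')
w.writeheader()
w.writerow({'f1': 10, 'extra': 'dropped'})  # missing f2 -> 'N/A'; 'extra' ignored
print(buf.getvalue())  # b'f1,f2\r\n10,N/A\r\n'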
Example #21
Source File: test.py    From pyRevit with GNU General Public License v3.0
def test_write_no_fields(self):
        fileobj = BytesIO()
        self.assertRaises(TypeError, csv.DictWriter, fileobj) 
Example #22
Source File: make_person_course.py    From edx2bigquery with GNU General Public License v2.0
def output_table(self):
        '''
        output person_course table 
        '''
        
        fieldnames = self.the_dict_schema.keys()
        ofn = 'person_course.csv.gz'
        ofnj = 'person_course.json.gz'
        ofp = self.openfile(ofnj, 'w')
        ocsv = csv.DictWriter(self.openfile(ofn, 'w'), fieldnames=fieldnames)
        ocsv.writeheader()
        
        self.log("Writing output to %s and %s" % (ofn, ofnj))

        # write JSON first - it's safer
        cnt = 0
        for key, pcent in self.pctab.iteritems():
            cnt += 1
            check_schema(cnt, pcent, the_ds=self.the_dict_schema, coerce=True)
            ofp.write(json.dumps(pcent) + '\n')
        ofp.close()

        # now write CSV file (may have errors due to unicode)
        for key, pcent in self.pctab.iteritems():
            if 0:	# after switching to unicodecsv, don't do this
                try:
                    if 'countryLabel' in pcent:
                        if pcent['countryLabel'] == u'R\xe9union':
                            pcent['countryLabel'] = 'Reunion'
                        else:
                            #pcent['countryLabel'] = pcent['countryLabel'].decode('utf8').encode('utf8')
                            pcent['countryLabel'] = pcent['countryLabel'].encode('ascii', 'ignore')
                except Exception as err:
                    self.log("Error handling country code unicode row=%s" % pcent)
                    raise
            try:
                ocsv.writerow(pcent)
            except Exception as err:
                self.log("Error writing CSV output row=%s" % pcent)
                raise 
Example #23
Source File: generate_ipa_all.py    From panphon with MIT License
def write_ipa_all(ipa_bases, ipa_all, all_segments, sort_order):
    with open(ipa_bases, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        fieldnames = next(reader)
    with open(ipa_all, 'wb') as f:
        writer = csv.DictWriter(f, encoding='utf-8', fieldnames=fieldnames)
        writer.writerow({k: k for k in fieldnames})
        all_segments_list = sort_all_segments(sort_order, all_segments)
        for segment in all_segments_list:
            fields = copy.copy(segment.features)
            fields['ipa'] = segment.form
            writer.writerow(fields) 
Example #24
Source File: guia_bolso.py    From guiabolso2csv with GNU General Public License v3.0
def csv_transactions(self, year, month, file_name):
        transactions = self.transactions(year, month)

        if len(transactions) == 0:
            warnings.warn('No transactions for the period ({}-{})'.format(
                year, month))
            return

        with open(file_name, 'wb') as f:
            csv_writer = csv.DictWriter(f, fieldnames=self.fieldnames,
                                        encoding='utf-8-sig')  # add BOM to csv
            csv_writer.writeheader()
            csv_writer.writerows(transactions) 
Example #25
Source File: exports.py    From recon-ng with GNU General Public License v3.0
def csvify(rows):
    '''Expects a list of dictionaries and returns a CSV response.'''
    if not rows:
        csv_str = ''
    else:
        s = BytesIO()
        keys = rows[0].keys()
        dw = csv.DictWriter(s, keys)
        dw.writeheader()
        dw.writerows([dict(r) for r in rows])
        csv_str = s.getvalue()
    return Response(csv_str, mimetype='text/csv') 
Example #26
Source File: bigquery_import.py    From oadoi with MIT License
def from_bq_to_local_file(temp_data_filename, bq_tablename, header=True):

    setup_bigquery_creds()
    client = bigquery.Client()
    (dataset_id, table_id) = bq_tablename.split(".")
    dataset_ref = client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = client.get_table(table_ref)
    fieldnames = [schema.name for schema in table.schema]

    query = ('SELECT * FROM `unpaywall-bhd.{}` '.format(bq_tablename))
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location='US')  # API request - starts the query

    rows = list(query_job)

    with open(temp_data_filename, 'wb') as f:
        # delimiter workaround from https://stackoverflow.com/questions/43048618/csv-reader-refuses-tab-delimiter?noredirect=1&lq=1#comment73182042_43048618
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, delimiter=str(u'\t').encode('utf-8'))
        if header:
            writer.writeheader()
        for row in rows:
            writer.writerow(dict(zip(fieldnames, row)))

    print('Saved {} rows from {}.'.format(len(rows), bq_tablename))
    return fieldnames 
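The str(u'\t').encode('utf-8') dance above appears to be a Python 2 workaround: with unicode string literals in play, the py2 csv machinery rejects a unicode delimiter, and that expression yields a byte string there. On Python 3 a plain one-character str works (fieldnames below are illustrative):

from io import BytesIO
import unicodecsv

buf = BytesIO()
# Python 3: a plain one-character str delimiter is fine
writer = unicodecsv.DictWriter(buf, fieldnames=['doi', 'title'], delimiter='\t')
writer.writeheader()
print(buf.getvalue())  # b'doi\ttitle\r\n'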
Example #27
Source File: text_format_utils.py    From 990-xml-reader with MIT License
def to_csv(parsed_filing, object_id=None, standardizer=None, documentation=True, vd=None, outfilepath=None):
    if not vd:
        vd = VersionDocumentizer()
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    if outfilepath:
        stdout = open(outfilepath, 'wb')  # or 'wb' ?

    fieldnames = [
        'object_id', 'form', 'line_number', 'description', 'value', 'variable_name',
        'xpath', 'in_group', 'group_name', 'group_index'
    ]
    writer = unicodecsv.DictWriter(
        stdout,
        fieldnames=fieldnames,
        encoding='utf-8',
        quoting=csv.QUOTE_MINIMAL
    )
    writer.writeheader()   # this fails in python3? 
    results = parsed_filing.get_result()

    if results:
        for result in results:
            for this_result in result['csv_line_array']:

                vardata = None
                try:
                    vardata = standardizer.get_var(this_result['xpath'])
                except KeyError:
                    pass
                if vardata:
                    this_result['variable_name'] = vardata['db_table'] + "." + vardata['db_name']

                raw_line_num = vd.get_line_number(
                    this_result['xpath'], 
                    parsed_filing.get_version()
                )
                this_result['line_number'] =  debracket(raw_line_num)

                raw_description = vd.get_description(
                    this_result['xpath'], 
                    parsed_filing.get_version()
                )
                this_result['description'] =  debracket(raw_description)
                this_result['form'] = this_result['xpath'].split("/")[1]
                this_result['object_id'] = object_id
                writer.writerow(this_result) 
Example #28
Source File: analyze_content.py    From edx2bigquery with GNU General Public License v2.0
def get_stats_module_usage(course_id,
                           basedir="X-Year-2-data-sql", 
                           datedir="2013-09-21", 
                           use_dataset_latest=False,
                           ):
    '''
    Get data from the stats_module_usage table, if it doesn't already exist as a local file.
    Compute it if necessary.
    '''
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    sql = """   SELECT 
                    module_type, module_id, count(*) as ncount 
                FROM [{dataset}.studentmodule] 
                group by module_id, module_type
                order by module_id
          """.format(dataset=dataset)

    table = 'stats_module_usage'
    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest)
    csvfn = course_dir / (table + ".csv")

    data = {}
    if csvfn.exists():
        # read file into data structure
        for k in list(csv.DictReader(open(csvfn))):
            midfrag = tuple(k['module_id'].split('/')[-2:])
            data[midfrag] = k
    else:
        # download if it is already computed, or recompute if needed
        bqdat = bqutil.get_bq_table(dataset, table, sql=sql)
        if bqdat is None:
            bqdat = {'data': []}

        fields = [ "module_type", "module_id", "ncount" ]
        fp = open(csvfn, 'w')
        cdw = csv.DictWriter(fp, fieldnames=fields)
        cdw.writeheader()
        for k in bqdat['data']:
            midfrag = tuple(k['module_id'].split('/')[-2:])
            data[midfrag] = k
            try:
                k['module_id'] = k['module_id'].encode('utf8')
                cdw.writerow(k)
            except Exception as err:
                print "Error writing row %s, err=%s" % (k, str(err))
        fp.close()

    print "[analyze_content] got %d lines of studentmodule usage data" % len(data)
    return data 
Example #29
Source File: make_grading_policy_table.py    From edx2bigquery with GNU General Public License v2.0
def make_gp_table(course_id, basedir=None, datedir=None, 
                  use_dataset_latest=False,
                  verbose=False,
                  pin_date=None):

    if pin_date:
        datedir = pin_date

    sdir = load_course_sql.find_course_sql_dir(course_id, 
                                               basedir=basedir,
                                               datedir=datedir,
                                               use_dataset_latest=(use_dataset_latest and not pin_date),
                                               )

    fn_to_try = ['course.xml.tar.gz',
                'course-prod-analytics.xml.tar.gz',
                'course-prod-edge-analytics.xml.tar.gz',
                'course-prod-edx-replica.xml.tar.gz',
            ]
    
    for fntt in fn_to_try:
        fn = sdir / fntt
        if os.path.exists(fn):
            break
    if not os.path.exists(fn):
        msg = "---> oops, cannot get couese content (with grading policy file) for %s, file %s (or 'course.xml.tar.gz' or 'course-prod-edge-analytics.xml.tar.gz') missing!" % (course_id, fn)
        raise Exception(msg)

    gpstr, gpfn = read_grading_policy_from_tar_file(fn)
    fields, gptab, schema = load_grading_policy(gpstr, verbose=verbose, gpfn=gpfn)
    
    ofnb = 'grading_policy.csv'
    ofn = sdir / ofnb
    ofp = open(ofn, 'w')
    cdw = csv.DictWriter(ofp, fieldnames=fields)
    cdw.writeheader()
    cdw.writerows(gptab)
    ofp.close()

    # upload to google storage
    gsdir = path(gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest))
    gsutil.upload_file_to_gs(ofn, gsdir / ofnb, verbose=False)
    
    # import into BigQuery
    table = "grading_policy"
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)
    bqutil.load_data_to_table(dataset, table, gsdir / ofnb, schema, format='csv', skiprows=1) 
Example #30
Source File: file_system_neo4j_csv_loader.py    From amundsendatabuilder with Apache License 2.0
def _get_writer(self,
                    csv_record_dict,  # type: Dict[str, Any]
                    file_mapping,  # type: Dict[Any, DictWriter]
                    key,  # type: Any
                    dir_path,  # type: str
                    file_suffix  # type: str
                    ):
        # type: (...) -> DictWriter
        """
        Finds a writer based on the csv record and key.
        If the writer does not exist, creates a csv writer and updates
        the mapping.

        :param csv_record_dict:
        :param file_mapping:
        :param key:
        :param file_suffix:
        :return:
        """
        writer = file_mapping.get(key)
        if writer:
            return writer

        LOGGER.info('Creating file for {}'.format(key))

        if six.PY2:

            file_out = open('{}/{}.csv'.format(dir_path, file_suffix), 'w')
            writer = csv.DictWriter(file_out, fieldnames=csv_record_dict.keys(),
                                    quoting=csv.QUOTE_NONNUMERIC, encoding='utf-8')
        else:
            file_out = open('{}/{}.csv'.format(dir_path, file_suffix), 'w', encoding='utf8')
            writer = csv.DictWriter(file_out, fieldnames=csv_record_dict.keys(),
                                    quoting=csv.QUOTE_NONNUMERIC)

        def file_out_close():
            # type: () -> None
            LOGGER.info('Closing file IO {}'.format(file_out))
            file_out.close()
        self._closer.register(file_out_close)

        writer.writeheader()
        file_mapping[key] = writer

        return writer