Python csv.DictReader() Examples
The following are 28 code examples of csv.DictReader(), drawn from open-source projects. You may also want to check out all available functions and classes of the csv module.
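Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all build on: csv.DictReader() treats the first row of the file as a header and yields every following row as a dictionary keyed by those header fields. The file name people.csv and its name/age columns are hypothetical stand-ins, not taken from any example below.

import csv

# Hypothetical input file 'people.csv' with the header row: name,age
with open('people.csv', newline='') as f:
    reader = csv.DictReader(f)  # field names are read from the first row
    for row in reader:
        # Each row is a dict keyed by the header fields,
        # with every value returned as a string.
        print(row['name'], int(row['age']))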
Example #1
Source File: deck.py From tinycards-python-api with MIT License
def add_cards_from_csv(self, csv_file,
                       front_column='front',
                       back_column='back'):
    """Add word pairs from a CSV file as cards to the deck.

    Args:
        csv_file: The file buffer that contains the CSV data.
        front_column (str): Optional name for the 'front' column.
        back_column (str): Optional name for the 'back' column.

    Example:
        >>> with open(csv_path, 'r') as csv_file:
        >>>     deck.add_cards_from_csv(csv_file)

    """
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        current_word_pair = (row[front_column], row[back_column])
        self.add_card(current_word_pair)
Example #2
Source File: csv_to_deck.py From tinycards-python-api with MIT License
def csv_to_deck(csv_path):
    """Creates a Tinycards deck from a CSV file.

    The CSV file is expected to have two columns named 'front' and 'back'.
    """
    # Create new deck.
    tinycards = Tinycards(user_identifier, user_password)
    deck = Deck('French Words')
    deck = tinycards.create_deck(deck)

    # Extract data from CSV file.
    word_pairs = []
    with open(csv_path, 'r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            current_word_pair = (row['front'], row['back'])
            word_pairs.append(current_word_pair)

    # Populate deck with cards from CSV data.
    for pair in word_pairs:
        deck.add_card(pair)

    # Save changes to Tinycards.
    tinycards.update_deck(deck)
Example #3
Source File: packages.py From arches with GNU Affero General Public License v3.0
def import_business_data_relations(self, data_source):
    """
    Imports business data relations
    """
    if isinstance(data_source, str):
        data_source = [data_source]
    for path in data_source:
        if os.path.isabs(path):
            if os.path.isfile(os.path.join(path)):
                relations = csv.DictReader(open(path, "r"))
                RelationImporter().import_relations(relations)
            else:
                utils.print_message("No file found at indicated location: {0}".format(path))
                sys.exit()
        else:
            utils.print_message(
                "ERROR: The specified file path appears to be relative. \
                Please rerun command with an absolute file path."
            )
            sys.exit()
Example #4
Source File: scatter_plot.py From 3DGCN with MIT License
def find_best_hyper(dataset, model, metric="test_rmse"):
    path = "../../result/{}/{}/".format(model, dataset)

    # Get list of hyperparameters
    names, losses, stds = [], [], []
    for root, dirs, files in walk_level(path, level=0):
        for dir_name in dirs:
            loss = []
            if os.path.isfile(path + dir_name + "/results.csv"):
                with open(path + dir_name + "/results.csv") as file:
                    reader = csv.DictReader(file)
                    for row in reader:
                        loss.append(row[metric])

                names.append(dir_name)
                losses.append(float(loss[0]))
                stds.append(float(loss[1]))

    # Sort by loss
    losses, stds, names = zip(*sorted(zip(losses, stds, names)))

    # Choose lowest loss hyper
    path += names[np.argmin(losses)] + '/'

    return path
Example #5
Source File: fels.py From fetchLandsatSentinelFromGoogleCloud with MIT License
def query_sentinel2_catalogue(collection_file, cc_limit, date_start, date_end, tile, latest=False):
    """Query the Sentinel-2 index catalogue and retrieve urls for the best images found."""
    print("Searching for Sentinel-2 images in catalog...")
    cc_values = []
    all_urls = []
    all_acqdates = []
    with open(collection_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            year_acq = int(row['SENSING_TIME'][0:4])
            month_acq = int(row['SENSING_TIME'][5:7])
            day_acq = int(row['SENSING_TIME'][8:10])
            acqdate = datetime.datetime(year_acq, month_acq, day_acq)
            if row['MGRS_TILE'] == tile and float(row['CLOUD_COVER']) <= cc_limit \
                    and date_start < acqdate < date_end:
                all_urls.append(row['BASE_URL'])
                cc_values.append(float(row['CLOUD_COVER']))
                all_acqdates.append(acqdate)
    if latest and all_urls:
        return [sort_url_list(cc_values, all_acqdates, all_urls).pop()]
    return sort_url_list(cc_values, all_acqdates, all_urls)
Example #6
Source File: msr_paraphrase.py From lineflow with MIT License
def get_msr_paraphrase() -> Dict[str, List[Dict[str, str]]]:
    url = 'https://raw.githubusercontent.com/wasiahmad/paraphrase_identification/master/dataset/msr-paraphrase-corpus/msr_paraphrase_{}.txt'  # NOQA
    root = download.get_cache_directory(os.path.join('datasets', 'msr_paraphrase'))

    def creator(path):
        dataset = {}
        fieldnames = ('quality', 'id1', 'id2', 'string1', 'string2')
        for split in ('train', 'test'):
            data_path = gdown.cached_download(url.format(split))
            with io.open(data_path, 'r', encoding='utf-8') as f:
                f.readline()  # skip header
                reader = csv.DictReader(f, delimiter='\t', fieldnames=fieldnames)
                dataset[split] = [dict(row) for row in reader]
        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'msr_paraphrase.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)
Example #7
Source File: models.py From gazetteer with MIT License
def clean(self):
    if self.batch_file and self.batch_file.file:
        csvfile = csv.DictReader(self.batch_file.file, delimiter="\t")
        row = next(csvfile)  # was csvfile.next(), which only works on Python 2
        for field in self.core_fields:
            if field not in row.keys():
                raise ValidationError('CSV File does not have the necessary field: ' + field)
        uris = []
        for row in csvfile:
            fcode = row.get("FEATURE_CODE")
            if not fcode:
                raise ValidationError("A Feature code is missing")
            uri = row.get("URIS").split("|")[0]
            if not uri:
                raise ValidationError('CSV file is missing a uri')
            if uri in uris:
                raise ValidationError('duplicate URI detected')
            uris.append(uri)
Example #8
Source File: fels.py From fetchLandsatSentinelFromGoogleCloud with MIT License
def query_landsat_catalogue(collection_file, cc_limit, date_start, date_end, wr2path, wr2row,
                            sensor, latest=False):
    """Query the Landsat index catalogue and retrieve urls for the best images found."""
    print("Searching for Landsat-{} images in catalog...".format(sensor))
    cc_values = []
    all_urls = []
    all_acqdates = []
    with open(collection_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            year_acq = int(row['DATE_ACQUIRED'][0:4])
            month_acq = int(row['DATE_ACQUIRED'][5:7])
            day_acq = int(row['DATE_ACQUIRED'][8:10])
            acqdate = datetime.datetime(year_acq, month_acq, day_acq)
            if int(row['WRS_PATH']) == int(wr2path) and int(row['WRS_ROW']) == int(wr2row) \
                    and row['SENSOR_ID'] == sensor and float(row['CLOUD_COVER']) <= cc_limit \
                    and date_start < acqdate < date_end:
                all_urls.append(row['BASE_URL'])
                cc_values.append(float(row['CLOUD_COVER']))
                all_acqdates.append(acqdate)
    if latest and all_urls:
        return [sort_url_list(cc_values, all_acqdates, all_urls).pop()]
    return sort_url_list(cc_values, all_acqdates, all_urls)
Example #9
Source File: billing.py From aegea with Apache License 2.0
def ls(args):
    bucket = resources.s3.Bucket(args.billing_reports_bucket.format(account_id=ARN.get_account_id()))
    now = datetime.utcnow()
    year = args.year or now.year
    month = str(args.month or now.month).zfill(2)
    next_year = year + ((args.month or now.month) + 1) // 12
    next_month = str(((args.month or now.month) + 1) % 12).zfill(2)
    manifest_name = "aegea/{report}/{yr}{mo}01-{next_yr}{next_mo}01/{report}-Manifest.json"
    manifest_name = manifest_name.format(report=__name__, yr=year, mo=month, next_yr=next_year,
                                         next_mo=next_month)
    try:
        manifest = json.loads(bucket.Object(manifest_name).get().get("Body").read())
        for report_key in manifest["reportKeys"]:
            report = BytesIO(bucket.Object(report_key).get().get("Body").read())
            with gzip.GzipFile(fileobj=report) as fh:
                reader = csv.DictReader(fh)
                for line in reader:
                    page_output(tabulate(filter_line_items(reader, args), args))
    except ClientError as e:
        msg = 'Unable to get report {} from {}: {}. Run "aegea billing configure" to enable reports.'
        raise AegeaException(msg.format(manifest_name, bucket, e))
Example #10
Source File: test_functional.py From comport with BSD 3-Clause "New" or "Revised" License
def test_csv_filtered_by_dept(self, testapp):
    # create a department
    department1 = Department.create(name="IM Police Department", short_name="IMPD", load_defaults=False)
    department2 = Department.create(name="B Police Department", short_name="BPD", load_defaults=False)

    incidentclass1 = getattr(importlib.import_module("comport.data.models"),
                             "UseOfForceIncident{}".format(department1.short_name))
    incidentclass2 = getattr(importlib.import_module("comport.data.models"),
                             "UseOfForceIncident{}".format(department2.short_name))

    incidentclass1.create(opaque_id="123ABC", department_id=department1.id)
    incidentclass2.create(opaque_id="123XYZ", department_id=department2.id)

    response1 = testapp.get("/department/{}/uof.csv".format(department1.id))
    response2 = testapp.get("/department/{}/uof.csv".format(department2.id))

    incidents1 = list(csv.DictReader(io.StringIO(response1.text)))
    incidents2 = list(csv.DictReader(io.StringIO(response2.text)))

    assert len(incidents1) == 1 and len(incidents2) == 1
    assert incidents1[0]['id'] == '123ABC' and incidents2[0]['id'] == '123XYZ'
Example #11
Source File: import_departments.py From WF-website with GNU Affero General Public License v3.0
def handle(self, *args, **options):
    # Get the only instance of Magazine Department Index Page
    magazine_department_index_page = MagazineDepartmentIndexPage.objects.get()

    with open(options["file"]) as import_file:
        departments = csv.DictReader(import_file)

        for department in departments:
            import_department = MagazineDepartment(
                title=department["title"],
            )

            # Add department to site page hierarchy
            magazine_department_index_page.add_child(instance=import_department)
            magazine_department_index_page.save()

    self.stdout.write("All done!")
Example #12
Source File: test_department_model_lmpd.py From comport with BSD 3-Clause "New" or "Revised" License
def test_csv_response(self, testapp):
    # create a department and an LMPD uof incident
    department = Department.create(name="LM Police Department", short_name="LMPD", load_defaults=False)
    uof_check = dict(
        department_id=department.id,
        opaque_id="Check Opaque ID",
        occured_date=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        bureau="Check Bureau",
        division="Check Division",
        unit="Check Unit",
        platoon="Check Platoon",
        disposition="Check Disposition",
        use_of_force_reason="Check UOF Reason",
        officer_force_type="Check Officer Force Type",
        service_type="Check Service Type",
        arrest_made=False,
        arrest_charges="Check Arrest Charges",
        resident_injured=True,
        resident_hospitalized=False,
        resident_condition="Check Resident Condition",
        officer_injured=False,
        officer_hospitalized=False,
        officer_condition="Check Officer Condition",
        resident_identifier="Check Resident Identifier",
        resident_race="Check Resident Race",
        resident_sex="Check Resident Sex",
        resident_age="Check Resident Age",
        officer_race="Check Officer Race",
        officer_sex="Check Officer Sex",
        officer_age="Check Officer Age",
        officer_years_of_service="Check Officer Years Of Service",
        officer_identifier="Check Officer Identifier",
    )
    UseOfForceIncidentLMPD.create(**uof_check)

    response = testapp.get("/department/{}/uof.csv".format(department.id))
    incidents = list(csv.DictReader(io.StringIO(response.text)))

    # build a variable to csv header lookup from the csv schema
    csv_schema = UseOfForceIncidentLMPD.get_csv_schema()
    schema_lookup = dict(zip([col[1] for col in csv_schema], [col[0] for col in csv_schema]))

    assert len(incidents) == 1
    for check_key in uof_check.keys():
        if check_key == 'department_id':
            continue
        assert str(uof_check[check_key]) == incidents[0][schema_lookup[check_key]]
Example #13
Source File: create_test_data.py From adam_qas with GNU General Public License v3.0
def insert_question_to_sqlt():
    question_set = []
    last_question = ""
    with open(CORPUS_DIR + "/" + WIKI_QA_TSV) as file:
        wiki_file = csv.DictReader(file, dialect='excel-tab')
        if wiki_file is not None:
            for row in wiki_file:
                if row['Question'] != last_question:
                    question = (row['Question'], )
                    question_set.append(question)
                    last_question = row['Question']
    if question_set is not None:
        sqlt_man = SqLiteManager()
        # sqlt_man.remove_old_results()
        sqlt_man.remove_all_data()
        logger.info("Removed Old test results")
        sqlt_man.insert_many_question(question_set)
        logger.info("Inserted {0} questions".format(sqlt_man.get_question_count()))
Example #14
Source File: buzzer.py From qb with MIT License
def load_finals(self, system, final_file):
    ff = DictReader(open(final_file))
    for ii in ff:
        self._finals[int(ii['question'])][system] = ii['answer'].replace('_', ' ')
Example #15
Source File: forms.py From ideascube with GNU Affero General Public License v3.0
def save(self):
    source = TextIOWrapper(self.cleaned_data['source'].file)
    items = []
    errors = []

    for index, row in enumerate(csv.DictReader(source)):
        try:
            data = {
                'module': row['module'],
                'name': row['name'],
                'description': row['description'],
            }
        except KeyError as e:
            errors.append(_('Missing column "{}" on line {}').format(
                e.args[0], index + 1))
            continue

        form = StockItemForm(data=data)

        if form.is_valid():
            item = form.save()
            items.append(item)
        else:
            msgs = (
                '{}: {}'.format(k, v.as_text())
                for k, v in form.errors.items())
            errors.append(_('Could not import line {}: {}').format(
                index + 1, '; '.join(msgs)))
            continue

    return items, errors[:10]
Example #16
Source File: buzzer.py From qb with MIT License
def load_questions(self, question_file):
    qfile = DictReader(open(question_file, 'r'))
    for ii in qfile:
        self._questions[int(ii["id"])][int(ii["sent"])] = ii["text"]
        self._answers[int(ii["id"])] = ii["answer"].strip().replace("_", " ")
Example #17
Source File: exportnx.py From GraphiPy with MIT License
def create_from_csv(self, filepath, nx_graph=None, directional=False):
    if nx_graph is None:
        if directional:
            nx_graph = nx.DiGraph()
        else:
            nx_graph = nx.Graph()

    nodes_path = filepath + "nodes\\"
    edges_path = filepath + "edges\\"

    for filename in os.listdir(edges_path):
        reader = csv.DictReader(
            open(edges_path + filename, encoding="utf-8"))
        for edge in reader:
            source = edge["Source"]
            target = edge["Target"]
            attr = {
                "Label": edge["Label"],
                "label_attribute": edge["label_attribute"],
                "Id": edge["Id"]
            }
            nx_graph.add_edge(source, target, **attr)

    for filename in os.listdir(nodes_path):
        reader = csv.DictReader(
            open(nodes_path + filename, encoding="utf-8"))
        for node in reader:
            node_id = node["Id"]
            nx_node = nx_graph.node[node_id]
            for attr in node.keys():
                nx_node[attr] = node[attr]

    return nx_graph
Example #18
Source File: petition.py From petitions with MIT License
def generate_modified_file(src, dst, sample, corrupt):
    """Sample the source file and write a new file with missing values added."""
    # Fix the random seed so the same result is guaranteed on every run.
    random.seed(0)
    with open(src, 'r') as fr:
        with open(dst, 'w') as fw:
            csvr = csv.DictReader(fr)
            csvw = csv.DictWriter(fw, csvr.fieldnames)
            csvw.writeheader()
            rows = csvr
            # Sampling
            if sample:
                rows = (row for row in rows if random.random() <= SAMPLE_RATE)
            # Add missing values
            if corrupt:
                rows = (corrupt_row(row) for row in rows)
            csvw.writerows(rows)
Example #19
Source File: import_medias.py From ideascube with GNU Affero General Public License v3.0
def load(self, path):
    with codecs.open(path, 'r', encoding=self.encoding) as f:
        content = f.read()
    try:
        dialect = csv.Sniffer().sniff(content)
    except csv.Error:
        dialect = csv.unix_dialect()
    return csv.DictReader(content.splitlines(), dialect=dialect)
Example #20
Source File: forms.py From ideascube with GNU Affero General Public License v3.0
def _get_ideascube_reader(self, source):
    return csv.DictReader(source)
Example #21
Source File: buzzer.py From qb with MIT License
def add_system(self, file_path):
    buzzfile = DictReader(open("%s.buzz.csv" % file_path, 'r'))

    system = file_path.replace("CMSC723_", "").split('/')[-1]
    system = system.split('.')[0]
    system = system.split("_")[0]

    for ii in buzzfile:
        question, sent, word = int(ii["question"]), int(ii["sentence"]), int(ii["word"])
        self.add_guess(question, sent, word, system, ii["page"], ii["evidence"],
                       int(ii["final"]), float(ii["weight"]))

    self.load_finals(system, "%s.final.csv" % file_path)
Example #22
Source File: buzzer.py From qb with MIT License
def __init__(self, filename):
    self._power_marks = {}
    if filename:
        try:
            infile = DictReader(open(filename, 'r'))
            for ii in infile:
                question = int(ii['question'])
                self._power_marks[question] = ii['word']
        except:
            print("Couldn't load from %s" % filename)
        print("Read power marks from %s: %s ..." %
              (filename, str(self._power_marks.keys())[1:69]))
    else:
        print("No power marks")
Example #23
Source File: dynamo_insert_items_from_file.py From Building-Serverless-Microservices-in-Python with MIT License
def main():
    # For manual deployment
    # table_name = 'user-visits'
    # For SAM:
    table_name = 'user-visits-sam'
    input_data_path = '../sample_data/dynamodb-sample-data.txt'
    dynamo_repo = DynamoRepository(table_name)
    with open(input_data_path, 'r') as sample_file:
        csv_reader = csv.DictReader(sample_file)
        for row in csv_reader:
            response = dynamo_repo.update_dynamo_event_counter(
                row['EventId'], row['EventDay'], row['EventCount'])
            print(response)
Example #24
Source File: resource_export_tests.py From arches with GNU Affero General Public License v3.0
def test_csv_export(self):
    BusinessDataImporter("tests/fixtures/data/csv/resource_export_test.csv").import_business_data()
    export = BusinessDataExporter(
        "csv", configs="tests/fixtures/data/csv/resource_export_test.mapping", single_file=True
    ).export()
    csv_output = list(csv.DictReader(export[0]["outputfile"].getvalue().split("\r\n")))[0]
    csvinputfile = "tests/fixtures/data/csv/resource_export_test.csv"
    csv_input = list(
        csv.DictReader(open(csvinputfile, "rU", encoding="utf-8"), restkey="ADDITIONAL", restval="MISSING")
    )[0]
    self.assertDictEqual(dict(csv_input), dict(csv_output))
Example #25
Source File: packages.py From arches with GNU Affero General Public License v3.0
def import_node_value_data(self, data_source, overwrite=None):
    """
    Imports node-value datatype business data only.
    """
    if overwrite == "":
        utils.print_message("No overwrite option indicated. Please rerun command with '-ow' parameter.")
        sys.exit()

    if isinstance(data_source, str):
        data_source = [data_source]

    if len(data_source) > 0:
        for source in data_source:
            path = utils.get_valid_path(source)
            if path is not None:
                data = csv.DictReader(open(path, "r"), encoding="utf-8-sig")
                business_data = list(data)
                TileCsvReader(business_data).import_business_data(overwrite=None)
            else:
                utils.print_message("No file found at indicated location: {0}".format(source))
                sys.exit()
    else:
        utils.print_message(
            "No BUSINESS_DATA_FILES locations specified in your settings file.\
            Please rerun this command with BUSINESS_DATA_FILES locations specified or \
            pass the locations in manually with the '-s' parameter."
        )
        sys.exit()
Example #26
Source File: flowlogs_reader.py From flowlogs-reader with Apache License 2.0
def _read_file(self, key):
    resp = self.boto_client.get_object(Bucket=self.bucket, Key=key)
    with gz_open(resp['Body'], mode='rt') as gz_f:
        reader = DictReader(gz_f, delimiter=' ')
        reader.fieldnames = [
            f.replace('-', '_') for f in reader.fieldnames
        ]
        yield from reader
Example #27
Source File: import_issues.py From WF-website with GNU Affero General Public License v3.0
def handle(self, *args, **options):
    # Get the only instance of Magazine Index Page
    magazine_index_page = MagazineIndexPage.objects.get()

    with open(options["file"]) as import_file:
        issues = csv.DictReader(import_file)

        for issue in issues:
            response = requests.get(issue["cover_image_url"])
            image_file = BytesIO(response.content)
            image = Image(
                title=issue["title"] + " cover image",
                file=ImageFile(image_file, name=issue["cover_image_file_name"]),
            )
            image.save()

            import_issue = MagazineIssue(
                title=issue["title"],
                publication_date=issue["publication_date"],
                first_published_at=issue["publication_date"],
                issue_number=issue["issue_number"],
                cover_image=image,
            )

            # Add issue to site page hierarchy
            magazine_index_page.add_child(instance=import_issue)
            magazine_index_page.save()

    self.stdout.write("All done!")
Example #28
Source File: simple_flatfile_parser_sara.py From gmpe-smtk with GNU Affero General Public License v3.0
def parse(self):
    """
    Parses the database
    """
    HEADER_LIST1 = copy.deepcopy(HEADER_LIST)
    self._header_check(HEADER_LIST1)
    # Read in csv
    reader = csv.DictReader(open(self.filename, "r"))
    metadata = []
    self.database = GroundMotionDatabase(self.id, self.name)
    self._get_site_id = self.database._get_site_id
    for row in reader:
        self.database.records.append(self._parse_record(row))
    return self.database