Python geopandas.read_file() Examples
The following are 30 code examples of geopandas.read_file().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module geopandas, or try the search function.
Example #1
Source File: From dinosar with MIT License | 8 votes |
def load_inventory(inventoryJSON):
    """Load inventory saved with asf.archive.save_inventory().

    Parameters
    ----------
    inventoryJSON : str
        dinosar inventory file (query.geojson)

    Returns
    -------
    gf : GeoDataFrame
        The inventory sorted by ``relativeOrbit``, with the derived columns
        ``timeStamp``, ``sceneDateString``, ``dateStamp``, ``utc`` and
        ``orbitCode`` added.

    """
    gf = gpd.read_file(inventoryJSON)
    gf["timeStamp"] = pd.to_datetime(gf.sceneDate, format="%Y-%m-%d %H:%M:%S")
    # The vectorised ``.dt`` accessor replaces a per-row Python lambda.
    gf["sceneDateString"] = gf.timeStamp.dt.strftime("%Y-%m-%d")
    gf["dateStamp"] = pd.to_datetime(gf.sceneDateString)
    gf["utc"] = gf.timeStamp.dt.strftime("%H:%M:%S")
    gf["relativeOrbit"] = gf.relativeOrbit.astype("int")
    gf.sort_values("relativeOrbit", inplace=True)
    # Categorical copy of the orbit number, taken after sorting.
    gf["orbitCode"] = gf.relativeOrbit.astype("category")
    return gf
Example #2
Source File: From oggm with BSD 3-Clause "New" or "Revised" License | 7 votes |
def inversion_gdir(class_case_dir):
    """Create a GlacierDirectory from the Hintereisferner demo outline.

    Initialises the configuration, points it at the demo intersects, DEM and
    climate files, builds the directory under ``class_case_dir`` (resetting
    any existing content) and runs ``define_glacier_region`` on it.
    """
    from oggm import GlacierDirectory
    from oggm.tasks import define_glacier_region
    import geopandas as gpd

    # Init
    cfg.initialize()
    cfg.set_intersects_db(get_demo_file('rgi_intersect_oetztal.shp'))
    cfg.PATHS['dem_file'] = get_demo_file('hef_srtm.tif')
    # NOTE(review): the climate file name is an empty string in this copy;
    # it looks like the real name was lost -- confirm against upstream.
    cfg.PATHS['climate_file'] = get_demo_file('')

    outline_path = get_demo_file('Hintereisferner_RGI5.shp')
    # Only the first feature of the shapefile is used.
    first_outline = gpd.read_file(outline_path).iloc[0]

    gdir = GlacierDirectory(first_outline, base_dir=class_case_dir, reset=True)
    define_glacier_region(gdir)
    return gdir
Example #3
Source File: From geeup with Apache License 2.0 | 7 votes |
def _count_vertices(geom):
    """Return the vertex count of *geom*, summing over the parts of multi-geometries."""
    if geom is None:
        return 0
    if hasattr(geom, "geoms"):
        # Multi-part geometries are iterated through ``.geoms``.
        return sum(_count_vertices(part) for part in geom.geoms)
    ring = getattr(geom, "exterior", None)
    if ring is not None:
        # Polygons: only the exterior ring is counted.
        return len(ring.coords)
    # Points and line strings expose their coordinates directly.
    return len(geom.coords)


def vcount(shpfile):
    """Count the vertices of every feature in a vector file.

    Parameters
    ----------
    shpfile : str
        Path to a file readable by ``geopandas.read_file``.

    Returns
    -------
    int
        ``sum(overall)`` after appending one count per feature, or ``0``
        (``df.size``) when the file holds no data.

    A warning is logged when any single count in ``overall`` reaches
    1000000 or more.
    """
    df = gp.read_file(shpfile)
    if df.size == 0:
        return df.size

    # NOTE(review): ``overall`` is not defined in this block; presumably it is
    # a module-level list, in which case counts accumulate across calls --
    # confirm that this is intended.
    for _, row in df.iterrows():
        overall.append(_count_vertices(row.geometry))

    if not all(count < 1000000 for count in overall):
        logger.warning(shpfile+' has overall max vertex of '+str(max(overall))+' with max allowed 1000000 ingest might fail')
    return sum(overall)
Example #4
Source File: From CoastSat with GNU General Public License v3.0 | 7 votes |
def transects_from_geojson(filename):
    """
    Reads transect coordinates from a .geojson file.

    Arguments:
    -----------
    filename: str
        contains the path and filename of the geojson file to be loaded

    Returns:
    -----------
    transects: dict
        maps each value of the 'name' column to a numpy array holding the
        coordinates of that feature's geometry

    """
    gdf = gpd.read_file(filename)
    # Pair each name with its geometry; a repeated name keeps the last geometry.
    transects = {
        name: np.array(geom.coords)
        for name, geom in zip(gdf['name'], gdf['geometry'])
    }
    print('%d transects have been loaded' % len(transects))
    return transects
Example #5
Source File: From deeposlandia with MIT License | 7 votes |
def test_extract_empty_tile_items(
    tanzania_example_image, tanzania_example_labels
):
    """Check that a tile overlapping no polygon yields an empty item set.

    Uses the reference test image and labels (see
    'tests/data/tanzania/input/training/').
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    labels = gpd.read_file(tanzania_example_labels)
    has_geometry = ~labels.geometry.isna()
    labels = labels.loc[has_geometry, ["condition", "geometry"]]
    # Labels without a condition are treated as "Complete".
    missing_condition = [value is None for value in labels.condition]
    labels.loc[missing_condition, "condition"] = "Complete"

    tile_items = extract_tile_items(geofeatures, labels, 450, 450, 100, 100)
    assert tile_items.shape[0] == 0
Example #6
Source File: From dinosar with MIT License | 7 votes |
def ogr2snwe(vectorFile, buffer=None):
    """Convert ogr shape to South,North,West,East bounds.

    Parameters
    ----------
    vectorFile : str
        path to OGR-recognized vector file.
    buffer : float
        Amount of buffer distance to add to shape (in decimal degrees).
        Skipped when falsy.

    Returns
    -------
    snwe : list
        a list of coordinate bounds [S, N, W, E]

    """
    gf = gpd.read_file(vectorFile)
    gf.to_crs(epsg=4326, inplace=True)

    hulls = gf.geometry.convex_hull
    if buffer:
        hulls = hulls.buffer(buffer)

    # Only the bounds of the first feature are used.
    west, south, east, north = hulls.bounds.values[0]
    return [south, north, west, east]
Example #7
Source File: From momepy with MIT License | 7 votes |
def setup_method(self):
    """Load the "bubenec" layers and derive the columns and blocks used by the tests."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    # The attributes and the locals below refer to the same objects.
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    streets["nID"] = mm.unique_id(streets)
    buildings["height"] = np.linspace(10.0, 30.0, 144)
    tessellation["area"] = tessellation.geometry.area
    buildings["area"] = buildings.geometry.area
    buildings["fl_area"] = mm.FloorArea(buildings, "height").series
    buildings["nID"] = mm.get_network_id(buildings, streets, "nID")

    generated = mm.Blocks(tessellation, streets, buildings, "bID", "uID")
    self.blocks = generated.blocks
    buildings["bID"] = generated.buildings_id
    tessellation["bID"] = generated.tessellation_id
Example #8
Source File: From momepy with MIT License | 7 votes |
def test_network_false_nodes(self):
    """Check ``mm.network_false_nodes`` on a GeoDataFrame, a GeoSeries,
    an invalid input type and an exploded (multi-index) frame."""
    test_file_path2 = mm.datasets.get_path("tests")
    self.false_network = gpd.read_file(test_file_path2, layer="network")

    fixed = mm.network_false_nodes(self.false_network)
    assert len(fixed) == 55
    assert isinstance(fixed, gpd.GeoDataFrame)
    # NOTE(review): the expression of this assertion is missing from this
    # copy; a CRS-preservation check is assumed -- confirm against upstream.
    assert fixed.crs == self.false_network.crs

    fixed_series = mm.network_false_nodes(self.false_network.geometry)
    assert len(fixed_series) == 55
    assert isinstance(fixed_series, gpd.GeoSeries)
    # NOTE(review): same as above -- expression missing, CRS check assumed.
    assert fixed_series.crs == self.false_network.crs

    with pytest.raises(TypeError):
        mm.network_false_nodes(list())

    multiindex = self.false_network.explode()
    fixed_multiindex = mm.network_false_nodes(multiindex)
    assert len(fixed_multiindex) == 55
    # Check the multi-index result itself; the original re-checked ``fixed``.
    assert isinstance(fixed_multiindex, gpd.GeoDataFrame)
Example #9
Source File: From emissions-api with MIT License | 7 votes |
def __load_country_shapes__():
    '''Load country shapes'''
    # NOTE(review): the call wrapping this message is missing from this copy;
    # ``logger.info`` is assumed because ``logger`` is used below -- confirm.
    logger.info('Loading country shapes')
    # load shapefile for country shapes and get records
    world = geopandas.read_file(
        geopandas.datasets.get_path('naturalearth_lowres'))
    for _, country in world.iterrows():
        # Try to find the alpha 3 country code in the iso3166.
        # Sometimes it is not set (value '-99'). Then we try to match by name.
        country_codes = iso3166.countries_by_alpha3.get(
            country['iso_a3'],
            iso3166.countries_by_name.get(country['name'].upper())
        )
        # log warning if the country is not found.
        if not country_codes:
            logger.warning('Unable to find %s', country['name'])
            continue
        # Store the geometry under both the alpha 2 and the alpha 3 code.
        shape = country['geometry']
        __country_shapes__[country_codes.alpha2] = shape
        __country_shapes__[country_codes.alpha3] = shape
Example #10
Source File: From Pyspatialml with GNU General Public License v3.0 | 7 votes |
def test_extract_polygons(self):
    """Compare polygon-extracted band means with the GRASS GIS reference values."""
    # extract training data from polygons
    training_py = geopandas.read_file(nc.polygons)
    df = self.stack.extract_vector(gdf=training_py)
    df = df.dropna()

    labels = training_py.loc[:, ("id", "label")]
    df = df.merge(labels, left_on="id", right_index=True)
    df = df.drop(columns=["id_x"])

    # compare to extracted data using GRASS GIS
    self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
    band_pairs = (
        ("lsat7_2000_10", "b1"),
        ("lsat7_2000_20", "b2"),
        ("lsat7_2000_30", "b3"),
        ("lsat7_2000_40", "b4"),
        ("lsat7_2000_50", "b5"),
        ("lsat7_2000_70", "b7"),
    )
    for raster_band, grass_band in band_pairs:
        self.assertAlmostEqual(
            df[raster_band].mean(),
            self.extracted_grass[grass_band].mean(),
            places=3,
        )
Example #11
Source File: From Pyspatialml with GNU General Public License v3.0 | 7 votes |
def test_extract_points(self):
    """Check point extraction against the known band values, as array and as DataFrame."""
    training_pt = geopandas.read_file(nc.points)
    reference_columns = ("b1", "b2", "b3", "b4", "b5", "b7")
    raster_bands = (
        "lsat7_2000_10",
        "lsat7_2000_20",
        "lsat7_2000_30",
        "lsat7_2000_40",
        "lsat7_2000_50",
        "lsat7_2000_70",
    )

    # check that extracted training data as array match known values
    ids, X, xys = self.stack.extract_vector(gdf=training_pt, return_array=True)
    for position, column in enumerate(reference_columns):
        unmasked = ~X[:, position].mask
        expected = training_pt[column].dropna().values
        self.assertTrue((X[unmasked, position].data == expected).all())

    # check that extracted training data as a DataFrame match known values
    df = self.stack.extract_vector(gdf=training_pt)
    for raster_band, column in zip(raster_bands, reference_columns):
        self.assertTrue(df[raster_band].equals(training_pt[column]))
Example #12
Source File: From Pyspatialml with GNU General Public License v3.0 | 7 votes |
def test_regression(self):
    """Fit single- and multi-target random forests and check the predicted rasters."""
    training_pt = gpd.read_file(ms.meuse)
    training = self.stack_meuse.extract_vector(gdf=training_pt)
    training["zinc"] = training_pt["zinc"]
    training["cadmium"] = training_pt["cadmium"]
    training["copper"] = training_pt["copper"]
    training["lead"] = training_pt["lead"]
    training = training.dropna()

    # single target regression
    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, self.stack_meuse.names]
    y = training["zinc"]
    # The ``regr.fit(X`` part of this call was missing from this copy; it is
    # restored from the dangling ``, y)`` that remained.
    regr.fit(X, y)

    single_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression
    y = training.loc[:, ["zinc", "cadmium", "copper", "lead"]]
    regr.fit(X, y)

    multi_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
Example #13
Source File: From Pyspatialml with GNU General Public License v3.0 | 7 votes |
def test_classification(self):
    """Fit a random forest classifier and check the class and probability rasters."""
    training_pt = gpd.read_file(nc.points)
    df_points = self.stack_nc.extract_vector(gdf=training_pt)
    df_points["class_id"] = training_pt["id"]
    df_points = df_points.dropna()

    clf = RandomForestClassifier(n_estimators=50)
    X = df_points.drop(columns=["id", "class_id", "geometry"])
    y = df_points.class_id
    # The ``clf.fit(X`` part of this call was missing from this copy; it is
    # restored from the dangling ``, y)`` that remained.
    clf.fit(X, y)

    # classification
    cla = self.stack_nc.predict(estimator=clf, dtype="int16", nodata=0)
    self.assertIsInstance(cla, Raster)
    self.assertEqual(cla.count, 1)
    # NOTE(review): the first argument of this assertion is missing from this
    # copy; a count of unmasked cells is assumed -- confirm against upstream.
    self.assertEqual(cla.read(masked=True).count(), 135092)

    # class probabilities
    probs = self.stack_nc.predict_proba(estimator=clf)
    # Check the probability raster itself; the original re-checked ``cla``.
    self.assertIsInstance(probs, Raster)
    self.assertEqual(probs.count, 7)

    for _, layer in probs:
        # NOTE(review): first argument reconstructed as above -- confirm.
        self.assertEqual(layer.read(masked=True).count(), 135092)
Example #14
Source File: From minerva with Apache License 2.0 | 7 votes |
# Read the vector data at ``self.uri`` with ``geopandas.read_file`` (after
# calling ``self.save_geojson()``), apply ``self.filter_data()`` when
# ``self.filters`` is truthy, optionally work on a copy when ``epsg`` differs
# from ``self.get_epsg()``, and return either ``out_data.to_json()`` or
# ``out_data``.
#
# NOTE(review): this copy of the function is damaged -- the expressions in
# ``if is None``, ``= geopandas.read_file(...)``, ``out_data =``,
# ``out_data[] = \ =`` are missing, so the line below is not valid Python.
# Restore it from the upstream source rather than guessing.
# NOTE(review): ``format`` is documented as a parameter but is not in the
# signature; ``format == formats.JSON`` therefore compares whatever ``format``
# resolves to in the enclosing scope (the builtin unless the module defines
# one) -- confirm the intended source of this value.
def read(self, epsg=None, **kwargs): """ Read vector data from Girder :param format: Format to return data in (default is GeoDataFrame) :param epsg: EPSG code to reproject data to :return: Data in GeoJSON """ if is None: self.save_geojson() = geopandas.read_file(self.uri) if self.filters: self.filter_data() out_data = if epsg and self.get_epsg() != epsg: out_data = geopandas.GeoDataFrame.copy(out_data) out_data[] = \ = if format == formats.JSON: return out_data.to_json() else: return out_data
Example #15
Source File: From CityEnergyAnalyst with MIT License | 7 votes |
def get_thermal_network_from_shapefile(locator, network_type, network_name):
    """
    Read the existing node and pipe network from its shapefiles and return the
    edge and node tables produced by ``extract_network_from_shapefile``.

    :param locator: object providing ``get_network_layout_edges_shapefile`` and
        ``get_network_layout_nodes_shapefile``
    :param network_type: passed through to the locator
    :param network_name: passed through to the locator
    :return: ``(edge_df, node_df)``
    :raises ValueError: if the ``Name`` column of the nodes or of the edges
        contains duplicates
    """
    # import shapefiles containing the network's edges and nodes
    network_edges_df = gpd.read_file(locator.get_network_layout_edges_shapefile(network_type, network_name))
    network_nodes_df = gpd.read_file(locator.get_network_layout_nodes_shapefile(network_type, network_name))

    # check duplicated NODE/PIPE IDs
    duplicated_nodes = network_nodes_df[network_nodes_df.Name.duplicated(keep=False)]
    duplicated_edges = network_edges_df[network_edges_df.Name.duplicated(keep=False)]
    if duplicated_nodes.size > 0:
        raise ValueError('There are duplicated NODE IDs:', duplicated_nodes)
    if duplicated_edges.size > 0:
        # Report the offending edges; the original passed the node table here.
        raise ValueError('There are duplicated PIPE IDs:', duplicated_edges)

    # get node and pipe information
    node_df, edge_df = extract_network_from_shapefile(network_edges_df, network_nodes_df)

    return edge_df, node_df
Example #16
Source File: From oggm with BSD 3-Clause "New" or "Revised" License | 7 votes |
def _read_shapefile_from_path(cls, fp):
    """Read the shapefile at ``fp``, honouring the tar/compression settings.

    Raises ``ValueError`` when ``fp`` does not contain '.shp'. Any
    '.properties' file found next to the read path is deleted afterwards.
    """
    if '.shp' not in fp:
        raise ValueError('File ending not that of a shapefile')

    if cfg.PARAMS['use_tar_shapefiles']:
        tar_name = fp.replace('.shp', '.tar')
        if cfg.PARAMS['use_compression']:
            tar_name += '.gz'
        fp = 'tar://' + tar_name

    shp = gpd.read_file(fp)

    # .properties file is created for compressed shapefiles. github: #904
    sidecar = fp.replace('tar://', '') + '.properties'
    if os.path.isfile(sidecar):
        # remove it, to keep GDir slim
        os.remove(sidecar)

    return shp
Example #17
Source File: From momepy with MIT License | 6 votes |
def setup_method(self):
    """Load the "bubenec" layers and build the spatial weights used by the tests."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    # The attributes and the locals below refer to the same objects.
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    buildings["height"] = np.linspace(10.0, 30.0, 144)
    tessellation["area"] = mm.Area(tessellation).series

    weights = sw_high(k=3, gdf=tessellation, ids="uID")
    self.sw = weights
    # Give id 100 an empty neighbour list.
    weights.neighbors[100] = []
    # Second set of weights built without the first two tessellation rows.
    self.sw_drop = sw_high(k=3, gdf=tessellation[2:], ids="uID")
Example #18
Source File: From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_to_geodataframe():
    """Round-trip a GeoDataFrame through vectorxarray and compare with the input."""
    source_path = os.path.join(TEST_INPUT_DATA_DIR, "soil_data_flat.geojson")
    gdf = gpd.read_file(source_path)
    vxd = vectorxarray.from_geodataframe(gdf)
    round_tripped = vxd.vector.to_geodataframe()
    assert_test_dataframes_equal(gdf, round_tripped)
Example #19
Source File: From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_to_netcdf(tmpdir):
    """Write a vector dataset to NetCDF, read it back and compare with the input."""
    gdf = gpd.read_file(os.path.join(TEST_INPUT_DATA_DIR, "soil_data_flat.geojson"))
    vxd = vectorxarray.from_geodataframe(gdf)
    # ``tmpdir.join("")`` is the directory itself, so a file name is needed.
    # NOTE(review): the original file name is missing from this copy; any name
    # inside ``tmpdir`` serves the test.
    output_file = tmpdir.join("test_vector.nc")
    vxd.vector.to_netcdf(output_file)
    vxd2 = vectorxarray.open_dataset(str(output_file))
    assert_test_dataframes_equal(gdf, vxd2.vector.to_geodataframe())
Example #20
Source File: From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_multidimensional_error():
    """Plotting a dataset concatenated along "time" must raise ValueError."""
    source_path = os.path.join(TEST_INPUT_DATA_DIR, "soil_data_flat.geojson")
    gdf = gpd.read_file(source_path)

    first = vectorxarray.from_geodataframe(gdf)
    second = first.copy()
    first.coords["time"] = parse("20170516T000000")
    second.coords["time"] = parse("20170517T000000")

    merged_vxd = xarray.concat([first, second], dim="time")
    with pytest.raises(ValueError):
        merged_vxd.vector.plot(column="sandtotal_r")
Example #21
Source File: From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_vector_data(vector_data):
    """
    Parameters
    ----------
    vector_data: str or :obj:`geopandas.GeoDataFrame`
        A file path to an OGR supported source or GeoDataFrame containing
        the vector data. Any other object is passed to the
        ``geopandas.GeoDataFrame`` constructor.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` containing the vector data. When no
    projection is defined it is set to EPSG:4326 on the object itself, so a
    GeoDataFrame passed in by the caller is modified.

    Raises
    ------
    VectorDataError
        If the data is empty or has no 'geometry' column.

    """
    logger = get_logger()

    if isinstance(vector_data, str):
        vector_data = geopandas.read_file(vector_data)
    elif not isinstance(vector_data, geopandas.GeoDataFrame):
        vector_data = geopandas.GeoDataFrame(vector_data)

    if vector_data.empty:
        raise VectorDataError("Empty GeoDataFrame.")
    if "geometry" not in vector_data.columns:
        raise VectorDataError(
            "'geometry' column missing. Columns in file: "
            f"{vector_data.columns.values.tolist()}"
        )

    # make sure projection is set
    # NOTE(review): the ``vector_data.crs`` operands were missing from this
    # copy; they are restored from the warning text below -- confirm.
    if not vector_data.crs:
        vector_data.crs = "EPSG:4326"
        logger.warning(
            "Projection not defined in `vector_data`."
            " Setting to geographic (EPSG:4326)."
        )
    return vector_data
Example #22
Source File: From docker-python with Apache License 2.0 | 6 votes |
def test_spatial_join(self):
    """Join the sample cities to the sample countries and expect a non-trivial result."""
    cities_path = geopandas.datasets.get_path('naturalearth_cities')
    cities = geopandas.read_file(cities_path)
    world_path = geopandas.datasets.get_path('naturalearth_lowres')
    world = geopandas.read_file(world_path)

    # Keep only geometry and name, with the name exposed as "country".
    countries = world[['geometry', 'name']].rename(columns={'name':'country'})

    cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
    self.assertTrue(cities_with_country.size > 1)
Example #23
Source File: From docker-python with Apache License 2.0 | 6 votes |
def test_read(self):
    """Reading the bundled 'nybb' dataset must return more than one value."""
    dataset_path = geopandas.datasets.get_path('nybb')
    df = geopandas.read_file(dataset_path)
    self.assertTrue(df.size > 1)
Example #24
Source File: From momepy with MIT License | 6 votes |
def setup_method(self):
    """Load the "bubenec" layers and add height, volume and network id columns."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    # The attributes and the locals below refer to the same objects.
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    buildings["height"] = np.linspace(10.0, 30.0, 144)
    buildings["volume"] = mm.Volume(buildings, "height").series
    streets["nID"] = mm.unique_id(streets)
    buildings["nID"] = mm.get_network_id(buildings, streets, "nID")
Example #25
Source File: From momepy with MIT License | 6 votes |
def setup_method(self):
    """Load the "bubenec" layers and add height and volume columns to the buildings."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    # The attributes and the locals below refer to the same objects.
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    buildings["height"] = np.linspace(10.0, 30.0, 144)
    buildings["volume"] = mm.Volume(buildings, "height").series
Example #26
Source File: From gridfinder with MIT License | 6 votes |
def clip_rasters(folder_in, folder_out, aoi_in, debug=False):
    """Read continental rasters one at a time, clip to AOI and save

    Parameters
    ----------
    folder_in : str, Path
        Path to directory containing rasters.
    folder_out : str, Path
        Path to directory to save clipped rasters.
    aoi_in : str, Path or GeoDataFrame
        Path to an AOI file (readable by Fiona) to use for clipping, or an
        already loaded GeoDataFrame. Only its first feature is used.
    debug : bool
        Print the name of each raster as it is processed.
    """
    from pathlib import Path

    if isinstance(aoi_in, gpd.GeoDataFrame):
        aoi = aoi_in
    else:
        aoi = gpd.read_file(aoi_in)

    coords = [json.loads(aoi.to_json())["features"][0]["geometry"]]
    # Accept plain strings as documented; ``/`` below needs a Path.
    folder_out = Path(folder_out)

    for file_path in os.listdir(folder_in):
        if not file_path.endswith(".tif"):
            continue
        if debug:
            print(f"Doing {file_path}")
        # NOTE(review): the opening call was missing from this copy;
        # ``rasterio.open(os.path.join(folder_in`` is assumed from the
        # remaining ``, file_path))`` and the ``mask(dataset=...)`` call --
        # confirm that ``rasterio`` is imported by the module.
        # The context manager closes the dataset once it has been masked.
        with rasterio.open(os.path.join(folder_in, file_path)) as ntl_rd:
            ntl, affine = mask(dataset=ntl_rd, shapes=coords, crop=True, nodata=0)

        # Drop the leading band axis when the result is three-dimensional.
        if ntl.ndim == 3:
            ntl = ntl[0]

        save_raster(folder_out / file_path, ntl, affine)
Example #27
Source File: From LSDMappingTools with MIT License | 6 votes |
def ReadChannelData(DataDirectory, FilenamePrefix):
    """
    This function reads in the file with the suffix '_MChiSegmented.csv'
    or '_MChiSegmented.geojson' to a dataframe. The csv file is preferred
    when both exist.

    Args:
        DataDirectory: the data directory (joined by plain concatenation, so
            it must end with a path separator)
        FilenamePrefix: the file name prefix

    Returns:
        dataframe with the channel data (pandas for csv, geopandas for
        geojson), without any segment that contains a chi value of -9999

    The process exits with status 1 when neither file exists.

    Author: MDH

    """
    # get the filename and open either csv or geojson
    Suffix = '_MChiSegmented'
    Filename = FilenamePrefix+Suffix
    csv_path = DataDirectory+Filename+".csv"
    geojson_path = DataDirectory+Filename+".geojson"

    if os.path.isfile(csv_path):
        # read in the dataframe using pandas
        ChannelData = pd.read_csv(csv_path)
    elif os.path.isfile(geojson_path):
        # read in the dataframe using geopandas
        ChannelData = gpd.read_file(geojson_path)
    else:
        print("No file named "+DataDirectory+Filename+".* found")
        # Exit with a non-zero status so the failure is visible to callers.
        sys.exit(1)

    # If there is no chi values due to threshold then chi will be -9999
    # throw out these segments
    Segments2Remove = ChannelData[ChannelData.chi == -9999].segment_number.unique()
    ChannelData = ChannelData[~ChannelData.segment_number.isin(Segments2Remove)]

    # return the channel data
    return ChannelData
Example #28
Source File: From EarthSim with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_file_from_quest(collection_name, service_uri, parameter, mask_shapefile, use_existing=True):
    """
    For a given collection_name, service_uri, parameter_name, and
    mask_shapefile, return quest's path to the corresponding file.

    If the given combination does not exist in quest, data will be
    downloaded and then stored in quest.

    The parameter, mask_shapefile, and service are stored in the
    dataset's metadata.

    Note: service_uri=svc://dummy with
    collection_name=test_philippines_small skips quest completely and
    returns hardcoded 'philippines_small' data; in that mode only the
    'landuse' and 'elevation' parameters are supported and any other
    value raises ValueError.
    """
    if service_uri == 'svc://dummy' and collection_name == 'test_philippines_small':
        if parameter == 'landuse':
            return 'philippines_small/LC_hd_global_2012.tif'
        elif parameter == 'elevation':
            return 'philippines_small/gmted_elevation.tif'
        else:
            # Name the offending value instead of raising an empty error.
            raise ValueError(
                "Unsupported parameter for the dummy service: %r" % (parameter,))

    # Bounds of the first feature in the mask shapefile.
    bounds = [float(x) for x in gpd.read_file(mask_shapefile).geometry.bounds.values[0]]
    dataset_id = download_data(service_uri, bounds, collection_name, use_existing)
    metadata = quest.api.datasets.update_metadata(dataset_id, metadata={
        'mask_shapefile': mask_shapefile,
        'service_uri': service_uri,
        'parameter': parameter})[dataset_id]
    return metadata['file_path']
Example #29
Source File: From urbansprawl with MIT License | 6 votes |
def load_geodataframe(geo_filename):
    """
    Load input GeoDataFrame

    Parameters
    ----------
    geo_filename : string
        input GeoDataFrame filename

    Returns
    ----------
    geopandas.GeoDataFrame
        loaded data, with the comma-separated columns turned back into
        lists and passed through ``ox.project_gdf``

    """
    # Load using geopandas
    df_osm_data = gpd.read_file(geo_filename)
    # Set None as NaN
    df_osm_data.fillna(value=np.nan, inplace=True)
    # Replace empty string (Json NULL sometimes read as '') for NaN
    df_osm_data.replace('', np.nan, inplace=True)

    def list_int_from_string(x):
        # List of integers given input in string format
        return [ int(id_) for id_ in x.split(",") ]

    def list_str_from_string(x):
        # List of strings given input in string format
        return x.split(",")

    # Recover list: each column is parsed only when present, nulls become NaN
    list_columns = (
        ("activity_category", list_str_from_string),
        ("containing_parts", list_int_from_string),
        ("containing_poi", list_int_from_string),
    )
    for column, parse in list_columns:
        if column in df_osm_data.columns:
            df_osm_data[column] = df_osm_data[column].apply(
                lambda x: parse(x) if pd.notnull(x) else np.nan
            )

    # To UTM coordinates
    return ox.project_gdf( df_osm_data )
Example #30
Source File: From geocube with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_from_geodataframe():
    """Check geometry, variables and CRS of a dataset built from a GeoDataFrame."""
    gdf = gpd.read_file(os.path.join(TEST_INPUT_DATA_DIR, "soil_data_flat.geojson"))
    vxd = vectorxarray.from_geodataframe(gdf)
    assert all(gdf.geometry == vxd.geometry.values)
    assert sorted(gdf.columns.tolist() + ["crs"]) == sorted(vxd.variables)
    # NOTE(review): both operands of this comparison are missing from this
    # copy (only ``==`` and ``["crs_wkt"]`` remain); a comparison of the
    # frame's CRS with the stored "crs_wkt" attribute is assumed -- confirm
    # against upstream.
    assert gdf.crs == vxd.crs.attrs["crs_wkt"]
    assert "geometry" in vxd.coords
    assert "crs" in vxd.coords