Python geopandas.sjoin() Examples
The following are 12 code examples of geopandas.sjoin(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module geopandas, or try the search function.
Example #1
Source File: test_geopandas.py From docker-python with Apache License 2.0 | 6 votes |
def test_spatial_join(self):
    cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
    world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
    countries = world[['geometry', 'name']]
    countries = countries.rename(columns={'name': 'country'})
    cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
    self.assertTrue(cities_with_country.size > 1)
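A note on API drift: the op= keyword used throughout these examples was deprecated in GeoPandas 0.10 in favor of predicate=, and the bundled geopandas.datasets module was removed in GeoPandas 1.0. A minimal sketch of the same join against current GeoPandas, assuming local copies of the Natural Earth files:

import geopandas

# Assumed local copies of the Natural Earth data; geopandas.datasets
# is no longer bundled with GeoPandas 1.0+
cities = geopandas.read_file("naturalearth_cities.shp")
world = geopandas.read_file("naturalearth_lowres.shp")

countries = world[['geometry', 'name']].rename(columns={'name': 'country'})
# GeoPandas >= 0.10 takes the join predicate as `predicate`, not `op`
cities_with_country = geopandas.sjoin(cities, countries, how="inner",
                                      predicate="intersects")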
Example #2
Source File: data_extract.py From urbansprawl with MIT License | 5 votes |
def get_df_extract(df_data, poly_gdf, operation="within"):
    """ Indexes the input geo-data frame within an input region of interest.
    If the region of interest is given as a polygon, its bounding box is indexed

    Parameters
    ----------
    df_data : geopandas.GeoDataFrame
        input data frame to index
    poly_gdf : geopandas.GeoDataFrame
        geodataframe containing the region of interest in the form of a polygon
    operation : string
        the desired spatial join operation: 'within' or 'intersects'

    Returns
    ----------
    geopandas.GeoDataFrame
        returns the population data frame indexed within the region of interest
    """
    # Project to the same coordinate system
    poly_gdf = ox.project_gdf(poly_gdf, to_crs=df_data.crs)
    # Spatial join
    df_extract = gpd.sjoin(df_data, poly_gdf, op=operation)
    # Keep original columns
    df_extract = df_extract[df_data.columns]
    return df_extract
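Re-selecting df_data.columns at the end matters because sjoin appends the right frame's attributes plus an index_right column to every match. A hypothetical call, assuming a population point layer and a single-polygon boundary file:

import geopandas as gpd

# Hypothetical inputs for illustration
df_population = gpd.read_file("population_points.shp")
city_boundary = gpd.read_file("city_boundary.shp")

# Keep only the population points falling inside the boundary polygon
df_city_population = get_df_extract(df_population, city_boundary,
                                    operation="within")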
Example #3
Source File: utils.py From urbansprawl with MIT License | 5 votes |
def population_downscaling_validation(df_osm_built, df_insee):
    """ Validates the population downscaling estimation by aggregating the
    estimated population of the buildings lying within each population square,
    which allows comparing the real population count with the estimated one.
    Updates a new column 'pop_estimation' for each square in the population data frame

    Parameters
    ----------
    df_osm_built : geopandas.GeoDataFrame
        input buildings with computed population count
    df_insee : geopandas.GeoDataFrame
        INSEE population data

    Returns
    ----------
    """
    df_osm_built['geom'] = df_osm_built.geometry
    df_osm_built_residential = df_osm_built[df_osm_built.apply(
        lambda x: x.landuses_m2['residential'] > 0, axis=1)]
    df_insee.crs = df_osm_built_residential.crs

    # Intersect gridded population squares with buildings
    sjoin = gpd.sjoin(df_insee, df_osm_built_residential, op='intersects')
    # Weight each building's population by the share of its footprint
    # lying within the square
    sjoin['pop_estimation'] = sjoin.apply(
        lambda x: x.population * (x.geom.intersection(x.geometry).area / x.geom.area),
        axis=1)

    # Initialize
    df_insee['pop_estimation'] = np.nan
    sum_pop_per_square = sjoin.groupby(sjoin.index)['pop_estimation'].sum()
    df_insee.loc[sum_pop_per_square.index, "pop_estimation"] = sum_pop_per_square.values

    # Drop the auxiliary geometry column
    df_osm_built.drop('geom', axis=1, inplace=True)
    # Set NaN values to 0
    df_insee.loc[df_insee.pop_estimation.isnull(), "pop_estimation"] = 0

    # Compute absolute and relative error
    df_insee["absolute_error"] = df_insee.apply(
        lambda x: abs(x.pop_count - x.pop_estimation), axis=1)
    df_insee["relative_error"] = df_insee.apply(
        lambda x: abs(x.pop_count - x.pop_estimation) / x.pop_count, axis=1)
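The heart of this validation is the area-weighting term population * (building ∩ square).area / building.area. A self-contained sketch of that computation on toy geometries (all values illustrative):

from shapely.geometry import Polygon

# A 200 m population cell and a building straddling its edge
square = Polygon([(0, 0), (200, 0), (200, 200), (0, 200)])
building = Polygon([(150, 150), (250, 150), (250, 250), (150, 250)])

population = 120  # estimated population of the building
# Fraction of the building footprint lying inside the cell
share = building.intersection(square).area / building.area
print(population * share)  # 120 * 0.25 = 30.0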
Example #4
Source File: refine.py From OpenSarToolkit with MIT License | 5 votes |
def _remove_outside_aoi(aoi_gdf, inventory_df):
    '''Removes scenes that are located outside the AOI

    The search routine works over a simplified representation of the AOI.
    This may then include acquisitions that do not overlap with the AOI.
    In this step we sort out the scenes that are completely outside the
    actual AOI.

    Args:
        aoi_gdf (gdf): the AOI as a GeoDataFrame
        inventory_df (gdf): an OST compliant Sentinel-1 inventory GeoDataFrame

    Returns:
        inventory_df (gdf): the manipulated inventory GeoDataFrame
    '''
    # get columns of input dataframe for the later return statement
    cols = inventory_df.columns

    # 1) keep only intersecting footprints (double, since we do this before)
    inventory_df = gpd.sjoin(inventory_df, aoi_gdf, how='inner', op='intersects')

    # if the AOI gdf has an id field, the join renames it to id_left
    if 'id_left' in inventory_df.columns.tolist():
        # rename id_left back to id
        inventory_df.columns = [
            'id' if x == 'id_left' else x
            for x in inventory_df.columns.tolist()]

    return inventory_df[cols]
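The id_left rename is a consequence of sjoin suffixing column names that appear in both frames. sjoin also accepts lsuffix/rsuffix arguments, so a possible alternative is choosing non-colliding suffixes up front; a small self-contained sketch:

import geopandas as gpd
from shapely.geometry import Point, Polygon

left = gpd.GeoDataFrame({'id': [1]}, geometry=[Point(0.5, 0.5)])
right = gpd.GeoDataFrame({'id': [7]},
                         geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])])

# Both frames carry an 'id' column; custom suffixes avoid the rename step
joined = gpd.sjoin(left, right, how='inner', op='intersects',
                   lsuffix='inv', rsuffix='aoi')
# Duplicated columns now appear as 'id_inv' and 'id_aoi'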
Example #5
Source File: BaseModel.py From spatial_access with GNU General Public License v3.0 | 5 votes |
def _spatial_join_community_index(self, dataframe,
                                  shapefile='data/chicago_boundaries/chicago_boundaries.shp',
                                  spatial_index='community',
                                  projection='epsg:4326'):
    """
    Join the dataframe with location data from shapefile.
    Args:
        dataframe: pandas dataframe with unique id.
        shapefile: shapefile containing geometry.
        spatial_index: column name of the aggregation area in the shapefile.
        projection: defaults to 'epsg:4326'
    Returns:
        dataframe.
    Raises:
        ShapefileNotFoundException: Shapefile not found.
        SpatialIndexNotMatchedException: spatial_index not found in shapefile.
    """
    geometry = [Point(xy) for xy in zip(dataframe['lon'], dataframe['lat'])]
    crs = {'init': projection}
    geo_original = gpd.GeoDataFrame(dataframe, crs=crs, geometry=geometry)
    try:
        boundaries_gdf = gpd.read_file(shapefile)
    except FileNotFoundError:
        raise ShapefileNotFoundException('shapefile not found: {}'.format(shapefile))
    geo_result = gpd.sjoin(boundaries_gdf, geo_original, how='right', op='intersects')
    dataframe_columns = list(dataframe.columns)
    geo_result.rename(columns={spatial_index: 'spatial_index'}, inplace=True)
    dataframe_columns.append('spatial_index')
    dataframe_columns.append('geometry')
    try:
        geo_result = geo_result[dataframe_columns]
    except KeyError:
        raise SpatialIndexNotMatchedException('Unable to match spatial_index:{}'.format(spatial_index))
    if len(geo_result) != len(dataframe):
        self.logger.warning('Length of joined dataframe ({}) != length of input dataframe ({})'
                            .format(len(geo_result), len(dataframe)))
    return geo_result
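Two details are worth flagging: crs={'init': ...} is the legacy pyproj syntax (current GeoPandas accepts a plain 'EPSG:4326' string), and how='right' keeps every row of the point frame even when no boundary matches it. A sketch of the point-construction step in the modern spelling, with made-up coordinates:

import pandas as pd
import geopandas as gpd

# Made-up coordinates for illustration
df = pd.DataFrame({'id': [1, 2], 'lon': [-87.63, -87.65], 'lat': [41.88, 41.79]})
# gpd.points_from_xy replaces the manual Point list comprehension
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']),
                       crs='EPSG:4326')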
Example #6
Source File: downscaling.py From urbansprawl with MIT License | 4 votes |
def proportional_population_downscaling(df_osm_built, df_insee):
    """ Performs a proportional population downscaling considering the surface
    dedicated to residential land use.
    Associates the estimated population to each building in column 'population'

    Parameters
    ----------
    df_osm_built : geopandas.GeoDataFrame
        input buildings with computed residential surface
    df_insee : geopandas.GeoDataFrame
        INSEE population data

    Returns
    ----------
    """
    if df_insee.crs != df_osm_built.crs:
        # If projections do not match:
        # First project to lat-long coordinates, then project to UTM coordinates
        df_insee = ox.project_gdf(ox.project_gdf(df_insee, to_latlong=True))
        # OSM building geometries are already projected
        assert df_insee.crs == df_osm_built.crs

    df_osm_built['geom'] = df_osm_built.geometry
    df_osm_built_residential = df_osm_built[df_osm_built.apply(
        lambda x: x.landuses_m2['residential'] > 0, axis=1)]

    # Loading/saving using geopandas loses the 'ellps' key
    df_insee.crs = df_osm_built_residential.crs

    # Intersect gridded population squares with buildings
    sjoin = gpd.sjoin(df_insee, df_osm_built_residential, op='intersects')
    # Calculate the residential area within each square
    # (share of the building footprint lying within the square)
    sjoin['residential_m2_within'] = sjoin.apply(
        lambda x: x.landuses_m2['residential'] * (x.geom.intersection(x.geometry).area / x.geom.area),
        axis=1)

    # Initialize
    df_insee['residential_m2_within'] = 0
    # Sum residential area within each square
    sum_m2_per_square = sjoin.groupby(sjoin.index)['residential_m2_within'].sum()
    # Assign total residential area within each square
    df_insee.loc[sum_m2_per_square.index, "residential_m2_within"] = sum_m2_per_square.values
    # Get the number of m^2 per person
    df_insee["m2_per_person"] = df_insee.apply(
        lambda x: x.residential_m2_within / x.pop_count, axis=1)

    def population_building(x, df_insee):
        # Sum over each square: m2 of building within the square / m2 per person
        return (x.get('m2', []) / df_insee.loc[x.get('idx', [])].m2_per_person).sum()

    # Index: buildings; values: idx: indices of the gridded population squares,
    # m2: residential m2 within each square
    buildings_square_m2_association = sjoin.groupby('index_right').apply(
        lambda x: {'idx': list(x.index), 'm2': list(x.residential_m2_within)})

    # Associate the estimated population to each building
    df_osm_built.loc[buildings_square_m2_association.index, "population"] = \
        buildings_square_m2_association.apply(lambda x: population_building(x, df_insee))

    # Drop the auxiliary geometry column
    df_osm_built.drop('geom', axis=1, inplace=True)
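The groupby(sjoin.index) step is the idiom that folds one-to-many join results back onto the left frame: each population square can intersect many buildings, so per-match contributions are summed per original index before being assigned back with .loc. A stripped-down illustration with toy numbers:

import pandas as pd

# After an sjoin, the left frame's index repeats once per matched building
contributions = pd.Series([40.0, 25.0, 10.0], index=[0, 0, 1],
                          name='residential_m2_within')
per_square = contributions.groupby(level=0).sum()
# per_square: 65.0 for square 0, 10.0 for square 1; assign back with .loc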
Example #7
Source File: utils.py From urbansprawl with MIT License | 4 votes |
def get_population_df_filled_empty_squares(df_insee):
    """ Add empty squares as 0-population box-squares

    Parameters
    ----------
    df_insee : geopandas.GeoDataFrame
        INSEE population data

    Returns
    ----------
    """
    def get_northing_easting(x):
        # Extract northing and easting coordinates
        north, east = x.idINSPIRE.split("N")[1].split("E")
        x["north"] = int(north)
        x["east"] = int(east)
        return x

    # Project data to its original projection coordinates
    df_insee_3035 = ox.project_gdf(df_insee, to_crs="+init=epsg:3035")
    # Get northing and easting coordinates
    coordinates = df_insee.apply(lambda x: get_northing_easting(x), axis=1)[["north", "east"]]

    # +100 meters to obtain the centroid of each box
    coords_offset = 100
    # Input data step
    step = 200.

    # North, east coordinates denote the south-west box endpoint:
    north_min, north_max = coordinates.north.min() + coords_offset, coordinates.north.max() + coords_offset
    east_min, east_max = coordinates.east.min() + coords_offset, coordinates.east.max() + coords_offset

    # Create a mesh grid with one point per square centroid
    # (each square has an extent of 200m by 200m)
    xv, yv = np.meshgrid(np.arange(east_min, east_max, step),
                         np.arange(north_min, north_max, step))

    # For every given coordinate, if a box does not exist (no population),
    # create it with an initial population of 0
    empty_population_box = []
    for E, N in zip(xv.ravel(), yv.ravel()):
        # Center-point
        point_df = gpd.GeoDataFrame([Point(E, N)], columns=["geometry"], crs="+init=epsg:3035")
        if gpd.sjoin(point_df, df_insee_3035).empty:
            # Does not intersect any existing square-box: create a new square
            empty_population_box.append(Polygon([
                (E - 100., N - 100.), (E - 100., N + 100.), (E + 100., N + 100.),
                (E + 100., N - 100.), (E - 100., N - 100.)]))

    # Concatenate the original data frame with the empty squares
    gdf_concat = pd.concat(
        [df_insee_3035,
         gpd.GeoDataFrame({'geometry': empty_population_box,
                           'pop_count': [0] * len(empty_population_box)},
                          crs="+init=epsg:3035")],
        ignore_index=True, sort=False)

    # Remove added grid-cells outside the convex hull of the population data frame
    df_insee_convex_hull_3035 = df_insee_3035.unary_union.convex_hull
    gdf_concat = gdf_concat[gdf_concat.apply(
        lambda x: df_insee_convex_hull_3035.intersects(x.geometry), axis=1)]
    gdf_concat.reset_index(drop=True, inplace=True)

    # Project (first project to latitude-longitude due to a GeoPandas issue)
    return ox.project_gdf(ox.project_gdf(gdf_concat, to_latlong=True))
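Calling sjoin once per candidate centroid performs one join per grid point. An alternative sketch, reusing xv, yv and df_insee_3035 from the function above, joins all centroids in a single call and keeps those that matched nothing:

import geopandas as gpd
from shapely.geometry import Point

# Reuses xv, yv and df_insee_3035 from the function above
centroids = gpd.GeoDataFrame(
    geometry=[Point(E, N) for E, N in zip(xv.ravel(), yv.ravel())],
    crs="+init=epsg:3035")
# One left join for all candidate points; unmatched centroids get NaN
matched = gpd.sjoin(centroids, df_insee_3035, how="left")
unmatched_idx = matched.loc[matched["index_right"].isnull()].index
empty_centroids = centroids.loc[unmatched_idx]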
Example #8
Source File: classification.py From urbansprawl with MIT License | 4 votes |
def compute_landuse_inference(df_buildings, df_landuse):
    """ Compute land use inference for building polygons with no information.
    The inference is done using polygons with a defined land use.
    A building polygon's land use is inferred by adopting the land use of the
    smallest encompassing polygon with a defined land use

    Parameters
    ----------
    df_buildings : geopandas.GeoDataFrame
        input buildings
    df_landuse : geopandas.GeoDataFrame
        land use polygons to aid the inference procedure

    Returns
    ----------
    """
    # Get those indices which need to be inferred, keeping the geometry column only
    df_buildings_to_infer = df_buildings.loc[
        df_buildings['classification'] == 'infer', ["geometry"]]
    # Add the land use polygon's area
    df_landuse['area'] = df_landuse.apply(lambda x: x.geometry.area, axis=1)

    # Get geometries to infer lying within land use polygons
    sjoin = gpd.sjoin(df_buildings_to_infer, df_landuse, op='within')
    # Add an index column to sort values
    sjoin['index'] = sjoin.index
    # Sort values by index, then by area
    sjoin.sort_values(by=['index', 'area'], inplace=True)
    # Drop duplicates, keeping the first (minimum area)
    sjoin.drop_duplicates(subset=['index'], keep='first', inplace=True)

    ##### Set key:value and classification
    # Set default value: inferred:None
    df_buildings.loc[df_buildings_to_infer.index, "key_value"] = df_buildings.loc[
        df_buildings_to_infer.index].apply(lambda x: {"inferred": None}, axis=1)
    # Set the land use for those buildings within a defined land use polygon
    df_buildings.loc[sjoin.index, "key_value"] = sjoin.apply(
        lambda x: {'inferred': x.landuse}, axis=1)
    # Set the classification
    df_buildings.loc[df_buildings_to_infer.index, "classification"] = df_buildings.loc[
        df_buildings_to_infer.index, "key_value"].apply(
            lambda x: classify_landuse_inference(x.get("inferred")))

    # Remove useless rows
    df_buildings.drop(df_buildings[df_buildings.classification.isin([None, "other"])].index,
                      inplace=True)
    df_buildings.reset_index(inplace=True, drop=True)

    assert len(df_buildings[df_buildings.classification.isnull()]) == 0

############################################
### Activity type classification
############################################
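The sort_values/drop_duplicates pair is a compact "smallest container per building" selection: after the within-join, a building may lie inside several land use polygons, and sorting by area before dropping duplicates keeps only the smallest one. An equivalent spelling using idxmin, assuming a fresh join before the helper 'index' column is added:

# Reuses df_buildings_to_infer and df_landuse from the function above
sj = gpd.sjoin(df_buildings_to_infer, df_landuse, op='within')
# Reset to a unique index so the idxmin() labels are unambiguous, then
# keep, per building, the matching polygon with the smallest area
smallest = (sj.reset_index()
              .loc[lambda d: d.groupby('index')['area'].idxmin()]
              .set_index('index'))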
Example #9
Source File: mesh.py From dhitools with MIT License | 4 votes |
def lyr_from_shape(self, lyr_name, input_shp, field_attribute, output_shp=None):
    """
    Create a model input layer at mesh element coordinates.

    For example, input_shp is a roughness map containing polygons with
    roughness values. A spatial join is performed for mesh element points
    within input_shp polygons and returns field_attribute at element points.

    Parameters
    ----------
    lyr_name : str
        Layer name as key to `lyrs` attribute dictionary
    input_shp : str
        Path to input shape file
    field_attribute : str
        Attribute in `input_shp` to extract at mesh elements
    output_shp : str, optional
        Output path to write to .shp file

    Returns
    -------
    Inserts `lyr_name` into the `lyrs` attribute dictionary as an ndarray,
    shape (num_elements,), with the extracted `field_attribute` value for
    each mesh element
    """
    # Load input_shp to GeoDF
    input_df = gpd.read_file(input_shp)

    # Load mesh element points as GeoDF
    mesh_df = self.to_gpd()

    # Perform spatial join
    join_df = gpd.sjoin(mesh_df, input_df, how='left', op='within')

    # Drop duplicated points; there is the potential to have duplicated
    # points when they intersect two different polygons. Keep the first
    join_df = join_df[~join_df.index.duplicated(keep='first')]

    self.lyrs[lyr_name] = np.array(join_df[field_attribute])

    if output_shp is not None:
        join_df.to_file(output_shp)
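The deduplication line deserves a look in isolation: a left sjoin yields one row per (point, polygon) match, so a mesh point on a shared polygon boundary appears twice, and the boolean mask keeps only its first match. A toy illustration:

import pandas as pd

# Index 1 matched two polygons and therefore appears twice
df = pd.DataFrame({'roughness': [0.03, 0.05, 0.02]}, index=[0, 1, 1])
deduped = df[~df.index.duplicated(keep='first')]
# deduped holds one row per mesh element: indices 0 and 1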
Example #10
Source File: generators.py From PowerGenome with MIT License | 4 votes |
def label_hydro_region(gens_860, pudl_engine, model_regions_gdf):
    """
    Label hydro facilities that don't have a region by default.

    Parameters
    ----------
    gens_860 : dataframe
        Information on all generators from PUDL
    pudl_engine : sqlalchemy.Engine
        A sqlalchemy connection for use by pandas
    model_regions_gdf : dataframe
        Geodataframe of the model regions

    Returns
    -------
    dataframe
        Plant id and region for any hydro that didn't originally have a
        region label.
    """
    plant_entity = pd.read_sql_table("plants_entity_eia", pudl_engine)
    model_hydro = gens_860.loc[
        gens_860["technology_description"] == "Conventional Hydroelectric"
    ].merge(plant_entity[["plant_id_eia", "latitude", "longitude"]], on="plant_id_eia")

    no_lat_lon = model_hydro.loc[
        (model_hydro["latitude"].isnull()) | (model_hydro["longitude"].isnull()), :
    ]
    if not no_lat_lon.empty:
        print(no_lat_lon["summer_capacity_mw"].sum(), " MW without lat/lon")

    model_hydro = model_hydro.dropna(subset=["latitude", "longitude"])

    # Convert the lon/lat values to geo points. Need to add an initial CRS and then
    # change it to align with the IPM regions
    model_hydro_gdf = gpd.GeoDataFrame(
        model_hydro,
        geometry=gpd.points_from_xy(model_hydro.longitude, model_hydro.latitude),
        crs={"init": "epsg:4326"},
    )
    if model_hydro_gdf.crs != model_regions_gdf.crs:
        model_hydro_gdf = model_hydro_gdf.to_crs(model_regions_gdf.crs)

    model_hydro_gdf = gpd.sjoin(model_regions_gdf, model_hydro_gdf)
    model_hydro_gdf = model_hydro_gdf.rename(columns={"IPM_Region": "region"})

    keep_cols = ["plant_id_eia", "region"]

    return model_hydro_gdf.loc[:, keep_cols]
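sjoin expects both frames in the same CRS (recent GeoPandas versions warn on a mismatch), which is why the example reprojects before joining. Note also that the left frame's geometry is the one retained in the result, so this join carries the region polygons rather than the plant points. A hypothetical guard helper in the same spirit:

import geopandas as gpd

def align_crs(gdf, target_gdf):
    """Hypothetical helper: reproject gdf onto target_gdf's CRS if they differ."""
    if gdf.crs != target_gdf.crs:
        gdf = gdf.to_crs(target_gdf.crs)
    return gdf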
Example #11
Source File: burst.py From OpenSarToolkit with MIT License | 4 votes |
def refine_burst_inventory(aoi, burst_gdf, outfile, coverages=None):
    '''Creates a Burst GeoDataFrame from an OST inventory file

    Args:

    Returns:

    '''
    # turn the AOI into a GeoDataFrame
    aoi_gdf = gpd.GeoDataFrame(vec.wkt_to_gdf(aoi).buffer(0.05))
    aoi_gdf.columns = ['geometry']

    # get columns of input dataframe for the later return statement
    cols = burst_gdf.columns

    # 1) keep only intersecting footprints (double, since we do this before)
    burst_gdf = gpd.sjoin(burst_gdf, aoi_gdf, how='inner', op='intersects')

    # if the AOI gdf has an id field, the join renames it to id_left
    if 'id_left' in burst_gdf.columns.tolist():
        # rename id_left back to id
        burst_gdf.columns = (['id' if x == 'id_left' else x
                              for x in burst_gdf.columns.tolist()])

    # remove duplicates
    burst_gdf.drop_duplicates(['SceneID', 'Date', 'bid'], inplace=True)

    # check if the number of bursts aligns with the number of coverages
    if coverages:
        for burst in burst_gdf.bid.unique():
            if len(burst_gdf[burst_gdf.bid == burst]) != coverages:
                print(' INFO. Removing burst {} because of'
                      ' insufficient coverage.'.format(burst))
                burst_gdf.drop(burst_gdf[burst_gdf.bid == burst].index,
                               inplace=True)

    # save the file
    burst_gdf['Date'] = burst_gdf['Date'].astype(str)
    burst_gdf['BurstNr'] = burst_gdf['BurstNr'].astype(str)
    burst_gdf['AnxTime'] = burst_gdf['AnxTime'].astype(str)
    burst_gdf['Track'] = burst_gdf['Track'].astype(str)
    burst_gdf.to_file(outfile)

    return burst_gdf[cols]
Example #12
Source File: geometries.py From deeposlandia with MIT License | 4 votes |
def extract_tile_items(
    raster_features, labels, min_x, min_y, tile_width, tile_height
):
    """Extract label items that belong to the tile defined by the minimum
    horizontal pixel `min_x` (left tile limit), the minimum vertical pixel
    `min_y` (upper tile limit) and the sizes `tile_width` and `tile_height`
    measured as a pixel amount.

    The tile is cropped from the original image raster as follows:
    - horizontally, between `min_x` and `min_x+tile_width`
    - vertically, between `min_y` and `min_y+tile_height`

    This method takes care of the original data projection (UTM 37S, Tanzania
    area); however, this parameter may be changed if similar data in another
    projection is used.

    Parameters
    ----------
    raster_features : dict
        Raw image raster geographical features (`north`, `south`, `east` and
        `west` coordinates, `width` and `height` measured in pixels)
    labels : geopandas.GeoDataFrame
        Raw image labels, as a set of geometries
    min_x : int
        Left tile limit, as a horizontal pixel index
    min_y : int
        Upper tile limit, as a vertical pixel index
    tile_width : int
        Tile width, measured in pixels
    tile_height : int
        Tile height, measured in pixels

    Returns
    -------
    geopandas.GeoDataFrame
        Set of ground-truth labels contained in the tile, characterized by
        their type (complete, unfinished or foundation) and their geometry
    """
    area = get_tile_footprint(
        raster_features, min_x, min_y, tile_width, tile_height
    )
    bdf = gpd.GeoDataFrame(
        crs=from_epsg(raster_features["srid"]), geometry=[area]
    )
    reproj_labels = labels.to_crs(epsg=raster_features["srid"])

    tile_items = gpd.sjoin(reproj_labels, bdf)
    if tile_items.shape[0] == 0:
        return tile_items[["condition", "geometry"]]

    tile_items = gpd.overlay(tile_items, bdf)
    tile_items = tile_items.explode()  # Manage MultiPolygons

    return tile_items[["condition", "geometry"]]
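Here sjoin acts as a fast intersection filter and gpd.overlay then performs the exact crop. Since GeoPandas 0.7 the same crop can be written with gpd.clip; a sketch reusing reproj_labels and area from the function above:

import geopandas as gpd

# gpd.clip accepts a shapely geometry as the mask
clipped = gpd.clip(reproj_labels, area)
# Like sjoin + overlay, geometries straddling the tile edge are cut back
clipped = clipped.explode()  # split any resulting MultiPolygons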