"""
Subset and Sample using Sliderule
https://slideruleearth.io
"""
from __future__ import annotations
import typing
import warnings
from pathlib import Path
from typing import Any
import geopandas as gpd
try:
import sliderule
from sliderule import earthdata, raster, toregion
except ImportError:
warnings.warn(
"'sliderule' package not found. Install for GEDI & ICESat2 functionality: https://slideruleearth.io/web/rtd/getting_started/Install.html",
stacklevel=2,
)
from coincident._utils import depends_on_optional
from coincident.datasets.planetary_computer import WorldCover
from coincident.search.wesm import read_wesm_csv, stacify_column_names
def _gdf_to_sliderule_polygon(gf: gpd.GeoDataFrame) -> list[dict[str, float]]:
# NOTE: .union_all().convex_hull of 2 point geodataframe is LINESTRING
bounding_polygon = gpd.GeoDataFrame(geometry=gf[["geometry"]].dissolve().envelope)
return toregion(bounding_polygon)["poly"] # type: ignore[no-any-return]
def _gdf_to_sliderule_params(gf: gpd.GeoDataFrame) -> dict[str, Any]:
# Sliderule expects single 'geometry' column
# Dissolve creates a single multipolygon from all geometries
sliderule_aoi = _gdf_to_sliderule_polygon(gf)
params = {}
params["poly"] = sliderule_aoi
params["t0"] = gf.start_datetime.min().tz_localize(None).isoformat()
params["t1"] = gf.end_datetime.max().tz_localize(None).isoformat()
return params
def _granule_from_assets(assets: gpd.GeoDataFrame) -> str | None:
granule = None
# NOTE: change to while loop in case tons of assets?
for _k, v in assets.items():
if v.get("roles") == "data":
granule = Path(v.get("href")).name
return granule
def _decode_worldcover(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
mapping = {
key: value["description"]
for (
key,
value,
) in WorldCover().classmap.items()
}
return gf.replace({"worldcover.value": mapping})
@typing.no_type_check
def _add_raster_samples(
include_worldcover: bool, include_3dep: bool
) -> dict[str, dict[str, Any]]:
samples = {}
if include_worldcover:
# SlidRule uses 2021 version (timestamped 2021-06-30)
# https://github.com/SlideRuleEarth/sliderule/blob/220e39bcad6d6682ba2b0d0ea502fc2ab142c3c5/packages/core/extensions/earth_data_query.lua#L205-L208
samples.update({"worldcover": {"asset": "esa-worldcover-10meter"}})
if include_3dep:
# NOTE: use "substr" to restrict to specific WESM project?
# https://slideruleearth.io/web/rtd/user_guide/SlideRule.html#raster-sampling
# Warning: use_poi_time NOT using acquisition time here
# https://github.com/SlideRuleEarth/sliderule/issues/444
samples.update({"3dep": {"asset": "usgs3dep-1meter-dem", "use_poi_time": True}})
return samples
[docs]
@depends_on_optional("sliderule")
def subset_gedi02a(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
include_worldcover: bool = False,
include_3dep: bool = False,
sliderule_params: dict[str, Any] | None = None,
) -> gpd.GeoDataFrame:
"""
Subsets GEDI L2A data based on a given GeoDataFrame and optional area of interest (AOI).
Parameters
----------
gf
A GeoDataFrame of results from coincident.search.search(dataset='gedi')
aoi
A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search.
include_worldcover
Whether to include WorldCover data in the processing. Default is False.
include_3dep
Whether to include 3DEP data in the processing. Default is False.
sliderule_params
A dictionary of additional parameters to be passed to SlideRule gedi02ax API.
Returns
-------
A GeoDataFrame containing the subsetted GEDI L2A data.
Notes
-----
The function sets `degrade_filter=True` and `l2_quality_filter=True` by default.
"""
params = _gdf_to_sliderule_params(gf)
granule_names = gf.assets.apply(_granule_from_assets).to_list()
if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)
params.update(
{
"degrade_filter": True,
"l2_quality_filter": True,
}
)
params["samples"] = _add_raster_samples(include_worldcover, include_3dep)
if sliderule_params is not None:
params.update(sliderule_params)
gfsr = sliderule.run("gedi02ax", params, resources=granule_names)
if gfsr.empty:
message = "SlideRule returned an empty GeoDataFrame. Check spatial/temporal overlap and filter parameters."
warnings.warn(
message,
stacklevel=2,
)
if include_worldcover:
gfsr = _decode_worldcover(gfsr)
return gfsr
[docs]
@depends_on_optional("sliderule")
def subset_atl06(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
dropna: bool = True,
include_worldcover: bool = False,
include_3dep: bool = False,
sliderule_params: dict[str, Any] | None = None,
) -> gpd.GeoDataFrame:
"""
Subset ATL06 data using provided GeoDataFrame and optional parameters.
Parameters
----------
gf
GeoDataFrame containing the input data.
aoi
A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search.
dropna
Whether to drop rows with NaN values in the 'h_li' column.
include_worldcover
Whether to include WorldCover data in the processing.
include_3dep
Whether to include 3DEP data in the processing.
sliderule_params
Additional parameters to pass to the SlideRule icesat2.atl06sp().
Returns
-------
GeoDataFrame with ATL06 standard product measurements.
"""
params = _gdf_to_sliderule_params(gf)
# Note necessary but avoids another CMR search
granule_names = gf.assets.apply(_granule_from_assets).to_list()
granule_names = [x.replace("ATL03", "ATL06") for x in granule_names]
if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)
params["samples"] = _add_raster_samples(include_worldcover, include_3dep)
# User-provided parameters take precedence
if sliderule_params is not None:
params.update(sliderule_params)
gfsr = sliderule.run("atl06x", params, resources=granule_names)
if gfsr.empty:
message = "SlideRule returned an empty GeoDataFrame. Check spatial/temporal overlap and filter parameters."
raise ValueError(
message,
)
# Drop poor-quality data
# https://github.com/orgs/SlideRuleEarth/discussions/441
gfsr = gfsr[gfsr.atl06_quality_summary == 0]
# NOTE: add server-side filtering for NaNs in sliderule.atl06sp?
if dropna:
gfsr = gfsr.dropna(subset=["h_li"])
if include_worldcover:
gfsr = _decode_worldcover(gfsr)
if gfsr.empty:
warnings.warn(
"Only poor-quality data found after filtering. Resulting GeoDataFrame is empty.",
stacklevel=2,
)
return gfsr
[docs]
@depends_on_optional("sliderule")
def process_atl06sr(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
target_surface: str = "ground",
include_worldcover: bool = False,
include_3dep: bool = False,
sliderule_params: dict[str, Any] | None = None,
) -> gpd.GeoDataFrame:
"""
Generate Custom SlideRule ATL06 Product
Parameters
----------
gf
Input GeoDataFrame with ICESat-2 ATL03 Granule metadata
aoi
A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search.
target_surface
Specify which ATL08 filters to apply. Must be either 'ground' or 'canopy'.
include_worldcover
Whether to include WorldCover data in the processing.
include_3dep
Whether to include 3DEP data in the processing.
sliderule_params
Additional parameters for SlideRule processing. User-provided parameters take precedence over default settings.
Returns
-------
Processed GeoDataFrame with the results from SlideRule.
"""
# Time range and AOI from input GeoDataFrame
params = _gdf_to_sliderule_params(gf)
# Common default settings for sliderule processing
# https://github.com/SlideRuleEarth/sliderule/issues/448
params.update(
{
"cnf": 0, # -2:tep, -1:not considered, 0:background, 1:within 10m, 2:low, 3:medium, 4:high (default=2)
"srt": -1, # -1:dynamic, 0:land, 1:ocean, 2:sea ice, 3:land ice, 4:inland water (default=-1)
"ats": 20.0, # Minimum allowed along track spread of a photon segment (m) (default=20.0)
"cnt": 5, # Minimum allowed number of photons in a segment (default=10)
"len": 40.0, # Length of photon segment (m) (default=40.0)
"res": 20.0, # Resolution or step size of photon segment (m) (default=20.0)
"fit": {
"maxi": 5 # Maximum iterations on least squares fit (default=5)
},
}
)
if target_surface == "ground":
params.update({"atl08_class": ["atl08_ground"]})
elif target_surface == "canopy":
params.update({"atl08_class": ["atl08_top_of_canopy", "atl08_canopy"]})
else:
msg = f"Invalid target_suface={target_surface}. Must be 'ground' or 'canopy'."
raise ValueError(msg)
params["samples"] = _add_raster_samples(include_worldcover, include_3dep)
# Not necessary but avoids another CMR search!
granule_names = gf.assets.apply(_granule_from_assets).to_list()
if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)
# User-provided parameters take precedence
if sliderule_params is not None:
params.update(sliderule_params)
# gfsr = icesat2.atl06p(params, resources=granule_names)
gfsr = sliderule.run("atl03x", params, resources=granule_names)
# Drop columns we don't need?
# dropcols = ['worldcover.time','worldcover.flags','worldcover.file_id',
# '3dep.time','3dep.flags']
if include_worldcover:
gfsr = _decode_worldcover(gfsr)
return gfsr
[docs]
@depends_on_optional("sliderule")
def sample_raster(
gf: gpd.GeoDataFrame,
asset_key: str = "usgs3dep-1meter-dem",
project_name: str | None = None,
) -> gpd.GeoDataFrame:
"""
Sample 3DEP 1-meter DEM with POINT geometries in GeoDataFrame.
Points should be (longitude, latitude) not UTM
Parameters
----------
gf
A GeoDataFrame containing POINT geometries in EPSG:4326
asset_key
Sliderule asset key to sample (e.g. "usgs3dep-1meter-dem" or "esa-worldcover-10meter")
project_name
For 3dep, a WESM project name to restrict results to (e.g. "CO_WestCentral_2019_A19")
Returns
-------
A GeoDataFrame with sampled elevation data from the 3DEP 1-meter DEM.
Notes
-----
Sliderule automatically reprojects to ITRF2020. For 3DEP 1m this means going from tiffs stored as NAD83(2011) + NAVD88 height in various UTM zones.
"""
# Just work with geometry column
gf = gf[["geometry"]].reset_index(drop=True)
# Ensure passed geodataframe is lon/lat for arguments to sliderule
gfll = gf.to_crs("EPSG:9989")
points = [[x, y] for x, y in zip(gfll.geometry.x, gfll.geometry.y, strict=False)]
if asset_key != "usgs3dep-1meter-dem":
samples = raster.sample(asset_key, points)
else:
poly = _gdf_to_sliderule_polygon(gfll)
geojson = earthdata.tnm(
short_name="Digital Elevation Model (DEM) 1 meter", polygon=poly
)
gfsr = raster.sample(asset_key, points, {"catalog": geojson})
if project_name is not None:
# Allow NaNs if restricting to a single WESM project
gfsr = (
gfsr[gfsr.file.str.contains(project_name)]
.drop_duplicates("geometry")
.reset_index()
)
else:
# Drop NaNs if sampling among multiple WESM projects
gfsr = gfsr[gfsr.value.notna()].drop_duplicates("geometry").reset_index()
# Use acquisition datetime instead of upload time
# https://github.com/SlideRuleEarth/sliderule/issues/444
dfwesm = stacify_column_names(read_wesm_csv())
def get_wesm_datetime(filename: str) -> gpd.pd.Timestamp:
project_name = filename.split("/")[-3]
return dfwesm[dfwesm.project == project_name].datetime.iloc[0]
gfsr["time"] = gfsr.file.apply(get_wesm_datetime)
# Return results in original CRS of passed dataframe
gf3dep = gfsr.to_crs(gf.crs)
# Return with order matching input geodataframe
samples = gf.sjoin_nearest(gf3dep, how="left").drop(columns="index_right")
return samples
def to_3d(
gf: gpd.GeoDataFrame,
z_col: str,
) -> gpd.GeoDataFrame:
"""
Convert 2D GeoDataFrame to 3D by adding z-coordinate from specified column.
Necessary to apply 3D transforms via gf.to_crs().
Parameters
----------
gf
A GeoDataFrame containing 2D lon,lat POINT
z_col
The name of the column to use for the z-coordinate (e.g. h_li, elevation_lm, etc.)
Returns
-------
A GeoDataFrame with 3D POINT geometries.
"""
points3D = gpd.points_from_xy(gf.geometry.x, gf.geometry.y, gf[z_col])
gf3D = gf.copy()
gf3D.geometry = points3D
return gf3D