Source code for coincident.io.sliderule

"""
Subset and Sample using Sliderule
https://slideruleearth.io
"""

from __future__ import annotations

import typing
import warnings
from pathlib import Path
from typing import Any

import geopandas as gpd

try:
    import sliderule
    from sliderule import earthdata, raster, toregion
except ImportError:
    warnings.warn(
        "'sliderule' package not found. Install for GEDI & ICESat2 functionality: https://slideruleearth.io/web/rtd/getting_started/Install.html",
        stacklevel=2,
    )
from coincident._utils import depends_on_optional
from coincident.datasets.planetary_computer import WorldCover
from coincident.search.wesm import read_wesm_csv, stacify_column_names


def _gdf_to_sliderule_polygon(gf: gpd.GeoDataFrame) -> list[dict[str, float]]:
    # NOTE: .union_all().convex_hull of 2 point geodataframe is LINESTRING
    bounding_polygon = gpd.GeoDataFrame(geometry=gf[["geometry"]].dissolve().envelope)
    return toregion(bounding_polygon)["poly"]  # type: ignore[no-any-return]


def _gdf_to_sliderule_params(gf: gpd.GeoDataFrame) -> dict[str, Any]:
    # Sliderule expects single 'geometry' column
    # Dissolve creates a single multipolygon from all geometries
    sliderule_aoi = _gdf_to_sliderule_polygon(gf)
    params = {}
    params["poly"] = sliderule_aoi
    params["t0"] = gf.start_datetime.min().tz_localize(None).isoformat()
    params["t1"] = gf.end_datetime.max().tz_localize(None).isoformat()
    return params


def _granule_from_assets(assets: gpd.GeoDataFrame) -> str | None:
    granule = None
    # NOTE: change to while loop in case tons of assets?
    for _k, v in assets.items():
        if v.get("roles") == "data":
            granule = Path(v.get("href")).name

    return granule


def _decode_worldcover(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    mapping = {
        key: value["description"]
        for (
            key,
            value,
        ) in WorldCover().classmap.items()
    }
    return gf.replace({"worldcover.value": mapping})


_EMPTY_GDF_MSG = "SlideRule returned an empty GeoDataFrame. Check spatial/temporal overlap and filter parameters."


@typing.no_type_check
def _add_raster_samples(
    include_worldcover: bool, include_3dep: bool
) -> dict[str, dict[str, Any]]:
    samples = {}
    if include_worldcover:
        # SlidRule uses 2021 version (timestamped 2021-06-30)
        # https://github.com/SlideRuleEarth/sliderule/blob/220e39bcad6d6682ba2b0d0ea502fc2ab142c3c5/packages/core/extensions/earth_data_query.lua#L205-L208
        samples.update({"worldcover": {"asset": "esa-worldcover-10meter"}})
    if include_3dep:
        # NOTE: use "substr" to restrict to specific WESM project?
        # https://slideruleearth.io/web/rtd/user_guide/SlideRule.html#raster-sampling
        # Warning: use_poi_time NOT using acquisition time here
        # https://github.com/SlideRuleEarth/sliderule/issues/444
        samples.update({"3dep": {"asset": "usgs3dep-1meter-dem", "use_poi_time": True}})
    return samples


def _build_sliderule_params(
    gf: gpd.GeoDataFrame,
    aoi: gpd.GeoDataFrame | None,
    include_worldcover: bool,
    include_3dep: bool,
    sliderule_params: dict[str, Any] | None,
) -> dict[str, Any]:
    params = _gdf_to_sliderule_params(gf)
    if aoi is not None:
        params["poly"] = _gdf_to_sliderule_polygon(aoi)
    params["samples"] = _add_raster_samples(include_worldcover, include_3dep)
    if sliderule_params is not None:
        params.update(sliderule_params)
    return params


[docs] @depends_on_optional("sliderule") def subset_gedi02a( gf: gpd.GeoDataFrame, aoi: gpd.GeoDataFrame = None, include_worldcover: bool = False, include_3dep: bool = False, sliderule_params: dict[str, Any] | None = None, ) -> gpd.GeoDataFrame: """ Subsets GEDI L2A data based on a given GeoDataFrame and optional area of interest (AOI). Parameters ---------- gf A GeoDataFrame of results from coincident.search.search(dataset='gedi') aoi A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search. include_worldcover Whether to include WorldCover data in the processing. Default is False. include_3dep Whether to include 3DEP data in the processing. Default is False. sliderule_params A dictionary of additional parameters to be passed to SlideRule gedi02ax API. Returns ------- A GeoDataFrame containing the subsetted GEDI L2A data. Notes ----- The function sets `degrade_filter=True` and `l2_quality_filter=True` by default. """ granule_names = gf.assets.apply(_granule_from_assets).to_list() gedi_defaults: dict[str, Any] = {"degrade_filter": True, "l2_quality_filter": True} if sliderule_params is not None: gedi_defaults.update(sliderule_params) params = _build_sliderule_params( gf, aoi, include_worldcover, include_3dep, gedi_defaults ) gfsr = sliderule.run("gedi02ax", params, resources=granule_names) if gfsr.empty: warnings.warn(_EMPTY_GDF_MSG, stacklevel=2) if include_worldcover: gfsr = _decode_worldcover(gfsr) return gfsr
[docs] @depends_on_optional("sliderule") def subset_atl06( gf: gpd.GeoDataFrame, aoi: gpd.GeoDataFrame = None, dropna: bool = True, include_worldcover: bool = False, include_3dep: bool = False, sliderule_params: dict[str, Any] | None = None, ) -> gpd.GeoDataFrame: """ Subset ATL06 data using provided GeoDataFrame and optional parameters. Parameters ---------- gf GeoDataFrame containing the input data. aoi A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search. dropna Whether to drop rows with NaN values in the 'h_li' column. include_worldcover Whether to include WorldCover data in the processing. include_3dep Whether to include 3DEP data in the processing. sliderule_params Additional parameters to pass to the SlideRule icesat2.atl06sp(). Returns ------- GeoDataFrame with ATL06 standard product measurements. """ # Not necessary but avoids another CMR search granule_names = gf.assets.apply(_granule_from_assets).to_list() granule_names = [x.replace("ATL03", "ATL06") for x in granule_names] params = _build_sliderule_params( gf, aoi, include_worldcover, include_3dep, sliderule_params ) gfsr = sliderule.run("atl06x", params, resources=granule_names) if gfsr.empty: raise ValueError(_EMPTY_GDF_MSG) # Drop poor-quality data # https://github.com/orgs/SlideRuleEarth/discussions/441 gfsr = gfsr[gfsr.atl06_quality_summary == 0] # NOTE: add server-side filtering for NaNs in sliderule.atl06sp? if dropna: gfsr = gfsr.dropna(subset=["h_li"]) if include_worldcover: gfsr = _decode_worldcover(gfsr) if gfsr.empty: warnings.warn( "Only poor-quality data found after filtering. Resulting GeoDataFrame is empty.", stacklevel=2, ) return gfsr
[docs] @depends_on_optional("sliderule") def subset_atl08( gf: gpd.GeoDataFrame, aoi: gpd.GeoDataFrame = None, dropna: bool = True, include_worldcover: bool = False, include_3dep: bool = False, sliderule_params: dict[str, Any] | None = None, ) -> gpd.GeoDataFrame: """ Subset ATL08 data using provided GeoDataFrame and optional parameters. Parameters ---------- gf GeoDataFrame containing the input data from coincident.search.search(dataset='icesat-2'). aoi A GeoDataFrame with a POLYGON to subset. The *envelope* of geometries is used to search. dropna Whether to drop rows with NaN values in the 'h_te_median' column. include_worldcover Whether to include WorldCover data in the processing. include_3dep Whether to include 3DEP data in the processing. sliderule_params Additional parameters to pass to the SlideRule atl08x API. Use ``te_quality_score`` and ``can_quality_score`` to apply server-side terrain and canopy quality thresholds. Returns ------- GeoDataFrame with ATL08 standard product measurements. """ # Not necessary but avoids another CMR search granule_names = gf.assets.apply(_granule_from_assets).to_list() granule_names = [x.replace("ATL03", "ATL08") for x in granule_names] params = _build_sliderule_params( gf, aoi, include_worldcover, include_3dep, sliderule_params ) gfsr = sliderule.run("atl08x", params, resources=granule_names) if gfsr.empty: raise ValueError(_EMPTY_GDF_MSG) if dropna: gfsr = gfsr.dropna(subset=["h_te_median"]) if include_worldcover: gfsr = _decode_worldcover(gfsr) if gfsr.empty: warnings.warn( "Only poor-quality data found after filtering. Resulting GeoDataFrame is empty.", stacklevel=2, ) return gfsr
[docs] @depends_on_optional("sliderule") def process_atl06sr( gf: gpd.GeoDataFrame, aoi: gpd.GeoDataFrame = None, target_surface: str = "ground", include_worldcover: bool = False, include_3dep: bool = False, sliderule_params: dict[str, Any] | None = None, ) -> gpd.GeoDataFrame: """ Generate Custom SlideRule ATL06 Product Parameters ---------- gf Input GeoDataFrame with ICESat-2 ATL03 Granule metadata aoi A GeoDataFrame with a POLYGON to subset The *envelope* of geometries is used to search. target_surface Specify which ATL08 filters to apply. Must be either 'ground' or 'canopy'. include_worldcover Whether to include WorldCover data in the processing. include_3dep Whether to include 3DEP data in the processing. sliderule_params Additional parameters for SlideRule processing. User-provided parameters take precedence over default settings. Returns ------- Processed GeoDataFrame with the results from SlideRule. """ # Time range and AOI from input GeoDataFrame params = _gdf_to_sliderule_params(gf) # Common default settings for sliderule processing # https://github.com/SlideRuleEarth/sliderule/issues/448 params.update( { "cnf": 0, # -2:tep, -1:not considered, 0:background, 1:within 10m, 2:low, 3:medium, 4:high (default=2) "srt": -1, # -1:dynamic, 0:land, 1:ocean, 2:sea ice, 3:land ice, 4:inland water (default=-1) "ats": 20.0, # Minimum allowed along track spread of a photon segment (m) (default=20.0) "cnt": 5, # Minimum allowed number of photons in a segment (default=10) "len": 40.0, # Length of photon segment (m) (default=40.0) "res": 20.0, # Resolution or step size of photon segment (m) (default=20.0) "fit": { "maxi": 5 # Maximum iterations on least squares fit (default=5) }, } ) if target_surface == "ground": params.update({"atl08_class": ["atl08_ground"]}) elif target_surface == "canopy": params.update({"atl08_class": ["atl08_top_of_canopy", "atl08_canopy"]}) else: msg = f"Invalid target_suface={target_surface}. Must be 'ground' or 'canopy'." raise ValueError(msg) params["samples"] = _add_raster_samples(include_worldcover, include_3dep) # Not necessary but avoids another CMR search! granule_names = gf.assets.apply(_granule_from_assets).to_list() if aoi is not None: params["poly"] = _gdf_to_sliderule_polygon(aoi) # User-provided parameters take precedence if sliderule_params is not None: params.update(sliderule_params) # gfsr = icesat2.atl06p(params, resources=granule_names) gfsr = sliderule.run("atl03x", params, resources=granule_names) # Drop columns we don't need? # dropcols = ['worldcover.time','worldcover.flags','worldcover.file_id', # '3dep.time','3dep.flags'] if include_worldcover: gfsr = _decode_worldcover(gfsr) return gfsr
[docs] @depends_on_optional("sliderule") def sample_raster( gf: gpd.GeoDataFrame, asset_key: str = "usgs3dep-1meter-dem", project_name: str | None = None, ) -> gpd.GeoDataFrame: """ Sample 3DEP 1-meter DEM with POINT geometries in GeoDataFrame. Points should be (longitude, latitude) not UTM Parameters ---------- gf A GeoDataFrame containing POINT geometries in EPSG:4326 asset_key Sliderule asset key to sample (e.g. "usgs3dep-1meter-dem" or "esa-worldcover-10meter") project_name For 3dep, a WESM project name to restrict results to (e.g. "CO_WestCentral_2019_A19") Returns ------- A GeoDataFrame with sampled elevation data from the 3DEP 1-meter DEM. Notes ----- Sliderule automatically reprojects to ITRF2020. For 3DEP 1m this means going from tiffs stored as NAD83(2011) + NAVD88 height in various UTM zones. """ # Just work with geometry column gf = gf[["geometry"]].reset_index(drop=True) # Ensure passed geodataframe is lon/lat for arguments to sliderule gfll = gf.to_crs("EPSG:9989") points = [[x, y] for x, y in zip(gfll.geometry.x, gfll.geometry.y, strict=False)] if asset_key != "usgs3dep-1meter-dem": samples = raster.sample(asset_key, points) else: poly = _gdf_to_sliderule_polygon(gfll) geojson = earthdata.tnm( short_name="Digital Elevation Model (DEM) 1 meter", polygon=poly ) gfsr = raster.sample(asset_key, points, {"catalog": geojson}) if project_name is not None: # Allow NaNs if restricting to a single WESM project gfsr = ( gfsr[gfsr.file.str.contains(project_name)] .drop_duplicates("geometry") .reset_index() ) else: # Drop NaNs if sampling among multiple WESM projects gfsr = gfsr[gfsr.value.notna()].drop_duplicates("geometry").reset_index() # Use acquisition datetime instead of upload time # https://github.com/SlideRuleEarth/sliderule/issues/444 dfwesm = stacify_column_names(read_wesm_csv()) def get_wesm_datetime(filename: str) -> gpd.pd.Timestamp: project_name = filename.split("/")[-3] return dfwesm[dfwesm.project == project_name].datetime.iloc[0] gfsr["time"] = gfsr.file.apply(get_wesm_datetime) # Return results in original CRS of passed dataframe gf3dep = gfsr.to_crs(gf.crs) # Return with order matching input geodataframe samples = gf.sjoin_nearest(gf3dep, how="left").drop(columns="index_right") return samples
def to_3d( gf: gpd.GeoDataFrame, z_col: str, ) -> gpd.GeoDataFrame: """ Convert 2D GeoDataFrame to 3D by adding z-coordinate from specified column. Necessary to apply 3D transforms via gf.to_crs(). Parameters ---------- gf A GeoDataFrame containing 2D lon,lat POINT z_col The name of the column to use for the z-coordinate (e.g. h_li, elevation_lm, etc.) Returns ------- A GeoDataFrame with 3D POINT geometries. """ points3D = gpd.points_from_xy(gf.geometry.x, gf.geometry.y, gf[z_col]) gf3D = gf.copy() gf3D.geometry = points3D return gf3D