Skip to content

Commit

Permalink
Support GeoPandas Polygons and MultiPolygons (#1285)
Browse files Browse the repository at this point in the history
* Add rendering of GeoPandas polygons and multipolygons via ragged arrays

* Add example notebooks

* Correct scope of shapely function call

* Calculate geopandas bounds once only

* Support dask-geopandas and use lazy imports

* Add optional dependencies so can "pip install datashader[geopandas]"

* Explicit Shapely version check

* Add tests comparing geopandas, dask-geopandas and spatialpandas

* Remove demo notebooks

* Use sindex.query instead of cx for GeoPandas.GeoDataFrame

* Better error reporting

* Move geopandas check and spatial filtering to separate function for reuse
  • Loading branch information
ianthomas23 authored Oct 16, 2023
1 parent c9f9283 commit 9e20700
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 18 deletions.
53 changes: 48 additions & 5 deletions datashader/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,22 +733,27 @@ def polygons(self, source, geometry, agg=None):
"""
from .glyphs import PolygonGeom
from .reductions import any as any_rdn

if spatialpandas and isinstance(source, spatialpandas.dask.DaskGeoDataFrame):
# Downselect partitions to those that may contain polygons in viewport
x_range = self.x_range if self.x_range is not None else (None, None)
y_range = self.y_range if self.y_range is not None else (None, None)
source = source.cx_partitions[slice(*x_range), slice(*y_range)]
glyph = PolygonGeom(geometry)
elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame):
pass
glyph = PolygonGeom(geometry)
elif (geopandas_source := self._source_from_geopandas(source)) is not None:
source = geopandas_source
from .glyphs.polygon import GeopandasPolygonGeom
glyph = GeopandasPolygonGeom(geometry)
else:
raise ValueError(
"source must be an instance of spatialpandas.GeoDataFrame or \n"
"spatialpandas.dask.DaskGeoDataFrame.\n"
" Received value of type {typ}".format(typ=type(source)))
"source must be an instance of spatialpandas.GeoDataFrame, "
"spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame or "
f"dask_geopandas.GeoDataFrame, not {type(source)}")

if agg is None:
agg = any_rdn()
glyph = PolygonGeom(geometry)
return bypixel(source, self, glyph, agg)

def quadmesh(self, source, x=None, y=None, agg=None):
Expand Down Expand Up @@ -1228,6 +1233,44 @@ def validate(self):
self.validate_ranges(self.x_range, self.y_range)
self.validate_size(self.plot_width, self.plot_height)

def _source_from_geopandas(self, source):
"""
Check if the specified source is a geopandas or dask-geopandas GeoDataFrame.
If so, spatially filter the source and return it.
If not, return None.
"""
try:
import geopandas
except ImportError:
geopandas = None

try:
import dask_geopandas
except ImportError:
dask_geopandas = None

if ((geopandas and isinstance(source, geopandas.GeoDataFrame)) or
(dask_geopandas and isinstance(source, dask_geopandas.GeoDataFrame))):
# Explicit shapely version check as cannot continue unless shapely >= 2
from packaging.version import Version
from shapely import __version__ as shapely_version
if Version(shapely_version) < Version('2.0.0'):
raise ImportError("Use of GeoPandas in Datashader requires Shapely >= 2.0.0")

if isinstance(source, geopandas.GeoDataFrame):
x_range = self.x_range if self.x_range is not None else (-np.inf, np.inf)
y_range = self.y_range if self.y_range is not None else (-np.inf, np.inf)
from shapely import box
query = source.sindex.query(box(x_range[0], y_range[0], x_range[1], y_range[1]))
source = source.iloc[query]
else:
x_range = self.x_range if self.x_range is not None else (None, None)
y_range = self.y_range if self.y_range is not None else (None, None)
source = source.cx[slice(*x_range), slice(*y_range)]
return source
else:
return None


def bypixel(source, canvas, glyph, agg, *, antialias=False):
"""Compute an aggregate grouped by pixel sized bins.
Expand Down
26 changes: 24 additions & 2 deletions datashader/glyphs/points.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
cudf = None
cuda_args = None

try:
from geopandas.array import GeometryDtype as gpd_GeometryDtype
except ImportError:
gpd_GeometryDtype = type(None)

try:
import spatialpandas
except Exception:
Expand All @@ -35,6 +40,7 @@ def values(s):
class _GeometryLike(Glyph):
def __init__(self, geometry):
self.geometry = geometry
self._cached_bounds = None

@property
def ndims(self):
Expand Down Expand Up @@ -72,11 +78,27 @@ def required_columns(self):
return [self.geometry]

def compute_x_bounds(self, df):
bounds = df[self.geometry].array.total_bounds_x
col = df[self.geometry]
if isinstance(col.dtype, gpd_GeometryDtype):
# geopandas
if self._cached_bounds is None:
self._cached_bounds = col.total_bounds
bounds = self._cached_bounds[::2]
else:
# spatialpandas
bounds = col.array.total_bounds_x
return self.maybe_expand_bounds(bounds)

def compute_y_bounds(self, df):
bounds = df[self.geometry].array.total_bounds_y
col = df[self.geometry]
if isinstance(col.dtype, gpd_GeometryDtype):
# geopandas
if self._cached_bounds is None:
self._cached_bounds = col.total_bounds
bounds = self._cached_bounds[1::2]
else:
# spatialpandas
bounds = col.array.total_bounds_y
return self.maybe_expand_bounds(bounds)

@memoize
Expand Down
107 changes: 101 additions & 6 deletions datashader/glyphs/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,40 @@
spatialpandas = None


class GeopandasPolygonGeom(_GeometryLike):
@property
def geom_dtypes(self):
from geopandas.array import GeometryDtype
return (GeometryDtype,)

@memoize
def _build_extend(self, x_mapper, y_mapper, info, append, _antialias_stage_2, _antialias_stage_2_funcs):
expand_aggs_and_cols = self.expand_aggs_and_cols(append)
map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
draw_polygon = _build_draw_polygon(
append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols
)

perform_extend_cpu = _build_extend_geopandas_polygon_geometry(
draw_polygon, expand_aggs_and_cols
)
geom_name = self.geometry

def extend(aggs, df, vt, bounds, plot_start=True):
sx, tx, sy, ty = vt
xmin, xmax, ymin, ymax = bounds
aggs_and_cols = aggs + info(df, aggs[0].shape[:2])
geom_array = df[geom_name].array

perform_extend_cpu(
sx, tx, sy, ty,
xmin, xmax, ymin, ymax,
geom_array, *aggs_and_cols
)

return extend


class PolygonGeom(_GeometryLike):
# spatialpandas must be available if a PolygonGeom object is created.

Expand Down Expand Up @@ -52,7 +86,7 @@ def _build_draw_polygon(append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_
@expand_aggs_and_cols
def draw_polygon(
i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets, values, xs, ys, yincreasing, eligible,
offsets, offset_multiplier, values, xs, ys, yincreasing, eligible,
*aggs_and_cols
):
"""Draw a polygon using a winding-number scan-line algorithm
Expand All @@ -64,8 +98,8 @@ def draw_polygon(
eligible.fill(1)

# First pass, compute bounding box of polygon vertices in data coordinates
start_index = offsets[0]
stop_index = offsets[-1]
start_index = offset_multiplier*offsets[0]
stop_index = offset_multiplier*offsets[-1]
# num_edges = stop_index - start_index - 2
poly_xmin = np.min(values[start_index:stop_index:2])
poly_ymin = np.min(values[start_index + 1:stop_index:2])
Expand Down Expand Up @@ -105,8 +139,8 @@ def draw_polygon(
# Build arrays of edges in canvas coordinates
ei = 0
for j in range(len(offsets) - 1):
start = offsets[j]
stop = offsets[j + 1]
start = offset_multiplier*offsets[j]
stop = offset_multiplier*offsets[j + 1]
for k in range(start, stop - 2, 2):
x0 = values[k]
y0 = values[k + 1]
Expand Down Expand Up @@ -201,6 +235,67 @@ def draw_polygon(
return draw_polygon


def _build_extend_geopandas_polygon_geometry(
draw_polygon, expand_aggs_and_cols
):
# Lazy import shapely. Cannot get here if geopandas and shapely are not available.
import shapely

def extend_cpu(
sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, *aggs_and_cols
):
ragged = shapely.to_ragged_array(geometry)
geometry_type = ragged[0]
if geometry_type not in (shapely.GeometryType.POLYGON, shapely.GeometryType.MULTIPOLYGON):
raise ValueError(
"Canvas.polygons supports GeoPandas geometry types of POLYGON and MULTIPOLYGON, "
f"not {repr(geometry_type)}")

coords = ragged[1].ravel()
if geometry_type == shapely.GeometryType.MULTIPOLYGON:
offsets0, offsets1, offsets2 = ragged[2]
else: # POLYGON
offsets0, offsets1 = ragged[2]
offsets2 = np.arange(len(offsets1))

extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
coords, offsets0, offsets1, offsets2, *aggs_and_cols
)

@ngjit
@expand_aggs_and_cols
def extend_cpu_numba(sx, tx, sy, ty, xmin, xmax, ymin, ymax,
coords, offsets0, offsets1, offsets2, *aggs_and_cols):
# Pre-allocate temp arrays
max_edges = 0
n_multipolygons = len(offsets2) - 1
for i in range(n_multipolygons):
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
for j in range(len(polygon_inds) - 1):
start = offsets0[polygon_inds[j]]
stop = offsets0[polygon_inds[j + 1]]
max_edges = max(max_edges, stop - start - 1)

xs = np.full((max_edges, 2), np.nan, dtype=np.float32)
ys = np.full((max_edges, 2), np.nan, dtype=np.float32)
yincreasing = np.zeros(max_edges, dtype=np.int8)

# Initialize array indicating which edges are still eligible for processing
eligible = np.ones(max_edges, dtype=np.int8)

for i in range(n_multipolygons):
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
for j in range(len(polygon_inds) - 1):
start = polygon_inds[j]
stop = polygon_inds[j + 1]
draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets0[start:stop + 1], 2,
coords, xs, ys, yincreasing, eligible, *aggs_and_cols)

return extend_cpu


def _build_extend_polygon_geometry(
draw_polygon, expand_aggs_and_cols
):
Expand Down Expand Up @@ -269,7 +364,7 @@ def extend_cpu_numba(
stop = polygon_inds[j + 1]

draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets2[start:stop + 1], values,
offsets2[start:stop + 1], 1, values,
xs, ys, yincreasing, eligible, *aggs_and_cols)

return extend_cpu
73 changes: 72 additions & 1 deletion datashader/tests/test_polygons.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import pandas as pd
import numpy as np
from numpy import nan
import xarray as xr
import datashader as ds
from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr
Expand All @@ -16,6 +17,18 @@
GeoDataFrame = None
MultiPolygonArray = None

try:
from geodatasets import get_path
import geopandas
except ImportError:
get_path = None
geopandas = None

try:
import dask_geopandas
except ImportError:
dask_geopandas = None


def dask_GeoDataFrame(*args, **kwargs):
return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3)
Expand Down Expand Up @@ -122,7 +135,7 @@ def test_multiple_polygons_auto_range(DataFrame):
out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])

assert_eq_xr(agg, out)

assert_eq_ndarray(agg.x_range, (-1, 3.5), close=True)
assert_eq_ndarray(agg.y_range, (0.1, 2), close=True)

Expand Down Expand Up @@ -319,3 +332,61 @@ def test_spatial_index_not_dropped():

assert df2.columns == ['some_geom']
assert df2.some_geom.array._sindex == df.some_geom.array._sindex


natural_earth_sol = np.array([
[nan, 7, 7, 7, 7, 7, 0, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, nan],
[nan, nan, nan, nan, 5, nan, 6, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, 9, nan, nan, nan, nan, nan, nan, 10, nan, nan, nan, nan, 11, 12],
[nan, nan, nan, nan, nan, nan, 95, nan, nan, nan, nan, 112, nan, nan, nan, nan, 21, 21, 21, 13],
[ 17, nan, nan, nan, nan, nan, 95, 95, nan, nan, nan, 112, 20, nan, nan, nan, 31, 32, 34, 22],
[nan, nan, nan, nan, nan, nan, 95, nan, nan, 112, 112, 112, 112, nan, 44, 41, 50, 43, 37, nan],
[nan, 60, nan, nan, 95, 65, 54, nan, nan, 112, 112, 112, 112, nan, 112, 112, 63, nan, nan, nan],
[nan, nan, nan, 95, 95, 95, 74, nan, nan, nan, 72, 68, 112, 112, 112, 112, 112, 71, 73, nan],
[ 87, 82, 78, 95, 95, 88, 95, nan, nan, 80, 83, 112, 112, 112, 112, 112, 112, 112, nan, nan],
[ 94, nan, nan, 116, 118, 125, 125, 126, 126, nan, nan, 121, 122, 109, nan, 123, nan, 101, 106, 93],
])


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
def test_natural_earth_geopandas():
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed")
@pytest.mark.parametrize('npartitions', [1, 2, 5])
def test_natural_earth_dask_geopandas(npartitions):
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))
df = dd.from_pandas(df, npartitions=npartitions)
assert df.npartitions == npartitions
df.calculate_spatial_partitions()

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed")
@pytest.mark.parametrize('npartitions', [0, 1, 2, 5])
def test_natural_earth_spatialpandas(npartitions):
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))
df = spatialpandas.GeoDataFrame(df)
if npartitions > 0:
df = dd.from_pandas(df, npartitions=npartitions)
assert df.npartitions == npartitions

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)
7 changes: 7 additions & 0 deletions datashader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
except Exception:
cudf = None

try:
from geopandas.array import GeometryDtype as gpd_GeometryDtype
except ImportError:
gpd_GeometryDtype = type(None)

try:
from spatialpandas.geometry import GeometryDtype
except ImportError:
Expand Down Expand Up @@ -430,6 +435,8 @@ def dshape_from_pandas_helper(col):
return datashape.Option(datashape.DateTime(tz=tz))
elif isinstance(col.dtype, (RaggedDtype, GeometryDtype)):
return col.dtype
elif gpd_GeometryDtype and isinstance(col.dtype, gpd_GeometryDtype):
return col.dtype
dshape = datashape.CType.from_numpy_dtype(col.dtype)
dshape = datashape.string if dshape == datashape.object_ else dshape
if dshape in (datashape.string, datashape.datetime_):
Expand Down
Loading

0 comments on commit 9e20700

Please sign in to comment.