Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ to_geotiff(dask_da, 'mosaic.vrt') # stream Dask to VRT
# Accessor methods
da.xrs.to_geotiff('out.tif', compression='lzw') # write from DataArray
ds.xrs.open_geotiff('large_dem.tif') # read windowed to Dataset extent

# xarray backend engine
import xarray as xr
xr.open_dataset('dem.tif', engine='xrspatial') # open as a Dataset
xr.open_mfdataset('*.tif', engine='xrspatial', # share one var name
backend_kwargs={'default_name': 'band_data'})
```

**Compression codecs:** Deflate, LZW (Numba JIT), ZSTD, PackBits, JPEG (Pillow, internal-only: requires `allow_internal_only_jpeg=True` and is not readable by GDAL), JPEG 2000 (glymur, experimental: requires `allow_experimental_codecs=True`), uncompressed
Expand Down
53 changes: 53 additions & 0 deletions docs/source/reference/geotiff.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,59 @@ the top level, so ``from xrspatial import open_geotiff`` and

xrspatial.geotiff.open_geotiff

xarray backend engine
=====================
``open_geotiff`` is also registered as an xarray backend under the
engine name ``xrspatial``, so a source can be opened through
xarray's standard API:

.. code-block:: python

import xarray as xr

ds = xr.open_dataset("dem.tif", engine="xrspatial")
ds = xr.open_mfdataset(
"*.tif", engine="xrspatial",
backend_kwargs={"default_name": "band_data"})

``open_geotiff`` returns a ``DataArray``; the engine promotes it to a
one-variable ``Dataset`` (the variable name is the source stem, or
``band_data`` for an unnamed file-like source). GeoTIFF read options
(``gpu``, ``masked``, ``band``, ``overview_level``, ``window``,
``bbox``, ...) are forwarded through ``backend_kwargs``:

.. code-block:: python

xr.open_dataset(
"dem.tif", engine="xrspatial",
backend_kwargs={"masked": True, "overview_level": 1},
)

``chunks`` is the exception: xarray reserves it as a top-level argument
to ``open_dataset``, so pass it directly (``chunks={}``) rather than
through ``backend_kwargs`` to get a dask-backed dataset.

For ``open_mfdataset``, pass a shared ``default_name`` through
``backend_kwargs`` as shown above. Without it the variable in each file's
Dataset takes the source stem, so files with different names concatenate
into one variable per file (each NaN-filled outside its own slice) rather
than a single combined variable.

The ``.tif``, ``.tiff``, and ``.vrt`` extensions are auto-detected, so
``engine=`` can be omitted for those sources. Bare auto-detection is
ambiguous when another raster backend (e.g. rioxarray's ``rasterio``) is
installed and also claims those extensions; xarray then raises and asks
for an explicit ``engine=``.

The engine forwards to the standalone ``open_geotiff`` function, so the
coregistered-read options (``coregister``, ``auto_reproject``,
``resampling``) are *not* available through it; they live on the
``.xrs.open_geotiff`` accessor because they reproject and resample onto a
target array's grid, and the engine opens a single source from scratch
with no target. Passing them through ``backend_kwargs`` raises
``TypeError``. Use the accessor on the target array instead, e.g.
``target.xrs.open_geotiff("scene.tif", coregister=True)``.

Coregistered reads (experimental)
=================================

Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ zip_safe = False
[options.entry_points]
console_scripts =
xrspatial = xrspatial.__main__:main
xarray.backends =
xrspatial = xrspatial.geotiff._xarray_backend:GeoTIFFBackendEntrypoint

[options.extras_require]
doc =
Expand Down
91 changes: 91 additions & 0 deletions xrspatial/geotiff/_xarray_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""xarray backend entry point for the native GeoTIFF/COG/VRT reader.

Registers :func:`open_geotiff` under xarray's pluggable backend API so a
GeoTIFF source can be opened through the standard entry point::

import xarray as xr

xr.open_dataset("dem.tif", engine="xrspatial")
xr.open_mfdataset("*.tif", engine="xrspatial",
backend_kwargs={"default_name": "band_data"})

The entry point is declared in ``setup.cfg`` under
``[options.entry_points] xarray.backends``. ``open_geotiff`` returns a
:class:`~xarray.DataArray`; xarray backends must return a
:class:`~xarray.Dataset`, so this wrapper promotes the single array to a
one-variable dataset.

GeoTIFF-specific read options (``gpu``, ``masked``, ``band``,
``overview_level``, ``window``, ``bbox``, ``stable_only``, ...) are
forwarded to :func:`open_geotiff` through xarray's ``backend_kwargs``::

xr.open_dataset(
"dem.tif", engine="xrspatial",
backend_kwargs={"masked": True, "overview_level": 1},
)

``chunks`` is the one exception: xarray reserves it as a top-level
argument to ``open_dataset``, so it cannot travel through
``backend_kwargs``. Pass ``chunks=`` directly to ``open_dataset`` to get
a dask-backed dataset (xarray wraps the eager read)::

xr.open_dataset("dem.tif", engine="xrspatial", chunks={})
"""
from __future__ import annotations

import os

from xarray.backends import BackendEntrypoint

# Fallback name for the single data variable. ``open_dataset`` uses it
# only when the ``DataArray`` returned by ``open_geotiff`` has
# ``name is None`` (e.g. an in-memory file-like object with no path to
# derive a name from).
_DEFAULT_VARIABLE_NAME = "band_data"

# File extensions ``guess_can_open`` claims so ``xr.open_dataset`` /
# ``open_mfdataset`` can auto-select this engine without ``engine=``.
# Kept as a tuple because it is passed straight to ``str.endswith``; the
# candidate path is lower-cased first, so entries must be lower-case.
_SUPPORTED_EXTENSIONS = (".tif", ".tiff", ".vrt")


class GeoTIFFBackendEntrypoint(BackendEntrypoint):
    """xarray backend that reads GeoTIFF / COG / VRT sources via ``open_geotiff``.

    The reader itself lives in :func:`xrspatial.geotiff.open_geotiff` and
    returns a ``DataArray``. This class adapts it to xarray's backend
    protocol by wrapping that array in a ``Dataset`` with one data
    variable.
    """

    description = (
        "Open GeoTIFF/COG/VRT files using xrspatial's native (no-GDAL) "
        "reader via xrspatial.geotiff.open_geotiff"
    )
    url = "https://github.com/xarray-contrib/xarray-spatial"
    # Declared explicitly because ``open_dataset`` below accepts
    # ``**kwargs`` (forwarded verbatim to ``open_geotiff``, which takes ~30
    # keyword options): xarray's signature introspection
    # (``detect_parameters``) raises on an ``open_dataset`` that uses
    # ``**kwargs`` without this attribute set. Listing only these two names
    # also stops xarray from injecting its CF decoders -- in particular
    # ``mask_and_scale``, which would collide with open_geotiff's
    # deprecated alias of the same name. GeoTIFF read options arrive
    # through ``backend_kwargs`` instead.
    open_dataset_parameters = ("filename_or_obj", "drop_variables")

    def open_dataset(self, filename_or_obj, *, drop_variables=None, **kwargs):
        """Read ``filename_or_obj`` and return it as a one-variable ``Dataset``.

        Parameters
        ----------
        filename_or_obj
            Source handed unchanged to ``open_geotiff``.
        drop_variables
            Variable name(s) to remove from the result; names that are not
            present are ignored. ``None`` keeps everything.
        **kwargs
            Extra read options forwarded verbatim to ``open_geotiff``.

        Returns
        -------
        xarray.Dataset
            The array returned by ``open_geotiff``, stored under its own
            name or ``_DEFAULT_VARIABLE_NAME`` when it has none.
        """
        # Deferred import: keeps importing this backend module cheap, so
        # the heavy reader package only loads once a source is opened.
        from . import open_geotiff

        array = open_geotiff(filename_or_obj, **kwargs)

        variable_name = array.name
        if variable_name is None:
            variable_name = _DEFAULT_VARIABLE_NAME

        dataset = array.to_dataset(name=variable_name)
        if drop_variables is None:
            return dataset
        return dataset.drop_vars(drop_variables, errors="ignore")

    def guess_can_open(self, filename_or_obj):
        """Return ``True`` when the source looks like a supported raster path.

        Only ``str`` values (or ``os.PathLike`` objects whose ``fspath`` is
        a ``str``) are considered; anything else yields ``False``. The
        match is a case-insensitive suffix test against
        ``_SUPPORTED_EXTENSIONS``.
        """
        candidate = filename_or_obj
        if isinstance(candidate, os.PathLike):
            candidate = os.fspath(candidate)
        if not isinstance(candidate, str):
            return False
        # Drop everything from the first "?" and then from the first "#"
        # so COG URLs such as "https://host/dem.tif?token=..." still match
        # on extension.
        without_query, _, _ = candidate.partition("?")
        bare_path, _, _ = without_query.partition("#")
        return bare_path.lower().endswith(_SUPPORTED_EXTENSIONS)
168 changes: 168 additions & 0 deletions xrspatial/geotiff/tests/test_xarray_backend_3365.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""xarray ``BackendEntrypoint`` for the native GeoTIFF reader.

Coverage for issue #3365: ``open_geotiff`` is exposed under xarray's
pluggable backend API so a GeoTIFF / COG / VRT source opens through the
standard entry point::

xr.open_dataset("dem.tif", engine="xrspatial")

``open_geotiff`` returns a ``DataArray``; the backend promotes it to a
one-variable ``Dataset``. These tests drive the wrapper by passing the
entrypoint class as ``engine=`` so they exercise the worktree code
without depending on the installed ``xarray.backends`` entry point. A
separate test confirms the entry point registers once the package is
installed (the path CI exercises).
"""
from __future__ import annotations

import importlib.metadata
import io

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff
from xrspatial.geotiff._xarray_backend import _DEFAULT_VARIABLE_NAME, GeoTIFFBackendEntrypoint
from xrspatial.geotiff.tests._helpers.tiff_builders import make_minimal_tiff


@pytest.fixture
def geo_tiff_path(tmp_path):
    """Write a minimal georeferenced float32 GeoTIFF; return its path as ``str``.

    The file is named ``backend_3365.tif`` so tests can rely on
    ``backend_3365`` as the source stem.
    """
    tiff_bytes = make_minimal_tiff(
        4,
        4,
        np.dtype("float32"),
        geo_transform=(-120.0, 45.0, 0.001, -0.001),
        epsg=4326,
    )
    target = tmp_path / "backend_3365.tif"
    target.write_bytes(tiff_bytes)
    return str(target)


def test_open_dataset_returns_one_variable_dataset(geo_tiff_path):
    """The backend yields a ``Dataset`` holding exactly one data variable."""
    opened = xr.open_dataset(geo_tiff_path, engine=GeoTIFFBackendEntrypoint)
    assert isinstance(opened, xr.Dataset)

    variable_names = list(opened.data_vars)
    assert len(variable_names) == 1
    # The name is open_geotiff's default: the stem of the source file.
    assert "backend_3365" in variable_names


def test_dataset_matches_open_geotiff(geo_tiff_path):
    """Values, dims, georeferencing attrs and coords match a direct read."""
    reference = open_geotiff(geo_tiff_path)
    dataset = xr.open_dataset(geo_tiff_path, engine=GeoTIFFBackendEntrypoint)
    promoted = dataset[reference.name]

    np.testing.assert_array_equal(promoted.values, reference.values)
    assert promoted.dims == reference.dims

    # Georeferencing must survive the DataArray -> Dataset promotion.
    for attr_key in ("crs", "transform"):
        assert promoted.attrs.get(attr_key) == reference.attrs.get(attr_key)

    for coord_name in reference.coords:
        np.testing.assert_array_equal(
            dataset[coord_name].values, reference[coord_name].values
        )


def test_file_like_source_falls_back_to_default_name():
    """An in-memory source gets the backend's fallback variable name."""
    buffer = io.BytesIO(make_minimal_tiff(4, 4, np.dtype("float32")))
    dataset = xr.open_dataset(buffer, engine=GeoTIFFBackendEntrypoint)
    # With no path to derive a name from, the backend supplies
    # _DEFAULT_VARIABLE_NAME for the data variable.
    assert _DEFAULT_VARIABLE_NAME in dataset.data_vars


def test_backend_kwargs_forwarded_to_open_geotiff(geo_tiff_path):
    """``backend_kwargs`` entries reach ``open_geotiff`` unchanged."""
    # default_name renames the resulting data variable, so seeing the new
    # name is an unambiguous signal that the kwarg made it through.
    forwarded = {"default_name": "elevation"}
    dataset = xr.open_dataset(
        geo_tiff_path,
        engine=GeoTIFFBackendEntrypoint,
        backend_kwargs=forwarded,
    )
    assert "elevation" in dataset.data_vars


def test_top_level_chunks_gives_dask_backed_variable(geo_tiff_path):
    """Passing ``chunks={}`` to ``open_dataset`` yields a chunked variable."""
    pytest.importorskip("dask")
    # ``chunks`` is reserved by xarray's open_dataset and cannot be passed
    # through backend_kwargs, so it is given as the top-level argument.
    dataset = xr.open_dataset(
        geo_tiff_path,
        chunks={},
        engine=GeoTIFFBackendEntrypoint,
    )
    variable = dataset["backend_3365"]
    assert variable.chunks is not None


def test_drop_variables_removes_the_only_variable(geo_tiff_path):
    """``drop_variables`` naming the sole variable removes it from the result."""
    remaining = xr.open_dataset(
        geo_tiff_path,
        drop_variables="backend_3365",
        engine=GeoTIFFBackendEntrypoint,
    ).data_vars
    assert "backend_3365" not in remaining


def test_open_mfdataset(tmp_path):
    """Two files sharing ``default_name`` combine into one variable."""
    pytest.importorskip("dask")

    def _write_tile(index):
        # Each tile is written under its own stem: mf_3365_<index>.tif.
        tile_path = tmp_path / f"mf_3365_{index}.tif"
        tile_path.write_bytes(
            make_minimal_tiff(
                4,
                4,
                np.dtype("float32"),
                geo_transform=(-120.0, 45.0, 0.001, -0.001),
                epsg=4326,
            )
        )
        return str(tile_path)

    tile_paths = [_write_tile(index) for index in range(2)]

    # A shared default_name keeps every file's data variable identically
    # named, so the files concatenate into one variable along the new
    # dimension. Without it each file's variable takes its own stem and
    # the result has one variable per file instead.
    combined = xr.open_mfdataset(
        tile_paths,
        engine=GeoTIFFBackendEntrypoint,
        combine="nested",
        concat_dim="tile",
        backend_kwargs={"default_name": "band_data"},
    )
    assert isinstance(combined, xr.Dataset)
    assert list(combined.data_vars) == ["band_data"]
    assert combined.sizes["tile"] == 2


@pytest.mark.parametrize(
    "name, expected",
    [
        ("dem.tif", True),
        ("dem.TIF", True),
        ("scene.tiff", True),
        ("mosaic.vrt", True),
        ("https://host/path/dem.tif?token=abc", True),
        ("data.nc", False),
        ("notes.txt", False),
        ("noextension", False),
    ],
)
def test_guess_can_open_extensions(name, expected):
    """``guess_can_open`` claims only the supported extensions."""
    backend = GeoTIFFBackendEntrypoint()
    verdict = backend.guess_can_open(name)
    assert verdict is expected


def test_guess_can_open_pathlike(tmp_path):
    """A path object with a ``.tif`` suffix is recognised."""
    candidate = tmp_path / "dem_3365.tif"
    backend = GeoTIFFBackendEntrypoint()
    assert backend.guess_can_open(candidate) is True


def test_guess_can_open_non_string_returns_false():
    """A file-like object is never claimed by extension sniffing."""
    empty_stream = io.BytesIO(b"")
    backend = GeoTIFFBackendEntrypoint()
    assert backend.guess_can_open(empty_stream) is False


def test_entry_point_registered():
    """The ``xrspatial`` entry in ``xarray.backends`` loads the backend class.

    The test skips when no such entry point is found in the installed
    distribution metadata (e.g. an editable install made before the entry
    point was added). CI installs the branch fresh, so the assertion runs
    there.
    """
    registered = [
        entry
        for entry in importlib.metadata.entry_points(group="xarray.backends")
        if entry.name == "xrspatial"
    ]
    if len(registered) == 0:
        pytest.skip(
            "xrspatial installed without the xarray.backends entry point; "
            "reinstall the package to register the 'xrspatial' engine.")
    loaded = registered[0].load()
    assert loaded is GeoTIFFBackendEntrypoint
Loading