Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ mast
- The cloud dataset in ``Observations`` is now enabled by default if the ``boto3`` and ``botocore`` packages are installed. This
default can be overridden by setting the ``enable_cloud_dataset`` configuration option to False. [#3534]

- Adding in ability to read FITS and ASDF dataproducts to memory from s3:// using ``Observations.read_product()`` function. [#3561]


jplspec
^^^^^^^
Expand Down
52 changes: 52 additions & 0 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import numpy as np
import astropy.units as u
from astropy.io import fits
import astropy.coordinates as coord
from requests import HTTPError
from astropy.table import Table, Row, vstack
Expand Down Expand Up @@ -44,6 +45,12 @@
'`~astroquery.mast.ObservationsClass.enable_cloud_dataset` method.'
)

try:
import asdf
import s3fs
except ImportError:
pass


@async_to_sync
class ObservationsClass(MastQueryWithLogin):
Expand Down Expand Up @@ -1203,6 +1210,51 @@ def get_unique_product_list(self, observations, *, batch_size=500):
log.info("To return all products, use `Observations.get_product_list`")
return unique_products

# TODO: Need to inlcude way to parse if it is a MAST on prem URL and handle the streaming of that
def read_product(self, product_path, read_as="auto", ignore_unrecognized=True):
"""
Read a product from Open S3 bucket to memory. Currently supports FITS and ASDF product types only.

Parameters
----------
product_path: str
URI to the product in open bucket.
read_as: str, optional
How to read the file. Currently only .fits and .asdf is supported by "auto". Defaults to "auto".
ignore_unrecognized: bool
Tells asdf.open() to include or ignore warnings from unrecognized asdf tags. Defaults to True

Returns
-------
object
FITS or ASDF object.
"""
path_lower = product_path.lower()

if read_as == "auto":
# Read logic for fits data products
if path_lower.endswith((".fits", ".fits.gz")):
try:
log.info(f"Loaded: {product_path}")
return fits.open(product_path, fsspec_kwargs={"anon": True})
except Exception as e:
log.exception(f"Failed to open FITS File: {product_path} {e}")

# Read logic for ASDF
elif path_lower.endswith(".asdf"):
try:
fs = s3fs.S3FileSystem(anon=True)
with fs.open(product_path, 'rb') as s3_file:
af = asdf.open(s3_file, ignore_unrecognized_tag=ignore_unrecognized)
log.info(f"Loaded: {product_path}")
return af
except Exception as e:
log.exception(f"Failed to open ASD File: {product_path} {e}")

else:
log.error("Unsupported extension type")
return


@async_to_sync
class MastClass(MastQueryWithLogin):
Expand Down
124 changes: 124 additions & 0 deletions astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,130 @@ def test_observations_disable_cloud_dataset(patch_boto3):
assert Observations._cloud_enabled_explicitly is False


@pytest.fixture
def s3_fits_path():
return "s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits"


@pytest.fixture
def mock_fits_open(mocker):
return mocker.patch("astropy.io.fits.open", return_value=MagicMock(name="HDUList"))


def test_read_product_fits(s3_fits_path, mock_fits_open, mocker):
mocker.patch("astropy.__version__", "5.0.0")

result = Observations.read_product(s3_fits_path)

mock_fits_open.assert_called_once_with(
s3_fits_path, fsspec_kwargs={"anon": True}
)
assert result is mock_fits_open.return_value


@pytest.fixture
def s3_asdf_path():
return "s3://stpubdata/hst/public/test/test.asdf"


@pytest.fixture
def mock_s3fs(mocker):
s3_file = MagicMock(name="S3File")
s3_file.__enter__.return_value = s3_file
s3_file.__exit__.return_value = None

fs = MagicMock(name="S3FileSystem")
fs.open.return_value = s3_file

mocker.patch(
"astroquery.mast.observations.s3fs.S3FileSystem",
return_value=fs,
)

return fs


@pytest.fixture
def mock_asdf_open(mocker):
return mocker.patch(
"astroquery.mast.observations.asdf.open",
return_value=MagicMock(name="AsdfFile"),
)


def test_read_product_asdf(s3_asdf_path, mock_s3fs, mock_asdf_open):
pytest.importorskip("asdf")
pytest.importorskip("s3fs")

result = Observations.read_product(s3_asdf_path)

mock_s3fs.open.assert_called_once_with(
s3_asdf_path,
"rb",
)

mock_asdf_open.assert_called_once_with(
mock_s3fs.open.return_value.__enter__.return_value,
ignore_unrecognized_tag=True,
)

assert result is mock_asdf_open.return_value


def test_read_product_fits_open_failure(mocker, s3_fits_path):
# Simulate failure when opening the FITS file
mock_fits_open = mocker.patch(
"astropy.io.fits.open",
side_effect=OSError("Cannot read FITS file")
)

result = Observations.read_product(s3_fits_path)

# fits.open should have been called once with correct arguments
mock_fits_open.assert_called_once_with(
s3_fits_path, fsspec_kwargs={"anon": True}
)

# Function should return None after failure
assert result is None


def test_read_product_asdf_open_failure(mocker):
s3_asdf_path = "s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m.asdf"

# Mock the S3 filesystem and its open() method
mock_fs = mocker.patch("s3fs.S3FileSystem")
mock_fs_instance = mock_fs.return_value

# Make fs.open raise an error when used
mock_fs_instance.open.side_effect = OSError("Cannot read ASDF file")

result = Observations.read_product(s3_asdf_path)

# Ensure S3FileSystem was created with anon=True
mock_fs.assert_called_once_with(anon=True)

# Ensure attempt was made to open the file
mock_fs_instance.open.assert_called_once_with(s3_asdf_path, "rb")

# Function should return None after failure
assert result is None


def test_read_product_unknown_extension_auto(mocker):
product_path = "s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m.txt"

# Patch fits.open and asdf.open to ensure they are NOT called
mock_fits_open = mocker.patch("astropy.io.fits.open")
mock_asdf_open = mocker.patch("asdf.open")

result = Observations.read_product(product_path)

assert result is None
mock_fits_open.assert_not_called()
mock_asdf_open.assert_not_called()


######################
# CatalogClass tests #
######################
Expand Down
19 changes: 19 additions & 0 deletions astroquery/mast/tests/test_mast_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,25 @@ def test_observations_get_cloud_uris_no_duplicates(self, msa_product_table, rese
uris = Observations.get_cloud_uris(products)
assert len(uris) == 1

@pytest.mark.remote_data
def test_observations_read_product_fits(self):
product_path = "s3://stpubdata/hst/public/u24r/u24r0102t/u24r0102t_c1f.fits"

product = Observations.read_product(product_path)

assert isinstance(product, fits.HDUList)

@pytest.mark.remote_data
def test_observations_read_product_asdf(self):
asdf = pytest.importorskip("asdf")

product_path = "s3://stpubdata/roman/nexus/soc_simulations/tutorial_data" \
"/r0003201001001001004_0001_wfi01_f106_cal.asdf"

product = Observations.read_product(product_path)

assert isinstance(product, asdf.AsdfFile)

######################
# CatalogClass tests #
######################
Expand Down
11 changes: 11 additions & 0 deletions docs/mast/mast_obsquery.rst
Original file line number Diff line number Diff line change
Expand Up @@ -629,3 +629,14 @@ remain fully cloud-based.
COMPLETE
COMPLETE
COMPLETE

Streaming Data Products from S3 to memory
-----------------------------------------
If instead of downloading you would like to load an S3 URI directly to memory you can use `~astroquery.mast.ObservationsClass.read_product`.
This function supports FITS and ASDF data products and will automatically parse the file for the suffix and load it to memory using `~astropy.io.fits.open` or ``~asdf.open``.
For ASDF data products additional packages may be required (e.g lz4 and roman-datamodels for ROMAN data).

.. doctest-remote-data::

>>> from astroquery.mast import Observations
>>> product = Observations.read_product(product_path="s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits", auto=True, ignore_unrecognized=True)
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,14 @@ Documentation = "https://astroquery.readthedocs.io"

[project.optional-dependencies]
test = [
"asdf",
"gwcs",
"roman-datamodels",
"pytest>=7.4",
"pytest-doctestplus>=1.4",
"pytest-timeout",
"pytest-astropy",
"lz4",
"matplotlib",
# Temp workaround for https://github.com/RKrahl/pytest-dependency/issues/91
"pytest-dependency; platform_system != 'Windows'",
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ deps =
oldestdeps-alldeps: mocpy==0.12
oldestdeps-alldeps: regions==0.5
oldestdeps-alldeps: astropy-healpix==0.7
oldestdeps-alldeps: roman_datamodels==0.11
oldestdeps-alldeps: gwcs==0.18

online: pytest-custom_exit_code

Expand Down
Loading