# TODO: Need to include a way to detect MAST on-prem URLs and handle the streaming of those
def read_product(self, product_path, read_as="auto", ignore_unrecognized=True):
    """
    Read a product from an open S3 bucket into memory.

    Currently supports FITS and ASDF product types only. Failures are logged
    rather than raised; in every failure case the method returns None.

    Parameters
    ----------
    product_path : str
        URI to the product in the open bucket.
    read_as : str, optional
        How to read the file. ``"auto"`` (the default) selects the reader from
        the file suffix (``.fits``, ``.fits.gz`` or ``.asdf``). ``"fits"`` or
        ``"asdf"`` force the corresponding reader regardless of suffix.
        Any other value is reported as unsupported.
    ignore_unrecognized : bool, optional
        Forwarded to ``asdf.open`` as ``ignore_unrecognized_tag``. Defaults to True.

    Returns
    -------
    object or None
        The object returned by ``fits.open`` or ``asdf.open``, or None when the
        product type is unsupported or the product could not be opened.
    """
    # Normalise the requested reader; non-string values fall through to the
    # "unsupported" branch at the end instead of raising.
    mode = read_as.lower() if isinstance(read_as, str) else read_as

    if mode == "auto":
        path_lower = product_path.lower()
        if path_lower.endswith((".fits", ".fits.gz")):
            mode = "fits"
        elif path_lower.endswith(".asdf"):
            mode = "asdf"
        else:
            log.error("Unsupported extension type")
            return None

    # Read logic for FITS data products
    if mode == "fits":
        try:
            hdul = fits.open(product_path, fsspec_kwargs={"anon": True})
        except Exception as e:
            log.exception(f"Failed to open FITS File: {product_path} {e}")
            return None
        # Only report success once the file has actually been opened.
        log.info(f"Loaded: {product_path}")
        return hdul

    # Read logic for ASDF data products
    if mode == "asdf":
        s3_file = None
        try:
            fs = s3fs.S3FileSystem(anon=True)
            # The handle is deliberately NOT wrapped in ``with``: asdf loads array
            # data lazily by default, so the underlying file has to stay open for
            # as long as the returned object is in use. Closing it on return
            # (as a ``with`` block would) breaks later array access.
            # NOTE(review): asdf may not close a caller-supplied handle on
            # ``AsdfFile.close()``; presumably it is released on garbage
            # collection -- confirm.
            s3_file = fs.open(product_path, 'rb')
            af = asdf.open(s3_file, ignore_unrecognized_tag=ignore_unrecognized)
        except Exception as e:
            log.exception(f"Failed to open ASDF File: {product_path} {e}")
            # Nothing will use the handle after a failed open, so release it.
            if s3_file is not None:
                s3_file.close()
            return None
        log.info(f"Loaded: {product_path}")
        return af

    log.error(f"Unsupported read_as value: {read_as!r}")
    return None
def test_read_product_asdf(s3_asdf_path, mock_s3fs, mock_asdf_open):
    """
    Check that ``read_product`` on an ``.asdf`` S3 path opens the file through the
    mocked S3 filesystem and returns whatever the mocked ``asdf.open`` returns.
    """
    # NOTE(review): pytest sets up ``mock_s3fs`` / ``mock_asdf_open`` before this
    # body runs, so a missing optional package would presumably error inside the
    # fixtures before these skips are reached -- consider moving them there.
    for optional_dep in ("asdf", "s3fs"):
        pytest.importorskip(optional_dep)

    # Objects the mocks are configured to hand back
    opened_handle = mock_s3fs.open.return_value.__enter__.return_value
    expected_product = mock_asdf_open.return_value

    product = Observations.read_product(s3_asdf_path)

    # The file must be opened in binary mode and passed straight to asdf.open
    mock_s3fs.open.assert_called_once_with(s3_asdf_path, "rb")
    mock_asdf_open.assert_called_once_with(opened_handle, ignore_unrecognized_tag=True)

    assert product is expected_product
def test_read_product_unknown_extension_auto(mocker):
    """
    Check that ``read_product`` returns None for an unsupported file suffix and
    never calls either the FITS or the ASDF opener.
    """
    # ``mocker.patch("asdf.open")`` has to import asdf, which is an optional
    # dependency; skip instead of erroring when it is not installed.
    pytest.importorskip("asdf")

    product_path = "s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m.txt"

    # Patch fits.open and asdf.open to ensure they are NOT called
    mock_fits_open = mocker.patch("astropy.io.fits.open")
    mock_asdf_open = mocker.patch("asdf.open")

    result = Observations.read_product(product_path)

    assert result is None
    mock_fits_open.assert_not_called()
    mock_asdf_open.assert_not_called()
COMPLETE COMPLETE COMPLETE + +Streaming Data Products from S3 to memory +----------------------------------------- +If, instead of downloading, you would like to load an S3 URI directly to memory, you can use `~astroquery.mast.ObservationsClass.read_product`. +This function supports FITS and ASDF data products. It selects the reader from the file suffix and loads the product to memory using `~astropy.io.fits.open` or ``asdf.open``. +For ASDF data products additional packages may be required (e.g., lz4 and roman-datamodels for Roman data). + +.. doctest-remote-data:: + + >>> from astroquery.mast import Observations + >>> product = Observations.read_product(product_path="s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits", read_as="auto", ignore_unrecognized=True) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index bbe6c2c6cd..c2233682f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,10 +34,14 @@ Documentation = "https://astroquery.readthedocs.io" [project.optional-dependencies] test = [ + "asdf", + "gwcs", + "roman-datamodels", "pytest>=7.4", "pytest-doctestplus>=1.4", "pytest-timeout", "pytest-astropy", + "lz4", "matplotlib", # Temp workaround for https://github.com/RKrahl/pytest-dependency/issues/91 "pytest-dependency; platform_system != 'Windows'", diff --git a/tox.ini b/tox.ini index ca7173cf23..fe742e1d1e 100644 --- a/tox.ini +++ b/tox.ini @@ -51,6 +51,8 @@ deps = oldestdeps-alldeps: mocpy==0.12 oldestdeps-alldeps: regions==0.5 oldestdeps-alldeps: astropy-healpix==0.7 + oldestdeps-alldeps: roman_datamodels==0.11 + oldestdeps-alldeps: gwcs==0.18 online: pytest-custom_exit_code