Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,12 @@ mast
improvements to cloud download handling. [#3488]
- ``MastMissions`` query functions now support single or multiple targets via ``coordinates`` and
``object_names`` (including combined use in ``query_criteria``). [#3540]

- The cloud dataset in ``Observations`` is now enabled by default if the ``boto3`` and ``botocore`` packages are installed. This
default can be overridden by setting the ``enable_cloud_dataset`` configuration option to ``False``. [#3534]
- Results returned from ``MastMissions`` metadata query functions now include the search parameters in the ``meta`` attribute of the
``astropy.table.Table`` object (under the ``search_params`` key) and a ``description`` entry in each column's ``meta``. [#3588]
- Added ``pass_id`` as an alias for the ``pass`` column in query functions for the Roman mission to avoid conflicts with
the reserved Python keyword. [#3588]


jplspec
Expand Down
67 changes: 50 additions & 17 deletions astroquery/mast/missions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,23 @@
from urllib.parse import quote

import astropy.units as u
from astropy.coordinates import SkyCoord, BaseCoordinateFrame, Angle
import numpy as np
from astropy.table import Table, Row, Column, vstack
from astropy.coordinates import Angle, BaseCoordinateFrame, SkyCoord
from astropy.table import Column, Row, Table, vstack
from astropy.utils.decorators import deprecated_renamed_argument
from requests import HTTPError, RequestException

from astroquery import log
from astroquery.utils import commons, async_to_sync
from astroquery.utils.class_or_instance import class_or_instance
from astroquery.exceptions import InputWarning, InvalidQueryError, MaxResultsWarning, NoResultsWarning

from astroquery.exceptions import (
InputWarning,
InvalidQueryError,
MaxResultsWarning,
NoResultsWarning,
)
from astroquery.mast import utils
from astroquery.mast.core import MastQueryWithLogin
from astroquery.utils import async_to_sync, commons
from astroquery.utils.class_or_instance import class_or_instance

from . import conf

Expand Down Expand Up @@ -134,6 +138,17 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn
if self.service == self._search:
results = self._service_api_connection._parse_result(response, verbose, data_key='results')

# Add column descriptions to column metadata
column_list = self.get_column_list()
for col in results.columns:
if col in column_list['name']:
description = column_list[column_list['name'] == col]['description'].value[0]
results[col].meta = {'description': str(description)}

# Add search parameters to table metadata
result_json = response.json()
results.meta['search_params'] = result_json.get('search_params', {})

# Warn if maximum results are returned
if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
Expand Down Expand Up @@ -166,6 +181,10 @@ def _validate_criteria(self, **criteria):
# Check each criteria argument for validity
valid_cols = list(self.columns[self.mission]['name']) + self._search_option_fields
for kwd in criteria.keys():
if kwd == "pass_id" and "pass_id" not in valid_cols and "pass" in valid_cols:
# Special case: the actual column name is "pass", which is a reserved keyword in Python,
# so "pass_id" is accepted as an alias for it.
kwd = "pass"
col = next((name for name in valid_cols if name == kwd), None)
if not col:
closest_match = difflib.get_close_matches(kwd, valid_cols, n=1)
Expand Down Expand Up @@ -403,7 +422,8 @@ def query_criteria_async(self, *, coordinates=None, object_names=None, radius=3*
List of all valid fields that can be used to match results on criteria can be retrieved by calling
`~astroquery.mast.missions.MastMissionsClass.get_column_list` function.
To filter by multiple values for a single column, pass in a list of values or
a comma-separated string of values.
a comma-separated string of values. For the Roman mission, you can also use the special "pass_id"
keyword as an alias for the "pass" column, because ``pass`` is a reserved keyword in Python.

Returns
-------
Expand Down Expand Up @@ -479,7 +499,8 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs
function.
For example, one can specify the output columns(select_cols) or use other filters(conditions).
To filter by multiple values for a single column, pass in a list of values or
a comma-separated string of values.
a comma-separated string of values. For the Roman mission, you can also use the special "pass_id"
keyword as an alias for the "pass" column, because ``pass`` is a reserved keyword in Python.

Returns
-------
Expand Down Expand Up @@ -536,7 +557,8 @@ def query_object_async(self, object_names, *, radius=3*u.arcmin, limit=5000, off
function.
For example, one can specify the output columns(select_cols) or use other filters(conditions).
To filter by multiple values for a single column, pass in a list of values or
a comma-separated string of values.
a comma-separated string of values. For the Roman mission, you can also use the special "pass_id"
keyword as an alias for the "pass" column, because ``pass`` is a reserved keyword in Python.

Returns
-------
Expand Down Expand Up @@ -691,7 +713,7 @@ def filter_products(self, products, *, extension=None, **filters):

return products[filter_mask]

def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
def download_file(self, uri, *, local_path=None, cache=True, mission=None, verbose=True):
"""
Downloads a single file based on the data URI.

Expand All @@ -703,6 +725,9 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
Directory or filename to which the file will be downloaded. Defaults to current working directory.
cache : bool
Default is True. If file is found on disk, it will not be downloaded again.
mission : str, optional
The mission to which the file belongs. If not provided, the current value of the ``mission`` attribute
will be used.
verbose : bool, optional
Default is True. Whether to show download progress in the console.

Expand All @@ -717,18 +742,21 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
"""

# Construct the full data URL based on mission
if self.mission in ['hst', 'jwst', 'roman', 'roman_spectra', 'roman_cgi']:
current_mission = mission.lower() if mission else self.mission

if current_mission in ['hst', 'jwst', 'roman', 'roman_spectra', 'roman_cgi']:
# HST, JWST, and RST have a dedicated endpoint for retrieving products
base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product'
base_url = (f"{self._service_api_connection.MISSIONS_DOWNLOAD_URL}{current_mission}"
"/api/v0.1/retrieve_product")
keyword = 'product_name'
else:
# HLSPs use MAST download URL
base_url = self._service_api_connection.MAST_DOWNLOAD_URL
keyword = 'uri'
# These files require a MAST URI and not just a filename
if not uri.startswith('mast:'):
raise InvalidQueryError(f'For mission "{self.mission}", a full MAST URI is required for downloading. '
f'Got "{uri}".')
raise InvalidQueryError(f'For mission "{current_mission}", a full MAST URI is required '
f'for downloading. Got "{uri}".')
data_url = base_url + f'?{keyword}=' + uri
escaped_url = base_url + f'?{keyword}=' + quote(uri, safe='')

Expand Down Expand Up @@ -813,14 +841,20 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose
raise InvalidQueryError('Data product is missing "dataset" or "fileset" field required for '
'constructing local download path. Specify `flat=True` to avoid this '
'requirement.')
local_path = base_dir / dataset if not flat else base_dir

# If the products are a subscription JSON, they should include a mission field
mission = data_product['mission'].lower() if 'mission' in col_names else self.mission

# Create the local file path
local_path = base_dir if flat else base_dir / 'mastDownload' / mission / dataset
local_path.mkdir(parents=True, exist_ok=True)
local_file_path = local_path / Path(filename).name

# Download files and record status
status, msg, url = self.download_file(uri,
local_path=local_file_path,
cache=cache,
mission=mission,
verbose=verbose)
manifest_entries.append([local_file_path, status, msg, url])

Expand Down Expand Up @@ -901,11 +935,10 @@ def download_products(self, products, *, download_dir=None, flat=False,

# Set up base directory for downloads
download_dir = Path(download_dir or '.')
base_dir = download_dir if flat else download_dir / 'mastDownload' / self.mission

# Download files
manifest = self._download_files(products,
base_dir=base_dir,
base_dir=download_dir,
flat=flat,
cache=cache,
verbose=verbose)
Expand Down
43 changes: 30 additions & 13 deletions astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,11 @@ def test_missions_query_criteria():
)
assert isinstance(result, Table)
assert len(result) > 0
# Check that metadata for search criteria is included in the result
assert result.meta
# Check that column metadata is included
assert 'description' in result['sci_pep_id'].meta
assert 'description' in result['sci_instrume'].meta

# Raise error if invalid criteria is supplied
with pytest.raises(InvalidQueryError):
Expand Down Expand Up @@ -651,6 +656,10 @@ def test_missions_download_file(mock_is_file, tmp_path):
result = missions.download_file('mast:HLSP/classy/classy_test_file.fits', local_path=tmp_path)
assert result[0] == 'COMPLETE'

# Provide the mission to the method
result = missions.download_file('mast:HLSP/ullyses/ullyses_test_file.fits', local_path=tmp_path, mission='ullyses')
assert result[0] == 'COMPLETE'

# HLSP downloads should fail without URI
with pytest.raises(InvalidQueryError, match='For mission "classy", a full MAST URI is required'):
missions.download_file('classy_test_file.fits', local_path=tmp_path)
Expand Down Expand Up @@ -1054,49 +1063,54 @@ def test_observations_filter_products():


@patch.object(Path, "is_file", return_value=True)
def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch):
def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch, tmpdir):
mock_resource = patch_boto3[1]
obsid = '2003738726'
data_uri = 'mast:HST/product/u9o40504m_c3m.fits'

# Actually download the products
result = Observations.download_products(obsid,
dataURI=data_uri)
dataURI=data_uri,
download_dir=tmpdir)
assert isinstance(result, Table)

# Just get the curl script
result = Observations.download_products(obsid,
curl_flag=True,
productType=["SCIENCE"],
mrp_only=False)
mrp_only=False,
download_dir=tmpdir)
assert isinstance(result, Table)

# Without console output, flat
result = Observations.download_products(obsid,
dataURI=data_uri,
flat=True,
verbose=False)
verbose=False,
download_dir=tmpdir)
assert isinstance(result, Table)

# Passing row product
products = Observations.get_product_list(obsid)
result1 = Observations.download_products(products[0])
result1 = Observations.download_products(products[0], download_dir=tmpdir)
assert isinstance(result1, Table)

# Warn if no products to download
with pytest.warns(NoResultsWarning, match='No products to download'):
result = Observations.download_products(obsid, productType=["INVALID_TYPE"])
result = Observations.download_products(obsid, productType=["INVALID_TYPE"], download_dir=tmpdir)
assert result is None

# Warn if curl_flag and flags are both set
with pytest.warns(InputWarning, match='flat=True has no effect on curl downloads.'):
result = Observations.download_products(obsid,
curl_flag=True,
flat=True)
flat=True,
download_dir=tmpdir)
assert isinstance(result, Table)

result = Observations.download_products(obsid,
dataURI=data_uri)
dataURI=data_uri,
download_dir=tmpdir)
assert isinstance(result, Table)
assert result[0]['Status'] == 'COMPLETE'

Expand All @@ -1106,34 +1120,37 @@ def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch):
mock_resource.Bucket.return_value.download_file.side_effect = client_err
# Warn and fall back to on-prem download
with pytest.warns(InputWarning, match='Falling back to MAST download'):
result = Observations.download_products(obsid, dataURI=data_uri)
result = Observations.download_products(obsid, dataURI=data_uri, download_dir=tmpdir)
assert result[0]['Status'] == 'COMPLETE'
# Do not fall back to on-prem download, skip instead
with pytest.warns(NoResultsWarning, match='Skipping download.'):
result = Observations.download_products(obsid,
dataURI=data_uri,
cloud_only=True)
cloud_only=True,
download_dir=tmpdir)
assert result[0]['Status'] == 'SKIPPED'

# Products not found in cloud
monkeypatch.setattr(Observations, 'get_cloud_uris', lambda *a, **k: {})
with pytest.warns(NoResultsWarning, match='was not found in the cloud. Skipping download.'):
result = Observations.download_products(obsid,
dataURI=data_uri,
cloud_only=True)
cloud_only=True,
download_dir=tmpdir)
assert result[0]['Status'] == 'SKIPPED'
assert result[0]['Message'] == 'Product not found in cloud'
# Warn and fall back to on-prem download if products not found in cloud and cloud_only is False
with pytest.warns(InputWarning, match='was not found in the cloud. Falling back to MAST download'):
result = Observations.download_products(obsid, dataURI=data_uri)
result = Observations.download_products(obsid, dataURI=data_uri, download_dir=tmpdir)
assert result[0]['Status'] == 'COMPLETE'

# Cloud access not enabled, warn if cloud_only is True
Observations.disable_cloud_dataset()
with pytest.warns(InputWarning, match='cloud data access is not enabled'):
result = Observations.download_products('2003738726',
dataURI='mast:HST/product/u9o40504m_c3m.fits',
cloud_only=True)
cloud_only=True,
download_dir=tmpdir)
assert result[0]['Status'] == 'COMPLETE'


Expand Down
Loading
Loading