diff --git a/CHANGES.rst b/CHANGES.rst index 536e238f0a..2b250f4e59 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -143,9 +143,12 @@ mast improvements to cloud download handling. [#3488] - ``MastMissions`` query functions now support single or multiple targets via ``coordinates`` and ``object_names`` (including combined use in ``query_criteria``). [#3540] - - The cloud dataset in ``Observations`` is now enabled by default if the ``boto3`` and ``botocore`` packages are installed. This default can be overridden by setting the ``enable_cloud_dataset`` configuration option to False. [#3534] +- Results returned from ``MastMissions`` metadata query functions now include search parameters in the metadata of the ``astropy.table.Table`` object + and column descriptions in the column metadata. [#3588] +- Added ``pass_id`` as an alias for the ``pass`` column in query functions for the Roman mission to avoid conflicts with + the reserved Python keyword. [#3588] jplspec diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index fbd882c5e4..94ae869a2e 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -15,19 +15,23 @@ from urllib.parse import quote import astropy.units as u -from astropy.coordinates import SkyCoord, BaseCoordinateFrame, Angle import numpy as np -from astropy.table import Table, Row, Column, vstack +from astropy.coordinates import Angle, BaseCoordinateFrame, SkyCoord +from astropy.table import Column, Row, Table, vstack from astropy.utils.decorators import deprecated_renamed_argument from requests import HTTPError, RequestException from astroquery import log -from astroquery.utils import commons, async_to_sync -from astroquery.utils.class_or_instance import class_or_instance -from astroquery.exceptions import InputWarning, InvalidQueryError, MaxResultsWarning, NoResultsWarning - +from astroquery.exceptions import ( + InputWarning, + InvalidQueryError, + MaxResultsWarning, + NoResultsWarning, +) from astroquery.mast 
import utils from astroquery.mast.core import MastQueryWithLogin +from astroquery.utils import async_to_sync, commons +from astroquery.utils.class_or_instance import class_or_instance from . import conf @@ -134,6 +138,17 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn if self.service == self._search: results = self._service_api_connection._parse_result(response, verbose, data_key='results') + # Add column descriptions to column metadata + column_list = self.get_column_list() + for col in results.columns: + if col in column_list['name']: + description = column_list[column_list['name'] == col]['description'].value[0] + results[col].meta = {'description': str(description)} + + # Add search parameters to table metadata + result_json = response.json() + results.meta['search_params'] = result_json.get('search_params', {}) + # Warn if maximum results are returned if len(results) >= self.limit: warnings.warn("Maximum results returned, may not include all sources within radius.", @@ -166,6 +181,10 @@ def _validate_criteria(self, **criteria): # Check each criteria argument for validity valid_cols = list(self.columns[self.mission]['name']) + self._search_option_fields for kwd in criteria.keys(): + if kwd == "pass_id" and "pass_id" not in valid_cols and "pass" in valid_cols: + # Special case where the actual column name is "pass", but that's a reserved keyword in Python + # We allow "pass_id" as an alias + kwd = "pass" col = next((name for name in valid_cols if name == kwd), None) if not col: closest_match = difflib.get_close_matches(kwd, valid_cols, n=1) @@ -403,7 +422,8 @@ def query_criteria_async(self, *, coordinates=None, object_names=None, radius=3* List of all valid fields that can be used to match results on criteria can be retrieved by calling `~astroquery.mast.missions.MastMissionsClass.get_column_list` function. To filter by multiple values for a single column, pass in a list of values or - a comma-separated string of values. 
+ a comma-separated string of values. For the Roman mission, you can also use the special "pass_id" + keyword as an alias for the "pass" column, because "pass" is a reserved keyword in Python. Returns ------- @@ -479,7 +499,8 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs function. For example, one can specify the output columns(select_cols) or use other filters(conditions). To filter by multiple values for a single column, pass in a list of values or - a comma-separated string of values. + a comma-separated string of values. For the Roman mission, you can also use the special "pass_id" + keyword as an alias for the "pass" column, because "pass" is a reserved keyword in Python. Returns ------- @@ -536,7 +557,8 @@ def query_object_async(self, object_names, *, radius=3*u.arcmin, limit=5000, off function. For example, one can specify the output columns(select_cols) or use other filters(conditions). To filter by multiple values for a single column, pass in a list of values or - a comma-separated string of values. + a comma-separated string of values. For the Roman mission, you can also use the special "pass_id" + keyword as an alias for the "pass" column, because "pass" is a reserved keyword in Python. Returns ------- @@ -691,7 +713,7 @@ def filter_products(self, products, *, extension=None, **filters): return products[filter_mask] - def download_file(self, uri, *, local_path=None, cache=True, verbose=True): + def download_file(self, uri, *, local_path=None, cache=True, mission=None, verbose=True): """ Downloads a single file based on the data URI. @@ -703,6 +725,9 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): Directory or filename to which the file will be downloaded. Defaults to current working directory. cache : bool Default is True. If file is found on disk, it will not be downloaded again. + mission : str, optional + The mission to which the file belongs. 
If not provided, the current value of the ``mission`` attribute + will be used. verbose : bool, optional Default is True. Whether to show download progress in the console. @@ -717,9 +742,12 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): """ # Construct the full data URL based on mission - if self.mission in ['hst', 'jwst', 'roman', 'roman_spectra', 'roman_cgi']: + current_mission = mission.lower() if mission else self.mission + + if current_mission in ['hst', 'jwst', 'roman', 'roman_spectra', 'roman_cgi']: # HST, JWST, and RST have a dedicated endpoint for retrieving products - base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product' + base_url = (f"{self._service_api_connection.MISSIONS_DOWNLOAD_URL}{current_mission}" + "/api/v0.1/retrieve_product") keyword = 'product_name' else: # HLSPs use MAST download URL @@ -727,8 +755,8 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True): keyword = 'uri' # These files require a MAST URI and not just a filename if not uri.startswith('mast:'): - raise InvalidQueryError(f'For mission "{self.mission}", a full MAST URI is required for downloading. ' - f'Got "{uri}".') + raise InvalidQueryError(f'For mission "{current_mission}", a full MAST URI is required ' + f'for downloading. Got "{uri}".') data_url = base_url + f'?{keyword}=' + uri escaped_url = base_url + f'?{keyword}=' + quote(uri, safe='') @@ -813,7 +841,12 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose raise InvalidQueryError('Data product is missing "dataset" or "fileset" field required for ' 'constructing local download path. 
Specify `flat=True` to avoid this ' 'requirement.') - local_path = base_dir / dataset if not flat else base_dir + + # If the products are a subscription JSON, they should include a mission field + mission = data_product['mission'].lower() if 'mission' in col_names else self.mission + + # Create the local file path + local_path = base_dir if flat else base_dir / 'mastDownload' / mission / dataset local_path.mkdir(parents=True, exist_ok=True) local_file_path = local_path / Path(filename).name @@ -821,6 +854,7 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose status, msg, url = self.download_file(uri, local_path=local_file_path, cache=cache, + mission=mission, verbose=verbose) manifest_entries.append([local_file_path, status, msg, url]) @@ -901,11 +935,10 @@ def download_products(self, products, *, download_dir=None, flat=False, # Set up base directory for downloads download_dir = Path(download_dir or '.') - base_dir = download_dir if flat else download_dir / 'mastDownload' / self.mission # Download files manifest = self._download_files(products, - base_dir=base_dir, + base_dir=download_dir, flat=flat, cache=cache, verbose=verbose) diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 105582fd9a..5eb868da2b 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -307,6 +307,11 @@ def test_missions_query_criteria(): ) assert isinstance(result, Table) assert len(result) > 0 + # Check that metadata for search criteria is included in the result + assert result.meta + # Check that column metadata is included + assert 'description' in result['sci_pep_id'].meta + assert 'description' in result['sci_instrume'].meta # Raise error if invalid criteria is supplied with pytest.raises(InvalidQueryError): @@ -651,6 +656,10 @@ def test_missions_download_file(mock_is_file, tmp_path): result = missions.download_file('mast:HLSP/classy/classy_test_file.fits', local_path=tmp_path) 
assert result[0] == 'COMPLETE' + # Provide the mission to the method + result = missions.download_file('mast:HLSP/ullyses/ullyses_test_file.fits', local_path=tmp_path, mission='ullyses') + assert result[0] == 'COMPLETE' + # HLSP downloads should fail without URI with pytest.raises(InvalidQueryError, match='For mission "classy", a full MAST URI is required'): missions.download_file('classy_test_file.fits', local_path=tmp_path) @@ -1054,49 +1063,54 @@ def test_observations_filter_products(): @patch.object(Path, "is_file", return_value=True) -def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch): +def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch, tmpdir): mock_resource = patch_boto3[1] obsid = '2003738726' data_uri = 'mast:HST/product/u9o40504m_c3m.fits' # Actually download the products result = Observations.download_products(obsid, - dataURI=data_uri) + dataURI=data_uri, + download_dir=tmpdir) assert isinstance(result, Table) # Just get the curl script result = Observations.download_products(obsid, curl_flag=True, productType=["SCIENCE"], - mrp_only=False) + mrp_only=False, + download_dir=tmpdir) assert isinstance(result, Table) # Without console output, flat result = Observations.download_products(obsid, dataURI=data_uri, flat=True, - verbose=False) + verbose=False, + download_dir=tmpdir) assert isinstance(result, Table) # Passing row product products = Observations.get_product_list(obsid) - result1 = Observations.download_products(products[0]) + result1 = Observations.download_products(products[0], download_dir=tmpdir) assert isinstance(result1, Table) # Warn if no products to download with pytest.warns(NoResultsWarning, match='No products to download'): - result = Observations.download_products(obsid, productType=["INVALID_TYPE"]) + result = Observations.download_products(obsid, productType=["INVALID_TYPE"], download_dir=tmpdir) assert result is None # Warn if curl_flag and flags are both set with 
pytest.warns(InputWarning, match='flat=True has no effect on curl downloads.'): result = Observations.download_products(obsid, curl_flag=True, - flat=True) + flat=True, + download_dir=tmpdir) assert isinstance(result, Table) result = Observations.download_products(obsid, - dataURI=data_uri) + dataURI=data_uri, + download_dir=tmpdir) assert isinstance(result, Table) assert result[0]['Status'] == 'COMPLETE' @@ -1106,13 +1120,14 @@ def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch): mock_resource.Bucket.return_value.download_file.side_effect = client_err # Warn and fall back to on-prem download with pytest.warns(InputWarning, match='Falling back to MAST download'): - result = Observations.download_products(obsid, dataURI=data_uri) + result = Observations.download_products(obsid, dataURI=data_uri, download_dir=tmpdir) assert result[0]['Status'] == 'COMPLETE' # Do not fall back to on-prem download, skip instead with pytest.warns(NoResultsWarning, match='Skipping download.'): result = Observations.download_products(obsid, dataURI=data_uri, - cloud_only=True) + cloud_only=True, + download_dir=tmpdir) assert result[0]['Status'] == 'SKIPPED' # Products not found in cloud @@ -1120,12 +1135,13 @@ def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch): with pytest.warns(NoResultsWarning, match='was not found in the cloud. Skipping download.'): result = Observations.download_products(obsid, dataURI=data_uri, - cloud_only=True) + cloud_only=True, + download_dir=tmpdir) assert result[0]['Status'] == 'SKIPPED' assert result[0]['Message'] == 'Product not found in cloud' # Warn and fall back to on-prem download if products not found in cloud and cloud_only is False with pytest.warns(InputWarning, match='was not found in the cloud. 
Falling back to MAST download'): - result = Observations.download_products(obsid, dataURI=data_uri) + result = Observations.download_products(obsid, dataURI=data_uri, download_dir=tmpdir) assert result[0]['Status'] == 'COMPLETE' # Cloud access not enabled, warn if cloud_only is True @@ -1133,7 +1149,8 @@ def test_observations_download_products(mock_is_file, patch_boto3, monkeypatch): with pytest.warns(InputWarning, match='cloud data access is not enabled'): result = Observations.download_products('2003738726', dataURI='mast:HST/product/u9o40504m_c3m.fits', - cloud_only=True) + cloud_only=True, + download_dir=tmpdir) assert result[0]['Status'] == 'COMPLETE' diff --git a/astroquery/mast/tests/test_mast_remote.py b/astroquery/mast/tests/test_mast_remote.py index da6e8fe3e7..b6f55a05e2 100644 --- a/astroquery/mast/tests/test_mast_remote.py +++ b/astroquery/mast/tests/test_mast_remote.py @@ -149,17 +149,23 @@ def test_missions_query_criteria_async(self): def test_missions_query_criteria(self): # Non-positional search + select_cols = ['sci_pep_id', 'sci_obs_type', 'sci_aec'] with pytest.warns(MaxResultsWarning): result = MastMissions.query_criteria(sci_pep_id=12557, sci_obs_type='SPECTRUM', sci_aec='S', limit=3, - select_cols=['sci_pep_id', 'sci_obs_type', 'sci_aec']) + select_cols=select_cols) assert isinstance(result, Table) assert len(result) == 3 assert (result['sci_pep_id'] == 12557).all() assert (result['sci_obs_type'] == 'SPECTRUM').all() assert (result['sci_aec'] == 'S').all() + assert result.meta + assert len(result.meta['search_params']['conditions']) == 3 + for cols in select_cols: + assert 'description' in result[cols].meta + assert result[cols].meta['description'] # Positional criteria search result = MastMissions.query_criteria(object_names='NGC6121', @@ -318,8 +324,11 @@ def test_missions_filter_products(self): assert all(filtered['category'] == 'CALIBRATED') def test_missions_download_products(self, tmp_path): - def check_filepath(path): - assert 
path.is_file() + def check_filepaths(result): + for row in result: + if row['Status'] == 'COMPLETE': + path = Path(row['Local Path']) + assert path.is_file() # Check string input test_dataset_id = 'Z14Z0104T' @@ -327,28 +336,32 @@ def check_filepath(path): download_dir=tmp_path) for row in result: if row['Status'] == 'COMPLETE': - check_filepath(row['Local Path']) + check_filepaths(result) # Check Row input datasets = MastMissions.query_object("M4", radius=0.1) prods = MastMissions.get_product_list(datasets[0])[0] result = MastMissions.download_products(prods, download_dir=tmp_path) - check_filepath(result['Local Path'][0]) + check_filepaths(result) # JSON data input - json_data = [{'fileset': 'Z14Z0104T', - 'filename': 'z14z0104t_pdq.fits'}] + json_data = [{'mission': 'hst', + 'fileset': 'Z14Z0104T', + 'filename': 'z14z0104t_pdq.fits'}, + {'mission': 'jwst', + 'fileset': 'jw01189001001_02101_00001', + 'filename': 'jw01189001001_02101_00001_nrs1_uncal.jpg'}] result = MastMissions.download_products(json_data, download_dir=tmp_path) - check_filepath(result['Local Path'][0]) + check_filepaths(result) # JSON file input json_file = tmp_path / 'products.json' json_file.write_text(json.dumps(json_data)) result = MastMissions.download_products(json_file, download_dir=tmp_path) - check_filepath(result['Local Path'][0]) + check_filepaths(result) # Warn about no products with pytest.warns(NoResultsWarning): @@ -395,7 +408,7 @@ def check_result(result, path): ('jwst', {'fileSetName': 'jw01189001001_02101_00001'}), ('classy', {'Target': 'J0021+0052'}), ('ullyses', {'host_galaxy_name': 'WLM', 'select_cols': ['observation_id']}), - ('roman', {'program': 3}), + ('roman', {'program': 3, 'pass_id': 1}), ('iue', {'iue_data_id': 'LWR08496'}), ]) def test_missions_workflow(self, tmp_path, mission, query_params): diff --git a/docs/mast/mast_missions.rst b/docs/mast/mast_missions.rst index b51c76089c..f7e13f3d87 100644 --- a/docs/mast/mast_missions.rst +++ b/docs/mast/mast_missions.rst 
@@ -172,6 +172,11 @@ Criteria syntax supports several operations: allowing for flexible matching of strings. The wildcard character is ``*`` and it replaces any number of characters preceding, following, or in between existing characters, depending on its placement. +.. note:: + + For the Roman mission, query methods also support the ``pass_id`` parameter as an alias for the ``pass`` column, + which refers to a single iteration of a pass plan. The alias is needed because ``pass`` is a reserved Python keyword and cannot be used as a keyword argument name. + .. doctest-remote-data:: >>> results = missions.query_criteria(sci_obs_type="IMAGE",