Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions pytroll_runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,18 +212,30 @@ def generate_message_from_log_output(publisher_config, mda, log_output):
return message


def _get_nefiles_from_regex(regex, log_output):
    """Get list of new output files from log messages.

    Args:
        regex: A single regular-expression pattern, applied with ``re.findall``.
        log_output: The log text to search, either as UTF-8 encoded bytes or
            as an already decoded string.

    Returns:
        The list produced by ``re.findall`` for the pattern, one entry per match.
    """
    # ``str(obj, "utf-8")`` raises TypeError for ``str`` input, so only decode bytes.
    text = log_output if isinstance(log_output, str) else str(log_output, "utf-8")
    logger.debug("Matching regex-pattern: %s from log output", regex)
    nfiles = re.findall(regex, text)
    logger.debug("Output files identified from log output: %s", nfiles)
    return nfiles


def get_newfiles_from_regex_and_logoutput(regex, log_output):
    """Get the filenames using one or several regex-patterns on the log_output.

    Args:
        regex: Either a single pattern or a list/tuple of patterns. Each
            pattern is matched against the complete log output.
        log_output: The log text, as UTF-8 encoded bytes or as a string.

    Returns:
        A list with the matches of every pattern, concatenated in the order
        in which the patterns were given.
    """
    # Decode once up front instead of once per pattern.
    text = log_output if isinstance(log_output, str) else str(log_output, "utf-8")
    # Normalise to a sequence so single and multiple patterns share one code
    # path; a list must never be handed to ``re.findall`` directly.
    patterns = regex if isinstance(regex, (list, tuple)) else [regex]

    new_files = []
    for pattern in patterns:
        new_files.extend(_get_nefiles_from_regex(pattern, text))

    return new_files


def generate_message_from_expected_files(pub_config, extra_metadata=None, preexisting_files=None):
    """Build a message for the expected files that are new.

    When no (or an empty) collection of preexisting files is given, an empty
    set is passed on to ``find_new_files`` instead.
    """
    already_present = preexisting_files or set()
    detected_files = find_new_files(pub_config, already_present)
    return generate_message_from_new_files(pub_config, detected_files, extra_metadata)


Expand Down
89 changes: 75 additions & 14 deletions pytroll_runner/tests/test_prepare_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pytroll_runner import get_newfiles_from_regex_and_logoutput, read_config

TEST_YAML_CONFIG1 = """script: /bin/awsat_l0l1b_run.sh
TEST_YAML_CONFIG_ONE_OUTPUT_FILE = """script: /bin/awsat_l0l1b_run.sh
publisher_config:
output_files_log_regex: "renamed .* -> '(.*.nc)"
publisher_settings:
Expand All @@ -24,30 +24,91 @@
addr_listener: true
"""

# Runner configuration for the multi-pattern case: `output_files_log_regex`
# is given as a list of three patterns instead of a single string.
TEST_YAML_CONFIG_MANY_OUTPUT_FILES = """
script:
command: "/san1/opt/pps_mw_aws_runner/releases/pps_mw_aws_runner-0.0.10/bin/pps_mw.sh -p pr_hl -s aws -r W_.*.nc "
workers: 1
publisher_config:
output_files_log_regex:
- "Has written level2 file: (.*.nc)"
- "Has saved plot file: (.*euro4.png)"
- "Has saved plot file: (.*baltrad4.png)"
publisher_settings:
name: pps_mw_aws_runner
static_metadata:
data_processing_level: 2
type: NC
topic: /PPS-MW-NC/2/
subscriber_config:
topics:
- /awsat/l1b/metno/oslo
- /awsat/l1b/fmi/sodankyla
nameserver: localhost
addr_listener: true
"""

# Sample log output (bytes) containing a single "renamed '<source>' -> '<target>'"
# line for one NetCDF file.
EXAMPLE_LOG_OUTPUT_BYTES = (b"""\nGenerating configuration..\n\nSetting SELinux context on...\n\nFound result file:"""
b"""\n\nrenamed '/san1/polar_in/direct_readout/aws/L1/W_XX-SMHI-Kangerlussuaq,SAT,AWS1-"""
b"""MWR-1B-RAD_C_SMHI_20250602201120_L_D_20250602033328_20250602033625_C_N____.nc' -> '"""
b"""/san1/polar_in/direct_readout/aws/lvl1/W_XX-SMHI-Kangerlussuaq,SAT,AWS1-MWR-1B-RAD_"""
b"""C_SMHI_20250602201120_L_D_20250602033328_20250602033625_C_N____.nc'\n""")

# Expected match for EXAMPLE_LOG_OUTPUT_BYTES: the target path of the rename.
EXPECTED_LIST = ["/san1/polar_in/direct_readout/aws/lvl1/W_XX-SMHI-Kangerlussuaq,SAT,AWS1-MWR-1B-RAD_C_SMHI_20250602201"
"120_L_D_20250602033328_20250602033625_C_N____.nc"]
# Same single path as EXPECTED_LIST, with the literal split after the directory part.
EXPECTED_LIST_ONE = ["/san1/polar_in/direct_readout/aws/lvl1/"
"W_XX-SMHI-Kangerlussuaq,SAT,AWS1-MWR-1B-RAD_C_SMHI_20250602201"
"120_L_D_20250602033328_20250602033625_C_N____.nc"]

# Sample log output (bytes) reporting one written level2 file and two saved
# plot files (euro4, then baltrad4), followed by a "Done" line that names the
# processed input file.
LOG_OUTPUT_SEVERAL_FILES = (b"""\n[INFO: 2025-10-21 11:27:47 : pps_mw.writers.level2] Start writing level2 dataset."""
b"""\n[INFO: 2025-10-21 11:27:47 : pps_mw.writers.level2] Has written level2 file: """
b"""/san1/polar_out/direct_readout/lvl2/"""
b"""S_NWC_PRHL_aws1_20251021T1116140Z_20251021T1123330Z.nc"""
b"""\n[INFO: 2025-10-21 11:27:51 : pps_mw.utils.plotting] Has saved plot file: """
b"""/san1/polar_out/direct_readout/lvl2/"""
b"""quicklook_PRHL_aws1_20251021111614_20251021112333_euro4.png"""
b"""\n[INFO: 2025-10-21 11:27:52 : pps_mw.utils.plotting] Has saved plot file: """
b"""/san1/polar_out/direct_readout/lvl2/"""
b"""quicklook_PRHL_aws1_20251021111614_20251021112333_baltrad4.png"""
b"""\n[INFO: 2025-10-21 11:27:52 : pps_mw.pges.pge_runner] Done pr_hl processing for """
b"""/san1/polar_in/regional/aws/l1b/W_XX-FMI-Sodankyla,SAT,AWS1-MWR-1B-RAD_C_FMI_"""
b"""_20251021112652_R_D_20251021111330_20251021112402_C_N____.nc.""")

# The three output files named in LOG_OUTPUT_SEVERAL_FILES. The baltrad4 plot
# is listed before the euro4 plot here, i.e. not in the order of the log lines.
EXPECTED_LIST_MANY = ["/san1/polar_out/direct_readout/lvl2/"
"S_NWC_PRHL_aws1_20251021T1116140Z_20251021T1123330Z.nc",
"/san1/polar_out/direct_readout/lvl2/"
"quicklook_PRHL_aws1_20251021111614_20251021112333_baltrad4.png",
"/san1/polar_out/direct_readout/lvl2/"
"quicklook_PRHL_aws1_20251021111614_20251021112333_euro4.png"
]


@pytest.fixture
def fake_config_yaml(tmp_path, request):
    """Write fake config yaml file based on parameter.

    The fixture is intended for indirect parametrization: ``request.param``
    selects the configuration to write, ``"one"`` for the config with a
    single log regex and ``"many"`` for the config with a list of regexes.

    Returns:
        The path of the yaml file written below ``tmp_path``.

    Raises:
        ValueError: If ``request.param`` is neither ``"one"`` nor ``"many"``.
    """
    if request.param == "one":
        content = TEST_YAML_CONFIG_ONE_OUTPUT_FILE
        filename = "config_one.yaml"
    elif request.param == "many":
        content = TEST_YAML_CONFIG_MANY_OUTPUT_FILES
        filename = "config_many.yaml"
    else:
        raise ValueError(f"Unknown param: {request.param}")

    file_path = tmp_path / filename
    file_path.write_text(content)
    return file_path


@pytest.mark.parametrize(
    ("fake_config_yaml", "log_output", "expected_list"),
    [
        ("one", EXAMPLE_LOG_OUTPUT_BYTES, EXPECTED_LIST_ONE),
        ("many", LOG_OUTPUT_SEVERAL_FILES, EXPECTED_LIST_MANY),
    ],
    indirect=["fake_config_yaml"],
)
def test_get_newfiles_from_regex_and_logoutput(fake_config_yaml, log_output, expected_list):
    """Test getting new files from regex pattern(s) and log output.

    Each case pairs a config (selected through the ``fake_config_yaml``
    fixture) with a log sample and the file names expected to be extracted.
    """
    config = read_config(fake_config_yaml)
    # NOTE(review): index 2 is presumably the publisher config returned by
    # read_config - confirm against its return signature.
    pattern = config[2]["output_files_log_regex"]
    result = get_newfiles_from_regex_and_logoutput(pattern, log_output)

    # With several patterns the result follows pattern order, which need not
    # match the order of the expected list, so compare sorted copies.
    assert sorted(result) == sorted(expected_list)
Loading