Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 78 additions & 2 deletions arc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ def __init__(self,
self.orbitals_level = orbitals_level
self.unique_species_labels = list()
self.save_restart = False
if self.restart_dict is not None:
self._sanitize_restart_output()

if len(self.rxn_list):
rxn_info_path = self.make_reaction_labels_info_file()
Expand Down Expand Up @@ -2619,7 +2621,14 @@ def switch_ts(self, label: str):
logger.info(f'Switching a TS guess for {label}...')
self.determine_most_likely_ts_conformer(label=label) # Look for a different TS guess.
self.delete_all_species_jobs(label=label) # Delete other currently running jobs for this TS.
self.output[label]['geo'] = self.output[label]['freq'] = self.output[label]['sp'] = self.output[label]['composite'] = ''
if label in self.output:
self.output[label]['convergence'] = False
for key in ['opt', 'freq', 'sp', 'composite', 'fine']:
if key in self.output[label]['job_types']:
self.output[label]['job_types'][key] = False
if 'paths' in self.output[label]:
for key in self.output[label]['paths']:
self.output[label]['paths'][key] = '' if key != 'irc' else list()
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is duplicated logic for resetting output between switch_ts (lines 2624-2631) and delete_all_species_jobs (lines 3620-3626). Both methods reset convergence, job_types, and paths in nearly identical ways. Since switch_ts calls delete_all_species_jobs at line 2623, consider removing the duplicate resetting logic from switch_ts and relying solely on delete_all_species_jobs to handle the output reset. This would improve maintainability and reduce the risk of the two implementations diverging.

Suggested change
if label in self.output:
self.output[label]['convergence'] = False
for key in ['opt', 'freq', 'sp', 'composite', 'fine']:
if key in self.output[label]['job_types']:
self.output[label]['job_types'][key] = False
if 'paths' in self.output[label]:
for key in self.output[label]['paths']:
self.output[label]['paths'][key] = '' if key != 'irc' else list()

Copilot uses AI. Check for mistakes.
freq_path = os.path.join(self.project_directory, 'output', 'rxns', label, 'geometry', 'freq.out')
if os.path.isfile(freq_path):
os.remove(freq_path)
Expand Down Expand Up @@ -3044,6 +3053,9 @@ def check_all_done(self, label: str):
logger.debug(f'Species {label} did not converge.')
all_converged = False
break
if all_converged and self._missing_required_paths(label):
logger.debug(f'Species {label} did not converge due to missing output paths.')
all_converged = False
if label in self.output and all_converged:
self.output[label]['convergence'] = True
if self.species_dict[label].is_ts:
Expand Down Expand Up @@ -3084,6 +3096,64 @@ def check_all_done(self, label: str):
# Update restart dictionary and save the yaml restart file:
self.save_restart_dict()

def _missing_required_paths(self, label: str) -> bool:
"""
Check whether required output paths are missing for a species/TS.

Args:
label (str): The species label.

Returns:
bool: Whether required output paths are missing.
"""
if label not in self.output or 'paths' not in self.output[label]:
return False
path_map = {
'opt': 'geo',
'freq': 'freq',
'sp': 'sp',
'composite': 'composite',
}
for job_type, path_key in path_map.items():
if job_type == 'composite':
required = self.composite_method is not None
else:
required = self.job_types.get(job_type, False)
if not required:
continue
if self.species_dict[label].number_of_atoms == 1 and job_type in ['opt', 'freq']:
continue
if self.output[label]['job_types'].get(job_type, False) and not self.output[label]['paths'].get(path_key, ''):
return True
return False

def _sanitize_restart_output(self) -> None:
"""
Ensure restart output state is internally consistent (e.g., convergence without paths).
"""
path_map = {
'opt': 'geo',
'freq': 'freq',
'sp': 'sp',
'composite': 'composite',
}
for label in list(self.output.keys()):
if label not in self.species_dict:
continue
if self.output[label].get('convergence') and self._missing_required_paths(label):
self.output[label]['convergence'] = False
for job_type, path_key in path_map.items():
if job_type == 'composite':
required = self.composite_method is not None
else:
required = self.job_types.get(job_type, False)
if not required:
continue
if self.species_dict[label].number_of_atoms == 1 and job_type in ['opt', 'freq']:
continue
if not self.output[label]['paths'].get(path_key, ''):
self.output[label]['job_types'][job_type] = False
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _sanitize_restart_output method duplicates logic from _missing_required_paths. Lines 3145-3155 repeat the same path validation logic that's already in _missing_required_paths (lines 3117-3128). Consider refactoring to reuse the existing helper method or extracting the path validation into a shared utility function to improve maintainability and reduce duplication.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if it's duplicated, maybe outsource it into a single external function?

Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the _sanitize_restart_output method, there's no check for the existence of 'job_types' key before accessing it at line 3155. While _missing_required_paths at line 3126 uses .get() with a default value to safely access job_types, this line directly accesses self.output[label]['job_types'][job_type]. If 'job_types' doesn't exist in self.output[label], this will raise a KeyError. Consider using safe dictionary access or add a check for the existence of 'job_types' key.

Suggested change
self.output[label]['job_types'][job_type] = False
if 'job_types' in self.output[label]:
self.output[label]['job_types'][job_type] = False

Copilot uses AI. Check for mistakes.

def get_server_job_ids(self, specific_server: Optional[str] = None):
"""
Check job status on a specific server or on all active servers, get a list of relevant running job IDs.
Expand Down Expand Up @@ -3547,7 +3617,13 @@ def delete_all_species_jobs(self, label: str):
logger.info(f'Deleted job {job_name}')
job.delete()
self.running_jobs[label] = list()
self.output[label]['paths'] = {key: '' if key != 'irc' else list() for key in self.output[label]['paths'].keys()}
if label in self.output:
self.output[label]['convergence'] = False
for key in ['opt', 'freq', 'sp', 'composite', 'fine']:
if key in self.output[label]['job_types']:
self.output[label]['job_types'][key] = False
self.output[label]['paths'] = {key: '' if key != 'irc' else list()
for key in self.output[label]['paths'].keys()}

def restore_running_jobs(self):
"""
Expand Down
120 changes: 120 additions & 0 deletions arc/scheduler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import unittest
import os
import shutil
from unittest import mock

import arc.parser.parser as parser
from arc.checks.ts import check_ts
Expand Down Expand Up @@ -726,6 +727,125 @@ def test_save_e_elect(self):
self.assertEqual(content, {'formaldehyde': -300621.95378630824, 'mehylamine': -251360.00924747565})
shutil.rmtree(project_directory, ignore_errors=True)

def test_check_all_done_requires_paths(self):
    """Test that convergence isn't set when required paths are missing."""
    label = 'formaldehyde'
    species = ARCSpecies(label=label, smiles='C=O')
    # Mark opt/freq/sp as done while leaving every output path empty.
    job_statuses = {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True,
                    'rotors': False, 'irc': False, 'fine': False, 'composite': False}
    restart_output = {label: {'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                              'restart': '',
                              'convergence': False,
                              'job_types': job_statuses,
                              }}
    scheduler = Scheduler(project='test_check_all_done_requires_paths',
                          ess_settings=self.ess_settings,
                          species_list=[species],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': restart_output},
                          )
    scheduler.check_all_done(label=label)
    # Empty required paths must block convergence.
    self.assertFalse(scheduler.output[label]['convergence'])

def test_restart_sanitizes_convergence_for_ts(self):
    """Test restart output sanitization for TS with missing paths."""
    ts = ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0)
    # Stale restart state: converged with all output paths empty.
    stale_output = {'TS0': {'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                            'restart': '',
                            'convergence': True,
                            'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True,
                                          'freq': True, 'sp': True, 'rotors': False,
                                          'irc': False, 'fine': False, 'composite': False},
                            }}
    scheduler = Scheduler(project='test_restart_sanitizes_convergence_for_ts',
                          ess_settings=self.ess_settings,
                          species_list=[ts],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': stale_output, 'running_jobs': {'TS0': []}},
                          )
    # The scheduler must have reset the stale flags during initialization.
    self.assertFalse(scheduler.output['TS0']['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite'):
        self.assertFalse(scheduler.output['TS0']['job_types'][job_type])

def test_delete_all_species_jobs_resets_output(self):
    """Test that deleting jobs clears convergence and job status."""
    label = 'formaldehyde'
    species = ARCSpecies(label=label, smiles='C=O')
    # Start from a fully-converged state with populated paths.
    converged_output = {label: {'paths': {'geo': 'geo.out', 'freq': 'freq.out',
                                          'sp': 'sp.out', 'composite': ''},
                                'restart': '',
                                'convergence': True,
                                'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True,
                                              'freq': True, 'sp': True, 'rotors': False,
                                              'irc': False, 'fine': True, 'composite': False},
                                }}
    scheduler = Scheduler(project='test_delete_all_species_jobs_resets_output',
                          ess_settings=self.ess_settings,
                          species_list=[species],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': converged_output},
                          )
    scheduler.job_dict[label] = {'opt': {}, 'freq': {}, 'sp': {}}
    scheduler.running_jobs[label] = []
    scheduler.delete_all_species_jobs(label=label)
    # Deleting jobs must reset convergence, job statuses, and paths.
    self.assertFalse(scheduler.output[label]['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite', 'fine'):
        self.assertFalse(scheduler.output[label]['job_types'][job_type])
    self.assertEqual(scheduler.output[label]['paths'],
                     {'geo': '', 'freq': '', 'sp': '', 'composite': ''})

def test_switch_ts_resets_output(self):
    """Test that switching TS guesses resets convergence and paths."""
    ts = ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0)
    # Start from a fully-converged TS state with populated paths.
    converged_output = {'TS0': {'paths': {'geo': 'geo.out', 'freq': 'freq.out',
                                          'sp': 'sp.out', 'composite': ''},
                                'restart': '',
                                'convergence': True,
                                'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True,
                                              'freq': True, 'sp': True, 'rotors': False,
                                              'irc': False, 'fine': True, 'composite': False},
                                }}
    scheduler = Scheduler(project='test_switch_ts_resets_output',
                          ess_settings=self.ess_settings,
                          species_list=[ts],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': converged_output},
                          )
    scheduler.species_dict['TS0'].ts_guesses_exhausted = True
    scheduler.species_dict['TS0'].chosen_ts = None
    # Patch out the heavy helpers so only the output-reset logic runs.
    patch_confs = mock.patch.object(Scheduler, 'determine_most_likely_ts_conformer', return_value=None)
    patch_delete = mock.patch.object(Scheduler, 'delete_all_species_jobs', return_value=None)
    with patch_confs, patch_delete:
        scheduler.switch_ts(label='TS0')
    self.assertFalse(scheduler.output['TS0']['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite', 'fine'):
        self.assertFalse(scheduler.output['TS0']['job_types'][job_type])
    self.assertEqual(scheduler.output['TS0']['paths'],
                     {'geo': '', 'freq': '', 'sp': '', 'composite': ''})

def test_species_has_geo_sp_freq(self):
"""Test the species_has_geo() / species_has_sp() / species_has_freq() functions."""
for property_, species_has_property in zip(['geo', 'sp', 'freq'], [species_has_geo, species_has_sp, species_has_freq]):
Expand Down
8 changes: 4 additions & 4 deletions arc/species/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -1536,12 +1536,12 @@ def make_ts_report(self):
self.ts_report += ':\n'
if self.successful_methods:
self.ts_report += 'Methods that successfully generated a TS guess:\n'
for successful_method in self.successful_methods:
self.ts_report += successful_method + ','
unique_successful_methods = list(dict.fromkeys(self.successful_methods))
self.ts_report += ','.join(unique_successful_methods)
if self.unsuccessful_methods:
self.ts_report += '\nMethods that were unsuccessfully in generating a TS guess:\n'
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Grammatical error: "were unsuccessfully in generating" should be "were unsuccessful in generating" (remove the 'ly').

Suggested change
self.ts_report += '\nMethods that were unsuccessfully in generating a TS guess:\n'
self.ts_report += '\nMethods that were unsuccessful in generating a TS guess:\n'

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see this minor comment

for unsuccessful_method in self.unsuccessful_methods:
self.ts_report += unsuccessful_method + ','
unique_unsuccessful_methods = list(dict.fromkeys(self.unsuccessful_methods))
self.ts_report += ','.join(unique_unsuccessful_methods)
Comment on lines +1539 to +1544
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The deduplication of TS methods in the report will cause an existing test to fail. The test in arc/species/species_test.py at line 1202-1206 expects the ts_report to contain duplicates (e.g., 'autotst,autotst,autotst,autotst,gcn,gcn,...'), but after this change, the report will show 'autotst,gcn,kinbot' instead. The test should be updated to match the new expected behavior.

Copilot uses AI. Check for mistakes.
if not self.ts_guesses_exhausted:
self.ts_report += f'\nThe method that generated the best TS guess and its output used for the ' \
f'optimization: {self.chosen_ts_method}\n'
Expand Down
36 changes: 35 additions & 1 deletion functional/restart_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import shutil
import unittest
import warnings
from unittest import mock

from arc.molecule.molecule import Molecule

from arc.common import ARC_PATH, read_yaml_file
from arc.common import ARC_PATH, read_yaml_file, save_yaml_file
from arc.main import ARC


Expand Down Expand Up @@ -212,6 +213,38 @@ def test_globalize_paths(self):
content['output']['spc']['paths']['freq'])
self.assertNotIn('gpfs/workspace/users/user', content['output']['spc']['paths']['freq'])

def test_restart_sanitizes_ts_output(self):
    """Test sanitizing inconsistent TS output on restart."""
    project = 'arc_project_for_testing_delete_after_usage_restart_sanitize_ts'
    project_directory = os.path.join(ARC_PATH, 'Projects', project)
    os.makedirs(project_directory, exist_ok=True)
    restart_path = os.path.join(project_directory, 'restart.yml')
    # A restart file that claims convergence although all paths are empty.
    restart_content = {
        'project': project,
        'project_directory': project_directory,
        'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True,
                      'rotors': False, 'irc': False, 'fine': False},
        'species': [{'label': 'TS0', 'is_ts': True, 'multiplicity': 1, 'charge': 0}],
        'output': {
            'TS0': {
                'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                'restart': '',
                'convergence': True,
                'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True,
                              'rotors': False, 'irc': False, 'fine': False, 'composite': False},
            }
        },
        'running_jobs': {'TS0': []},
    }
    save_yaml_file(path=restart_path, content=restart_content)
    input_dict = read_yaml_file(path=restart_path, project_directory=project_directory)
    input_dict['project'] = project
    input_dict['project_directory'] = project_directory
    # Patch out job scheduling and post-processing; only restart handling runs.
    with mock.patch('arc.scheduler.Scheduler.schedule_jobs', return_value=None), \
            mock.patch('arc.main.process_arc_project', return_value=None):
        arc_object = ARC(**input_dict)
        arc_object.execute()
    # The stale convergence flag must have been reset on restart.
    self.assertFalse(arc_object.scheduler.output['TS0']['convergence'])

@classmethod
def tearDownClass(cls):
"""
Expand All @@ -222,6 +255,7 @@ def tearDownClass(cls):
'arc_project_for_testing_delete_after_usage_restart_rate_1',
'arc_project_for_testing_delete_after_usage_restart_rate_2',
'test_restart_bde',
'arc_project_for_testing_delete_after_usage_restart_sanitize_ts',
]
for project in projects:
project_directory = os.path.join(ARC_PATH, 'Projects', project)
Expand Down
Loading