illinois-ceesd · MTCam · Jul 9, 2021 · May 28, 2021 · May 28, 2021 · May 28, 2021
diff --git a/doc/support/tools.rst b/doc/support/tools.rst
@@ -1,6 +1,6 @@
 Random Pile'o'Tools
 ===================
 
+.. automodule:: mirgecom.exceptions
 .. automodule:: mirgecom.simutil
-
 .. automodule:: mirgecom.utils
diff --git a/examples/autoignition-mpi.py b/examples/autoignition-mpi.py
@@ -39,10 +39,8 @@
 from mirgecom.euler import euler_operator
 from mirgecom.simutil import (
     inviscid_sim_timestep,
-    sim_checkpoint,
     check_step,
-    generate_and_distribute_mesh,
-    ExactSolutionMismatch
+    generate_and_distribute_mesh
 )
 from mirgecom.io import make_init_message
 from mirgecom.mpi import mpi_entry_point
@@ -52,6 +50,7 @@
 from mirgecom.boundary import AdiabaticSlipBoundary
 from mirgecom.initializers import MixtureInitializer
 from mirgecom.eos import PyrometheusMixture
+
 import cantera
 import pyrometheus as pyro
 
@@ -81,6 +80,7 @@ def main(ctx_factory=cl.create_some_context, use_leap=False):
     constant_cfl = False
     nstatus = 1
     nviz = 5
+    nhealth = 1
     rank = 0
     checkpoint_t = current_t
     current_step = 0
@@ -91,7 +91,6 @@ def main(ctx_factory=cl.create_some_context, use_leap=False):
         timestepper = rk4_step
     box_ll = -0.005
     box_ur = 0.005
-    error_state = False
     debug = False
 
     from mpi4py import MPI
@@ -223,25 +222,56 @@ def my_rhs(t, state):
                 + eos.get_species_source_terms(state))
 
     def my_checkpoint(step, t, dt, state):
-        reaction_rates = eos.get_production_rates(state)
-        viz_fields = [("reaction_rates", reaction_rates)]
-        return sim_checkpoint(discr, visualizer, eos, cv=state,
-                              vizname=casename, step=step,
-                              t=t, dt=dt, nstatus=nstatus, nviz=nviz,
-                              constant_cfl=constant_cfl, comm=comm,
-                              viz_fields=viz_fields)
-
-    try:
-        (current_step, current_t, current_state) = \
-            advance_state(rhs=my_rhs, timestepper=timestepper,
-                checkpoint=my_checkpoint,
-                get_timestep=get_timestep, state=current_state,
-                t=current_t, t_final=t_final)
-    except ExactSolutionMismatch as ex:
-        error_state = True
-        current_step = ex.step
-        current_t = ex.t
-        current_state = ex.state
+        """Log status, run health checks, write viz; return state unchanged."""
+        from mirgecom.simutil import check_step
+        do_status = check_step(step=step, interval=nstatus)
+        do_viz = check_step(step=step, interval=nviz)
+        do_health = check_step(step=step, interval=nhealth)
+
+        if do_status or do_viz or do_health:
+            dv = eos.dependent_vars(state)
+            reaction_rates = eos.get_production_rates(state)
+            io_fields = [
+                ("cv", state),
+                ("dv", dv),
+                ("reaction_rates", reaction_rates)
+            ]
+
+        if do_status:  # This is bad, logging already completely replaces this
+            from mirgecom.io import make_status_message
+            status_msg = make_status_message(discr=discr, t=t, step=step, dt=dt,
+                                             cfl=current_cfl, dependent_vars=dv)
+            if rank == 0:
+                logger.info(status_msg)
+
+        errors = 0
+        message = ""  # stays empty on ranks whose local checks pass
+        if do_health:
+            from mirgecom.simutil import check_naninf_local, check_range_local
+            if check_naninf_local(discr, "vol", dv.pressure) \
+               or check_range_local(discr, "vol", dv.pressure):
+                errors = 1
+                message = "Invalid pressure data found.\n"
+            errors = discr.mpi_communicator.allreduce(errors, op=MPI.SUM)
+            if errors > 0 and rank == 0:
+                logger.info("Fluid solution failed health check.")
+            if message:  # only ranks that detected a local failure report it
+                logger.info(message)
+
+        if do_viz or errors > 0:
+            from mirgecom.simutil import sim_visualization
+            sim_visualization(discr, io_fields, visualizer, vizname=casename,
+                              step=step, t=t, overwrite=True)
+
+        if errors > 0:  # allreduce'd above, so all ranks see the same count
+            raise RuntimeError("Fluid solution failed health check.")
+        return state
+
+    current_step, current_t, current_state = \
+        advance_state(rhs=my_rhs, timestepper=timestepper,
+                      pre_step_callback=my_checkpoint,
+                      get_timestep=get_timestep, state=current_state,
+                      t=current_t, t_final=t_final, eos=eos, dim=dim)
 
     if not check_step(current_step, nviz):  # If final step not an output step
         if rank == 0:
@@ -250,12 +280,6 @@ def my_checkpoint(step, t, dt, state):
                       dt=(current_t - checkpoint_t),
                       state=current_state)
 
-    if current_t - t_final < 0:
-        error_state = True
-
-    if error_state:
-        raise ValueError("Simulation did not complete successfully.")
-
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)

diff --git a/examples/lump-mpi.py b/examples/lump-mpi.py
@@ -39,9 +39,7 @@
 from mirgecom.euler import euler_operator
 from mirgecom.simutil import (
     inviscid_sim_timestep,
-    sim_checkpoint,
-    generate_and_distribute_mesh,
-    ExactSolutionMismatch
+    generate_and_distribute_mesh
 )
 from mirgecom.io import make_init_message
 from mirgecom.mpi import mpi_entry_point
@@ -81,6 +79,7 @@ def main(ctx_factory=cl.create_some_context, use_leap=False):
     boundaries = {BTAG_ALL: PrescribedBoundary(initializer)}
     constant_cfl = False
     nstatus = 1
+    nhealth = 1
     nviz = 1
     rank = 0
     checkpoint_t = current_t
@@ -131,21 +130,66 @@ def my_rhs(t, state):
                               boundaries=boundaries, eos=eos)
 
     def my_checkpoint(step, t, dt, state):
-        return sim_checkpoint(discr, visualizer, eos, cv=state,
-                              exact_soln=initializer, vizname=casename, step=step,
-                              t=t, dt=dt, nstatus=nstatus, nviz=nviz,
-                              exittol=exittol, constant_cfl=constant_cfl, comm=comm)
-
-    try:
-        (current_step, current_t, current_state) = \
-            advance_state(rhs=my_rhs, timestepper=timestepper,
-                checkpoint=my_checkpoint,
-                get_timestep=get_timestep, state=current_state,
-                t=current_t, t_final=t_final)
-    except ExactSolutionMismatch as ex:
-        current_step = ex.step
-        current_t = ex.t
-        current_state = ex.state
+        """Log status, run health checks, write viz; return state unchanged."""
+        from mirgecom.simutil import check_step
+        do_status = check_step(step=step, interval=nstatus)
+        do_viz = check_step(step=step, interval=nviz)
+        do_health = check_step(step=step, interval=nhealth)
+
+        if do_status or do_viz or do_health:
+            from mirgecom.simutil import compare_fluid_solutions
+            dv = eos.dependent_vars(state)
+            exact_mix = initializer(x_vec=nodes, eos=eos, t=t)
+            component_errors = compare_fluid_solutions(discr, state, exact_mix)
+            resid = state - exact_mix
+            io_fields = [
+                ("cv", state),
+                ("dv", dv),
+                ("exact_mix", exact_mix),
+                ("resid", resid)
+            ]
+
+        if do_status:  # This is bad, logging already completely replaces this
+            from mirgecom.io import make_status_message
+            status_msg = make_status_message(discr=discr, t=t, step=step, dt=dt,
+                                             cfl=current_cfl, dependent_vars=dv)
+            status_msg += (
+                "\n------- errors="
+                + ", ".join("%.3g" % en for en in component_errors))
+            if rank == 0:
+                logger.info(status_msg)
+
+        errors = 0
+        message = ""  # stays empty on ranks whose local checks pass
+        if do_health:
+            from mirgecom.simutil import check_naninf_local, check_range_local
+            if check_naninf_local(discr, "vol", dv.pressure) \
+               or check_range_local(discr, "vol", dv.pressure):
+                errors = 1
+                message = "Invalid pressure data found.\n"
+            if np.max(component_errors) > exittol:
+                errors = errors + 1
+                message += "Solution errors exceed tolerance.\n"
+            errors = discr.mpi_communicator.allreduce(errors, op=MPI.SUM)
+            if errors > 0 and rank == 0:
+                logger.info("Fluid solution failed health check.")
+            if message:  # only ranks that detected a local failure report it
+                logger.info(message)
+
+        if do_viz or errors > 0:
+            from mirgecom.simutil import sim_visualization
+            sim_visualization(discr, io_fields, visualizer, vizname=casename,
+                              step=step, t=t, overwrite=True)
+
+        if errors > 0:  # allreduce'd above, so all ranks see the same count
+            raise RuntimeError("Fluid solution failed health check.")
+        return state
+
+    current_step, current_t, current_state = \
+        advance_state(rhs=my_rhs, timestepper=timestepper,
+                      pre_step_callback=my_checkpoint,
+                      get_timestep=get_timestep, state=current_state,
+                      t=current_t, t_final=t_final, eos=eos, dim=dim)
 
     #    if current_t != checkpoint_t:
     if rank == 0:
@@ -154,9 +198,6 @@ def my_checkpoint(step, t, dt, state):
                   dt=(current_t - checkpoint_t),
                   state=current_state)
 
-    if current_t - t_final < 0:
-        raise ValueError("Simulation exited abnormally")
-
 
 if __name__ == "__main__":
     logging.basicConfig(format="%(message)s", level=logging.INFO)

diff --git a/examples/mixture-mpi.py b/examples/mixture-mpi.py
@@ -39,9 +39,7 @@
 from mirgecom.euler import euler_operator
 from mirgecom.simutil import (
     inviscid_sim_timestep,
-    sim_checkpoint,
-    generate_and_distribute_mesh,
-    ExactSolutionMismatch
+    generate_and_distribute_mesh
 )
 from mirgecom.io import make_init_message
 from mirgecom.mpi import mpi_entry_point
@@ -78,6 +76,7 @@ def main(ctx_factory=cl.create_some_context, use_leap=False):
     current_t = 0
     constant_cfl = False
     nstatus = 1
+    nhealth = 1
     nviz = 1
     rank = 0
     checkpoint_t = current_t
@@ -89,7 +88,6 @@ def main(ctx_factory=cl.create_some_context, use_leap=False):
         timestepper = rk4_step
     box_ll = -5.0
     box_ur = 5.0
-    error_state = 0
 
     from mpi4py import MPI
     comm = MPI.COMM_WORLD
@@ -152,24 +150,66 @@ def my_rhs(t, state):
                               boundaries=boundaries, eos=eos)
 
     def my_checkpoint(step, t, dt, state):
-        global checkpoint_t
-        checkpoint_t = t
-        return sim_checkpoint(discr, visualizer, eos, cv=state,
-                              exact_soln=initializer, vizname=casename, step=step,
-                              t=t, dt=dt, nstatus=nstatus, nviz=nviz,
-                              exittol=exittol, constant_cfl=constant_cfl, comm=comm)
-
-    try:
-        (current_step, current_t, current_state) = \
-            advance_state(rhs=my_rhs, timestepper=timestepper,
-                checkpoint=my_checkpoint,
-                get_timestep=get_timestep, state=current_state,
-                t=current_t, t_final=t_final)
-    except ExactSolutionMismatch as ex:
-        error_state = 1
-        current_step = ex.step
-        current_t = ex.t
-        current_state = ex.state
+        """Log status, run health checks, write viz; return state unchanged."""
+        from mirgecom.simutil import check_step
+        do_status = check_step(step=step, interval=nstatus)
+        do_viz = check_step(step=step, interval=nviz)
+        do_health = check_step(step=step, interval=nhealth)
+
+        if do_status or do_viz or do_health:
+            from mirgecom.simutil import compare_fluid_solutions
+            dv = eos.dependent_vars(state)
+            exact_mix = initializer(x_vec=nodes, eos=eos, t=t)
+            component_errors = compare_fluid_solutions(discr, state, exact_mix)
+            resid = state - exact_mix
+            io_fields = [
+                ("cv", state),
+                ("dv", dv),
+                ("exact_mix", exact_mix),
+                ("resid", resid)
+            ]
+
+        if do_status:  # This is bad, logging already completely replaces this
+            from mirgecom.io import make_status_message
+            status_msg = make_status_message(discr=discr, t=t, step=step, dt=dt,
+                                             cfl=current_cfl, dependent_vars=dv)
+            status_msg += (
+                "\n------- errors="
+                + ", ".join("%.3g" % en for en in component_errors))
+            if rank == 0:
+                logger.info(status_msg)
+
+        errors = 0
+        message = ""  # stays empty on ranks whose local checks pass
+        if do_health:
+            from mirgecom.simutil import check_naninf_local, check_range_local
+            if check_naninf_local(discr, "vol", dv.pressure) \
+               or check_range_local(discr, "vol", dv.pressure):
+                errors = 1
+                message = "Invalid pressure data found.\n"
+            if np.max(component_errors) > exittol:
+                errors = errors + 1
+                message += "Solution errors exceed tolerance.\n"
+            errors = discr.mpi_communicator.allreduce(errors, op=MPI.SUM)
+            if errors > 0 and rank == 0:
+                logger.info("Fluid solution failed health check.")
+            if message:  # only ranks that detected a local failure report it
+                logger.info(message)
+
+        if do_viz or errors > 0:
+            from mirgecom.simutil import sim_visualization
+            sim_visualization(discr, io_fields, visualizer, vizname=casename,
+                              step=step, t=t, overwrite=True)
+
+        if errors > 0:  # allreduce'd above, so all ranks see the same count
+            raise RuntimeError("Fluid solution failed health check.")
+        return state
+
+    current_step, current_t, current_state = \
+        advance_state(rhs=my_rhs, timestepper=timestepper,
+                      pre_step_callback=my_checkpoint,
+                      get_timestep=get_timestep, state=current_state,
+                      t=current_t, t_final=t_final, eos=eos, dim=dim)
 
     if current_t != checkpoint_t:  # This check because !overwrite
         if rank == 0:
@@ -178,12 +218,6 @@ def my_checkpoint(step, t, dt, state):
                       dt=(current_t - checkpoint_t),
                       state=current_state)
 
-    if current_t - t_final < 0:
-        error_state = 1
-
-    if error_state:
-        raise ValueError("Simulation did not complete successfully.")
-
 
 if __name__ == "__main__":
     logging.basicConfig(format="%(message)s", level=logging.INFO)