diff --git a/JenkinsFile b/JenkinsFile
index 9c7155d6..95959201 100644
--- a/JenkinsFile
+++ b/JenkinsFile
@@ -109,6 +109,27 @@ pipeline {
networkShares()
filecopy copyOfflineOperations, 'test', env.WORKSPACE
filecopy copyOperations, 'test', env.WORKSPACE
+ powershell label: 'Verify /health returns 200', script: '''
+ $url = "https://secure-test.vetmed.ucdavis.edu/2/health"
+ $maxAttempts = 15
+ for ($i = 1; $i -le $maxAttempts; $i++) {
+ # 2s for the first few attempts then 4s - absorbs IIS app-pool warm-up (46s of sleep in total across 15 attempts)
+ $delay = if ($i -le 5) { 2 } else { 4 }
+ try {
+ $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 10
+ if ($response.StatusCode -eq 200) {
+ Write-Host "Attempt ${i}: $url returned 200 OK"
+ exit 0 # first 200 ends the script with a success exit code
+ }
+ Write-Host "Attempt ${i}: $url returned $($response.StatusCode)"
+ } catch { # request errors are logged and the loop moves on to the next attempt
+ Write-Host "Attempt ${i}: $($_.Exception.Message)"
+ }
+ if ($i -lt $maxAttempts) { Start-Sleep -Seconds $delay } # no sleep after the final attempt
+ }
+ Write-Error "Health check at $url failed after $maxAttempts attempts"
+ exit 1
+ '''
}
}
stage('Deploy to prod') {
@@ -122,6 +143,26 @@ pipeline {
networkShares()
filecopy copyOfflineOperations, 'prod', env.WORKSPACE
filecopy copyOperations, 'prod', env.WORKSPACE
+ powershell label: 'Verify /health returns 200', script: '''
+ $url = "https://secure.vetmed.ucdavis.edu/2/health"
+ $maxAttempts = 15
+ for ($i = 1; $i -le $maxAttempts; $i++) {
+ $delay = if ($i -le 5) { 2 } else { 4 } # 2s for the first five attempts, then 4s (46s of sleep in total across 15 attempts)
+ try {
+ $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 10
+ if ($response.StatusCode -eq 200) {
+ Write-Host "Attempt ${i}: $url returned 200 OK"
+ exit 0 # first 200 ends the script with a success exit code
+ }
+ Write-Host "Attempt ${i}: $url returned $($response.StatusCode)"
+ } catch { # request errors are logged and the loop moves on to the next attempt
+ Write-Host "Attempt ${i}: $($_.Exception.Message)"
+ }
+ if ($i -lt $maxAttempts) { Start-Sleep -Seconds $delay } # no sleep after the final attempt
+ }
+ Write-Error "Health check at $url failed after $maxAttempts attempts"
+ exit 1
+ '''
}
}
}
diff --git a/web/Classes/CloudflareNetworks.cs b/web/Classes/CloudflareNetworks.cs
new file mode 100644
index 00000000..983ca707
--- /dev/null
+++ b/web/Classes/CloudflareNetworks.cs
@@ -0,0 +1,59 @@
+ namespace Viper.Classes
+ {
+     /// <summary>
+     /// Cloudflare's published IPv4/IPv6 networks, used to mark CF as a known
+     /// proxy in ForwardedHeadersOptions. Fetched from cloudflare.com at startup
+     /// so we automatically pick up rotations; falls back to a hardcoded snapshot
+     /// when the fetch fails or returns unusable data (CF outage during deploy, sandboxed network, etc).
+     /// </summary>
+     public static class CloudflareNetworks
+     {
+         // Snapshot of https://www.cloudflare.com/ips/ - only used when the runtime fetch fails or is
+         // unusable. Refresh occasionally if logs show this falling through and current CF IPs aren't listed.
+         private static readonly string[] HardcodedFallback =
+         [
+             "173.245.48.0/20", "103.21.244.0/22", "103.22.200.0/22", "103.31.4.0/22",
+             "141.101.64.0/18", "108.162.192.0/18", "190.93.240.0/20", "188.114.96.0/20",
+             "197.234.240.0/22", "198.41.128.0/17", "162.158.0.0/15", "104.16.0.0/13",
+             "104.24.0.0/14", "172.64.0.0/13", "131.0.72.0/22",
+             "2400:cb00::/32", "2606:4700::/32", "2803:f800::/32", "2405:b500::/32",
+             "2405:8100::/32", "2a06:98c0::/29", "2c0f:f248::/32",
+         ];
+
+         /// <summary>
+         /// Downloads the current Cloudflare CIDR lists (blocking; 5 s timeout per request) and
+         /// returns them, or the hardcoded snapshot if the fetch fails or yields no valid entries.
+         /// </summary>
+         public static IReadOnlyList<string> FetchOrFallback(NLog.Logger logger)
+         {
+             try
+             {
+                 using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(5) };
+                 var v4 = http.GetStringAsync("https://www.cloudflare.com/ips-v4/").GetAwaiter().GetResult();
+                 var v6 = http.GetStringAsync("https://www.cloudflare.com/ips-v6/").GetAwaiter().GetResult();
+                 var cidrs = (v4 + "\n" + v6)
+                     .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+                 // A successful response is not proof of a usable list (empty body, challenge or
+                 // captive-portal HTML); only trust it when every entry looks like a CIDR.
+                 if (cidrs.Length > 0 && Array.TrueForAll(cidrs, IsCidr))
+                 {
+                     logger.Info("Fetched {Count} Cloudflare networks from cloudflare.com", cidrs.Length);
+                     return cidrs;
+                 }
+                 logger.Warn("Cloudflare IP range response was empty or malformed; using hardcoded fallback ({Count} entries)", HardcodedFallback.Length);
+             }
+             catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
+             {
+                 logger.Warn(ex, "Failed to fetch Cloudflare IP ranges; using hardcoded fallback ({Count} entries)", HardcodedFallback.Length);
+             }
+             return HardcodedFallback;
+         }
+
+         // True when the entry has the shape "<ip address>/<prefix length>" with an in-range prefix.
+         private static bool IsCidr(string entry) =>
+             entry.Split('/') is [var address, var prefix]
+             && System.Net.IPAddress.TryParse(address, out var ip)
+             && int.TryParse(prefix, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out var bits)
+             && bits <= (ip.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6 ? 128 : 32);
+     }
+ }
diff --git a/web/Classes/HealthChecks/AdaptivePollingHealthCheck.cs b/web/Classes/HealthChecks/AdaptivePollingHealthCheck.cs
new file mode 100644
index 00000000..d44b5cdb
--- /dev/null
+++ b/web/Classes/HealthChecks/AdaptivePollingHealthCheck.cs
@@ -0,0 +1,81 @@
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+
+ namespace Viper.Classes.HealthChecks
+ {
+ /// <summary>
+ /// Throttles an inner health check by caching its last result for a
+ /// status-dependent duration: Healthy results are reused for longer, while
+ /// Unhealthy/Degraded results refresh on a tighter cycle so recovery is
+ /// noticed quickly. When a cached result is returned, the original probe
+ /// timestamp is appended to the description so operators can tell how
+ /// stale the reading is.
+ /// </summary>
+ public class AdaptivePollingHealthCheck : IHealthCheck
+ {
+ private readonly IHealthCheck _inner;
+ private readonly TimeSpan _healthyCacheDuration;
+ private readonly TimeSpan _unhealthyCacheDuration;
+ private readonly SemaphoreSlim _semaphore = new(1, 1); // one caller at a time; others wait, then see the refreshed cache
+ private DateTime _lastCheckTime;
+ private HealthCheckResult? _lastResult;
+
+ public AdaptivePollingHealthCheck(
+ IHealthCheck inner,
+ TimeSpan healthyCacheDuration,
+ TimeSpan unhealthyCacheDuration)
+ {
+ _inner = inner;
+ _healthyCacheDuration = healthyCacheDuration;
+ _unhealthyCacheDuration = unhealthyCacheDuration;
+ }
+
+ public async Task CheckHealthAsync(
+ HealthCheckContext context,
+ CancellationToken cancellationToken = default)
+ {
+ await _semaphore.WaitAsync(cancellationToken);
+ try
+ {
+ if (_lastResult.HasValue)
+ {
+ // S6561: DateTime.Now used for elapsed-time calc. Accepted
+ // because VIPER convention is DateTimeKind.Local and a
+ // sub-hour DST skew only shifts one cache window.
+#pragma warning disable S6561
+ var elapsed = DateTime.Now - _lastCheckTime;
+#pragma warning restore S6561
+ var cacheDuration = _lastResult.Value.Status == HealthStatus.Healthy
+ ? _healthyCacheDuration
+ : _unhealthyCacheDuration; // any non-Healthy status (Degraded or Unhealthy) uses this duration
+
+ if (elapsed < cacheDuration)
+ {
+ return AppendTimestamp(_lastResult.Value, _lastCheckTime);
+ }
+ }
+
+ var result = await _inner.CheckHealthAsync(context, cancellationToken); // if this throws, the cached state is left untouched
+ _lastResult = result;
+ _lastCheckTime = DateTime.Now;
+ return result; // fresh results are returned without the "Last checked" stamp
+ }
+ finally
+ {
+ _semaphore.Release();
+ }
+ }
+
+ private static HealthCheckResult AppendTimestamp(HealthCheckResult result, DateTime lastCheckedAt)
+ {
+ var stamp = $"Last checked: {lastCheckedAt:MMM d, h:mm tt}";
+ var description = string.IsNullOrWhiteSpace(result.Description)
+ ? stamp
+ : $"{result.Description}\n{stamp}";
+ return new HealthCheckResult(
+ result.Status,
+ description,
+ result.Exception,
+ result.Data);
+ }
+ }
+ }
diff --git a/web/Classes/HealthChecks/AwsSsmHealthCheck.cs b/web/Classes/HealthChecks/AwsSsmHealthCheck.cs
new file mode 100644
index 00000000..444aca59
--- /dev/null
+++ b/web/Classes/HealthChecks/AwsSsmHealthCheck.cs
@@ -0,0 +1,56 @@
+using Amazon;
+using Amazon.Runtime;
+using Amazon.SimpleSystemsManagement;
+using Amazon.SimpleSystemsManagement.Model;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+
+ namespace Viper.Classes.HealthChecks
+ {
+ /// <summary>
+ /// Verifies AWS SSM Parameter Store is reachable with the app's credentials.
+ /// Uses a lightweight DescribeParameters probe (MaxResults=1) so the check
+ /// does not actually fetch any parameter values.
+ /// </summary>
+ public class AwsSsmHealthCheck : IHealthCheck
+ {
+ private readonly RegionEndpoint _region;
+ private readonly bool _healthyWhenMissing;
+
+ /// <param name="healthyWhenMissing">
+ /// If true, any AmazonServiceException or AmazonClientException raised by the
+ /// probe returns Healthy with a "skipped" description. Use for Development where
+ /// local machines may not have AWS credentials configured.
+ /// </param>
+ public AwsSsmHealthCheck(RegionEndpoint? region = null, bool healthyWhenMissing = false)
+ {
+ _region = region ?? RegionEndpoint.USWest1; // region used when the caller passes none
+ _healthyWhenMissing = healthyWhenMissing;
+ }
+
+ public async Task CheckHealthAsync(
+ HealthCheckContext context,
+ CancellationToken cancellationToken = default)
+ {
+ try
+ {
+ using var client = new AmazonSimpleSystemsManagementClient(_region); // a new client is created and disposed on every probe
+ await client.DescribeParametersAsync(
+ new DescribeParametersRequest { MaxResults = 1 },
+ cancellationToken);
+ return HealthCheckResult.Healthy("AWS SSM reachable.");
+ }
+ catch (AmazonServiceException ex)
+ {
+ return _healthyWhenMissing
+ ? HealthCheckResult.Healthy("AWS SSM not configured (skipped).")
+ : HealthCheckResult.Unhealthy($"AWS SSM unreachable: {ex.ErrorCode}.");
+ }
+ catch (AmazonClientException)
+ {
+ return _healthyWhenMissing
+ ? HealthCheckResult.Healthy("AWS SSM not configured (skipped).")
+ : HealthCheckResult.Unhealthy("AWS SSM client error (credentials or network).");
+ }
+ }
+ }
+ }
diff --git a/web/Classes/HealthChecks/DiskSpaceHealthCheck.cs b/web/Classes/HealthChecks/DiskSpaceHealthCheck.cs
new file mode 100644
index 00000000..f854949b
--- /dev/null
+++ b/web/Classes/HealthChecks/DiskSpaceHealthCheck.cs
@@ -0,0 +1,156 @@
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+
+ namespace Viper.Classes.HealthChecks
+ {
+ /// <summary>
+ /// Reports free space on the drive hosting the running application.
+ /// Resolves the drive at runtime so the same check works on any deploy target
+ /// without per-environment config. Thresholds are percent-based so a single
+ /// default works across drive sizes (a 1 GB floor is alarming on a 20 GB
+ /// drive and meaningless on a 2 TB drive).
+ /// </summary>
+ public class DiskSpaceHealthCheck : IHealthCheck
+ {
+ private readonly string? _explicitDrivePath;
+ private readonly double _criticalFreePercent;
+ private readonly double _warningFreePercent;
+ private readonly bool _healthyWhenMissing;
+ private readonly bool _requirePathExists;
+ private readonly bool _verifyWritable;
+
+ /// <param name="explicitDrivePath">
+ /// Drive or path to monitor. If null, the drive hosting the running app is used.
+ /// Pass e.g. "S:\\" (or any path on that drive) to monitor an alternate volume.
+ /// </param>
+ /// <param name="healthyWhenMissing">
+ /// If true, a missing or unready drive returns Healthy with a "not mounted"
+ /// description. Use for optional drives (e.g., network shares that don't
+ /// exist on developer machines). Defaults to false (missing drive = Unhealthy).
+ /// </param>
+ /// <param name="requirePathExists">
+ /// If true, also verify the supplied explicitDrivePath is an existing directory
+ /// (not just that its drive is ready). Use for checks where the application
+ /// writes to a specific sub-path and its absence is a real failure. Ignored
+ /// when explicitDrivePath is null.
+ /// </param>
+ /// <param name="verifyWritable">
+ /// If true, attempt a zero-byte file create + delete in the target directory
+ /// to confirm the path is actually writable (catches read-only mounts and
+ /// ACL regressions that a disk-space check misses). Windows has no reliable
+ /// "can I write?" API short of actually writing, so this is the minimal probe.
+ /// Requires explicitDrivePath to be set and point at a directory.
+ /// </param>
+ public DiskSpaceHealthCheck(
+ string? explicitDrivePath = null,
+ double criticalFreePercent = 5.0, // free-space percentage below which the check is Unhealthy
+ double warningFreePercent = 10.0, // free-space percentage below which the check is Degraded
+ bool healthyWhenMissing = false,
+ bool requirePathExists = false,
+ bool verifyWritable = false)
+ {
+ _explicitDrivePath = explicitDrivePath;
+ _criticalFreePercent = criticalFreePercent;
+ _warningFreePercent = warningFreePercent;
+ _healthyWhenMissing = healthyWhenMissing;
+ _requirePathExists = requirePathExists;
+ _verifyWritable = verifyWritable;
+ }
+
+ public Task CheckHealthAsync(
+ HealthCheckContext context,
+ CancellationToken cancellationToken = default)
+ {
+ var driveRoot = _explicitDrivePath is null
+ ? Path.GetPathRoot(AppContext.BaseDirectory)
+ : Path.GetPathRoot(_explicitDrivePath);
+ if (string.IsNullOrEmpty(driveRoot))
+ {
+ return Task.FromResult(_healthyWhenMissing
+ ? HealthCheckResult.Healthy("Drive not mounted (skipped).")
+ : HealthCheckResult.Unhealthy("Could not determine drive to monitor."));
+ }
+
+ var drive = new DriveInfo(driveRoot); // NOTE(review): DriveInfo appears to reject UNC roots (\\server\share) with ArgumentException - confirm all monitored paths are drive-letter based
+ if (!drive.IsReady)
+ {
+ return Task.FromResult(_healthyWhenMissing
+ ? HealthCheckResult.Healthy("Drive not mounted (skipped).")
+ : HealthCheckResult.Unhealthy("Drive not ready."));
+ }
+
+ if (_requirePathExists && _explicitDrivePath is not null
+ && !Directory.Exists(_explicitDrivePath))
+ {
+ return Task.FromResult(_healthyWhenMissing
+ ? HealthCheckResult.Healthy($"Path '{_explicitDrivePath}' does not exist (skipped).")
+ : HealthCheckResult.Unhealthy($"Path '{_explicitDrivePath}' does not exist."));
+ }
+
+ if (_verifyWritable && _explicitDrivePath is not null)
+ {
+ // Unique probe name per invocation - overlapping UI polls + /health/detail
+ // requests would otherwise race on a shared file name and produce
+ // intermittent false Unhealthy results.
+ var probePath = Path.Join(
+ _explicitDrivePath,
+ $".health-probe-{Environment.ProcessId}-{Guid.NewGuid():N}");
+ try
+ {
+ File.WriteAllBytes(probePath, Array.Empty());
+ }
+ catch (UnauthorizedAccessException)
+ {
+ return Task.FromResult(HealthCheckResult.Unhealthy(
+ $"Path '{_explicitDrivePath}' not writable: access denied."));
+ }
+ catch (IOException ex)
+ {
+ return Task.FromResult(HealthCheckResult.Unhealthy(
+ $"Path '{_explicitDrivePath}' not writable: {ex.Message}"));
+ }
+ finally
+ {
+ if (File.Exists(probePath))
+ {
+ try
+ {
+ File.Delete(probePath);
+ }
+ catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
+ {
+ // Best-effort cleanup; unique name means a missed delete
+ // doesn't break future probes, just leaves a 0-byte file.
+ }
+ }
+ }
+ }
+
+ var freeBytes = drive.AvailableFreeSpace;
+ var totalBytes = drive.TotalSize;
+ var freeGb = Math.Round(freeBytes / (1024.0 * 1024.0 * 1024.0), 1);
+ var freePercent = Math.Round(freeBytes * 100.0 / totalBytes, 1); // NOTE(review): NaN/Infinity if TotalSize is 0, which passes both threshold checks below - confirm drives never report 0
+ var data = new Dictionary
+ {
+ ["drive"] = drive.Name,
+ ["free_gb"] = freeGb,
+ ["total_gb"] = Math.Round(totalBytes / (1024.0 * 1024.0 * 1024.0), 1),
+ ["free_percent"] = freePercent,
+ };
+
+ if (freePercent < _criticalFreePercent)
+ {
+ return Task.FromResult(HealthCheckResult.Unhealthy(
+ $"Low disk space: {freeGb} GB free ({freePercent}%).", data: data));
+ }
+
+ if (freePercent < _warningFreePercent)
+ {
+ return Task.FromResult(HealthCheckResult.Degraded(
+ $"Disk space getting low: {freeGb} GB free ({freePercent}%).", data: data));
+ }
+
+ return Task.FromResult(HealthCheckResult.Healthy(
+ $"Disk space OK: {freeGb} GB free ({freePercent}%).", data: data));
+ }
+ }
+ }
diff --git a/web/Classes/HealthChecks/HealthCheckCollectorAuth.cs b/web/Classes/HealthChecks/HealthCheckCollectorAuth.cs
new file mode 100644
index 00000000..b05852ea
--- /dev/null
+++ b/web/Classes/HealthChecks/HealthCheckCollectorAuth.cs
@@ -0,0 +1,29 @@
+using System.Security.Cryptography;
+using System.Text;
+
+ namespace Viper.Classes.HealthChecks
+ {
+     /// <summary>
+     /// Process-unique secret used by the in-app HealthChecksUI collector to bypass
+     /// the InternalAllowlist IP check on its self-call to /health/detail. The token
+     /// is regenerated each time the app starts; both the outbound handler and the
+     /// inbound filter live in the same process, so they always agree.
+     /// </summary>
+     public static class HealthCheckCollectorAuth
+     {
+         public const string HeaderName = "X-Health-Collector-Token";
+         // 256 bits from the OS CSPRNG, hex-encoded. Guid.NewGuid() promises uniqueness, not unpredictability.
+         public static string Token { get; } = Convert.ToHexString(RandomNumberGenerator.GetBytes(32));
+
+         /// <summary>
+         /// Constant-time comparison so we don't leak token bytes via timing.
+         /// </summary>
+         public static bool Matches(string? provided)
+         {
+             if (string.IsNullOrEmpty(provided)) return false;
+             var a = Encoding.UTF8.GetBytes(provided);
+             var b = Encoding.UTF8.GetBytes(Token);
+             return a.Length == b.Length && CryptographicOperations.FixedTimeEquals(a, b);
+         }
+     }
+ }
diff --git a/web/Classes/HealthChecks/HealthCheckCollectorTokenHandler.cs b/web/Classes/HealthChecks/HealthCheckCollectorTokenHandler.cs
new file mode 100644
index 00000000..34c49c52
--- /dev/null
+++ b/web/Classes/HealthChecks/HealthCheckCollectorTokenHandler.cs
@@ -0,0 +1,19 @@
+ namespace Viper.Classes.HealthChecks
+ {
+ /// <summary>
+ /// Wired into HealthChecksUI's API-endpoint HttpClient via
+ /// UseApiEndpointDelegatingHandler. Stamps every outbound collector request
+ /// with the process-unique token so the /health/detail endpoint filter can
+ /// distinguish "us calling ourselves" from arbitrary remote callers.
+ /// </summary>
+ public sealed class HealthCheckCollectorTokenHandler : DelegatingHandler
+ {
+ protected override Task SendAsync(
+ HttpRequestMessage request, CancellationToken cancellationToken)
+ {
+ request.Headers.Remove(HealthCheckCollectorAuth.HeaderName); // drop any existing value so exactly one token header is sent
+ request.Headers.Add(HealthCheckCollectorAuth.HeaderName, HealthCheckCollectorAuth.Token);
+ return base.SendAsync(request, cancellationToken);
+ }
+ }
+ }
diff --git a/web/Classes/HealthChecks/HealthCheckExtensions.cs b/web/Classes/HealthChecks/HealthCheckExtensions.cs
new file mode 100644
index 00000000..3970232c
--- /dev/null
+++ b/web/Classes/HealthChecks/HealthCheckExtensions.cs
@@ -0,0 +1,340 @@
+using System.Text;
+using HealthChecks.UI.Client;
+using Microsoft.AspNetCore.Diagnostics.HealthChecks;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+using Viper.Classes.SQLContext;
+using Web.Authorization;
+
+namespace Viper.Classes.HealthChecks
+{
+ ///
+ /// DI + pipeline wiring for /health, /health/detail, and /healthchecks.
+ /// Kept here so Program.cs shows two lines for this concern instead of ~80.
+ ///
+ public static class HealthCheckExtensions
+ {
+ // Per-process cache-buster for the injected UI-extras script, so browsers
+ // re-fetch after a deploy without requiring a hard refresh.
+ private static readonly string _assetVersion = DateTime.Now.Ticks.ToString();
+
+ /// <summary>
+ /// Path prefixes owned by the HealthChecks.UI dashboard and its assets.
+ /// Program.cs uses this list both to skip CSP (the UI bundle relies on
+ /// inline scripts / data: fonts) and to IP-gate every sub-path.
+ /// </summary>
+ public static readonly string[] UIPaths =
+ [
+ "/healthchecks",
+ "/healthchecks-api",
+ "/ui/resources",
+ ];
+
+ /// <summary>Reports whether the request path begins with one of the <see cref="UIPaths"/> prefixes.</summary>
+ public static bool IsUIPath(PathString path) =>
+     Array.Exists(UIPaths, uiPrefix => path.StartsWithSegments(uiPrefix));
+
+ /// <summary>
+ /// Registers all health checks plus HealthChecks.UI. Checks tagged "ready"
+ /// run on /health/detail; /health is bare liveness.
+ /// </summary>
+ public static IServiceCollection AddViperHealthChecks(
+ this IServiceCollection services,
+ IConfiguration configuration,
+ IHostEnvironment environment)
+ {
+ // Names use a "group-qualifier" convention ("db-*", "disk-space-*") so
+ // the UI's alphabetical sort groups related checks visually.
+ var builder = services.AddHealthChecks()
+ .AddDbContextCheck("db-aaud", tags: new[] { "ready" })
+ .AddDbContextCheck("db-clinical-scheduler", tags: new[] { "ready" })
+ .AddDbContextCheck("db-courses", tags: new[] { "ready" })
+ .AddDbContextCheck("db-crest", tags: new[] { "ready" })
+ .AddDbContextCheck("db-dictionary", tags: new[] { "ready" })
+ .AddDbContextCheck("db-eval-harvest", tags: new[] { "ready" })
+ .AddDbContextCheck("db-raps", tags: new[] { "ready" })
+ .AddDbContextCheck("db-sis", tags: new[] { "ready" })
+ .AddDbContextCheck("db-viper", tags: new[] { "ready" })
+ .AddCheck("disk-space-app", new DiskSpaceHealthCheck(), tags: new[] { "ready" });
+
+ // Photo gallery drive. Registered only when PhotoGallery:IDCardPhotoPath is
+ // configured. In Development the drive is a network share not mounted locally,
+ // so healthyWhenMissing=true treats "drive absent" as a pass (with a
+ // "skipped" description) rather than a permanent Unhealthy in dev.
+ var photoPath = configuration["PhotoGallery:IDCardPhotoPath"];
+ if (!string.IsNullOrWhiteSpace(photoPath))
+ {
+ builder.AddCheck(
+ "disk-space-photos",
+ new DiskSpaceHealthCheck(
+ explicitDrivePath: photoPath,
+ healthyWhenMissing: environment.IsDevelopment()),
+ tags: new[] { "ready" });
+ }
+
+ // CMS files drive. Same pattern as photos - the drive (S:\) is a network
+ // share unmounted on developer machines, so skip in dev. Path mirrors
+ // Areas/CMS/Data/CMS.GetRootFileFolder().
+ var cmsFilesPath = environment.IsDevelopment() ? @"C:\Sites\Files" : @"S:\Files";
+ builder.AddCheck(
+ "disk-space-cms",
+ new DiskSpaceHealthCheck(
+ explicitDrivePath: cmsFilesPath,
+ healthyWhenMissing: environment.IsDevelopment()),
+ tags: new[] { "ready" });
+
+ // NLog writes to LoggingPath (C:\nlog in dev, S:\nlog in test/prod).
+ // requirePathExists + verifyWritable together catch the three failure
+ // modes: missing directory, ACL/readonly regression, or drive full.
+ // Missing path is always an alert (never "skipped") since the app
+ // requires logging everywhere.
+ var loggingPath = configuration["LoggingPath"];
+ if (!string.IsNullOrWhiteSpace(loggingPath))
+ {
+ builder.AddCheck(
+ "disk-space-logs",
+ new DiskSpaceHealthCheck(
+ explicitDrivePath: loggingPath,
+ requirePathExists: true,
+ verifyWritable: true),
+ tags: new[] { "ready" });
+ }
+
+ // AWS SSM Parameter Store. The app loads config from SSM at startup
+ // (.AddSystemsManager in Program.cs); this check verifies runtime
+ // reachability with the same SDK.
+ builder.AddCheck(
+ "aws-ssm",
+ new AwsSsmHealthCheck(),
+ tags: new[] { "ready" });
+
+ // "campus-*" groups checks for services we don't own - UCD directory,
+ // SSO, mail gateway, clinical data source - so the UI sort surfaces
+ // them together and separately from DB/disk/internal checks.
+
+ // LDAP - UCD directory lookups (Classes/Utilities/LdapService.cs).
+ // Real LDAPS bind to ldap.ucdavis.edu:636 so a single probe covers
+ // TCP reachability, TLS/cert validity, and service-account auth.
+ // CA1416: LdapHealthCheck uses System.DirectoryServices.Protocols
+ // (Windows only). VIPER only runs on Windows/IIS, matching the
+ // existing pattern in Classes/Utilities/LdapService.cs.
+#pragma warning disable CA1416
+ builder.AddCheck(
+ "campus-ldap",
+ WithAdaptivePolling(new LdapHealthCheck()),
+ tags: new[] { "ready" });
+#pragma warning restore CA1416
+
+ // CAS - single sign-on. If this is down, nobody can log in.
+ // URL is environment-specific (ssodev in dev/test, cas in prod),
+ // read from Cas:CasBaseUrl.
+ var casBaseUrl = configuration["Cas:CasBaseUrl"];
+ if (!string.IsNullOrWhiteSpace(casBaseUrl))
+ {
+ // LazyInitializer: HealthCheckRegistration.Factory is invoked on
+ // every poll. Returning a fresh decorator each time would reset
+ // the cache - we need the same instance across calls so the
+ // adaptive-polling state persists. Pre-constructing at registration
+ // time isn't possible because IHttpClientFactory comes from DI.
+ AdaptivePollingHealthCheck? casCheck = null;
+ builder.Add(new HealthCheckRegistration(
+ "campus-cas",
+ sp => LazyInitializer.EnsureInitialized(ref casCheck, () =>
+ WithAdaptivePolling(new HttpEndpointHealthCheck(
+ sp.GetRequiredService(), casBaseUrl, "CAS"))),
+ failureStatus: HealthStatus.Unhealthy,
+ tags: new[] { "ready" }));
+ }
+
+ // SMTP - email notifications (Services/EmailService.cs). MailKit probe
+ // that does Connect + NoOp + Disconnect so a single check covers TCP
+ // reachability, EHLO handshake, and STARTTLS/cert validation when
+ // EnableSsl is set. Mirrors EmailService's connect path minus DATA.
+ // Dev + Mailpit is treated as "skipped" when Mailpit is not running so
+ // developer dashboards aren't permanently red.
+ var smtpHost = configuration["EmailSettings:SmtpHost"];
+ var smtpPort = configuration.GetValue("EmailSettings:SmtpPort") ?? 25;
+ if (!string.IsNullOrWhiteSpace(smtpHost))
+ {
+ var enableSsl = configuration.GetValue("EmailSettings:EnableSsl");
+ var useMailpit = configuration.GetValue("EmailSettings:UseMailpit");
+ var mailpitDev = environment.IsDevelopment() && useMailpit;
+ var socketOptions = enableSsl && !mailpitDev
+ ? MailKit.Security.SecureSocketOptions.StartTls
+ : MailKit.Security.SecureSocketOptions.None;
+
+ builder.AddCheck(
+ "campus-smtp",
+ WithAdaptivePolling(new SmtpHealthCheck(
+ smtpHost,
+ smtpPort,
+ socketOptions,
+ healthyWhenMissing: mailpitDev)),
+ tags: new[] { "ready" });
+ }
+
+ // VMACs - clinical data source (Areas/Directory/Services/VMACSService.cs
+ // and Areas/RAPS/Services/VMACSExport.cs). Simple HTTP probe.
+ // Same LazyInitializer pattern as campus-cas above - see that note.
+ AdaptivePollingHealthCheck? vmacsCheck = null;
+ builder.Add(new HealthCheckRegistration(
+ "campus-vmacs",
+ sp => LazyInitializer.EnsureInitialized(ref vmacsCheck, () =>
+ WithAdaptivePolling(new HttpEndpointHealthCheck(
+ sp.GetRequiredService(),
+ "https://vmacs-vmth.vetmed.ucdavis.edu",
+ "VMACs"))),
+ failureStatus: HealthStatus.Unhealthy,
+ tags: new[] { "ready" }));
+
+ // The collector polls /health/detail at the public BaseUrl. The
+ // outbound HttpClient stamps a process-unique token header (see
+ // UseApiEndpointDelegatingHandler below) so the endpoint filter
+ // can recognize the self-call without widening the IP allowlist
+ // to cover whatever NAT'd source IP the loop-out produces.
+ // Dev has no BaseUrl configured, so fall back to a relative URL.
+ var baseUrl = configuration["EmailSettings:BaseUrl"]?.TrimEnd('/');
+ var healthEndpointUrl = string.IsNullOrWhiteSpace(baseUrl)
+ ? "/health/detail"
+ : $"{baseUrl}/health/detail";
+ services.AddTransient();
+ services
+ .AddHealthChecksUI(setup =>
+ {
+ setup.AddHealthCheckEndpoint("viper", healthEndpointUrl);
+ setup.SetEvaluationTimeInSeconds(300);
+ setup.MaximumHistoryEntriesPerEndpoint(50);
+ setup.UseApiEndpointDelegatingHandler();
+ })
+ .AddInMemoryStorage();
+
+ return services;
+ }
+
+ ///
+ /// Wires the health-check endpoints and the UI dashboard into the pipeline,
+ /// including IP gating, duration-humanizer script injection, and the UI map.
+ /// Call AFTER UseRouting / UseAuthentication / UseSession.
+ ///
+ public static WebApplication UseViperHealthChecks(this WebApplication app)
+ {
+ // /health - bare liveness. Anonymous (Jenkins has no CAS creds).
+ app.MapHealthChecks("/health", new HealthCheckOptions
+ {
+ Predicate = _ => false,
+ });
+
+ // /health/detail - per-check JSON (UI format), IP-allowlisted to SVM
+ // infra via InternalAllowlist. Intentionally not CAS-gated so the
+ // endpoint stays reachable when auth subsystems are degraded. The
+ // in-app HealthChecksUI collector bypasses the IP check by sending
+ // a process-unique token header (HealthCheckCollectorAuth).
+ app.MapHealthChecks("/health/detail", new HealthCheckOptions
+ {
+ Predicate = c => c.Tags.Contains("ready"),
+ ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse,
+ }).AddEndpointFilter(async (ctx, next) =>
+ {
+ var token = ctx.HttpContext.Request.Headers[HealthCheckCollectorAuth.HeaderName].FirstOrDefault();
+ if (HealthCheckCollectorAuth.Matches(token))
+ {
+ return await next(ctx);
+ }
+ if (!ClientIpFilterAttribute.IsClientIpSafe("InternalAllowlist"))
+ {
+ ctx.HttpContext.Response.StatusCode = StatusCodes.Status401Unauthorized;
+ return null;
+ }
+ return await next(ctx);
+ });
+
+ // IP-gate every UI sub-path (HTML page, API, resource files, webhook config).
+ app.UseWhen(
+ ctx => IsUIPath(ctx.Request.Path),
+ branch => branch.Use(async (ctx, next) =>
+ {
+ if (!ClientIpFilterAttribute.IsClientIpSafe("InternalAllowlist"))
+ {
+ ctx.Response.StatusCode = StatusCodes.Status401Unauthorized;
+ return;
+ }
+ await next();
+ }));
+
+ // Xabaril renders raw TimeSpan strings in the DURATION column; we can't
+ // configure this server-side. Inject a small JS that rewrites those
+ // cells as "243ms" / "2.19s" / "1m23s". Runs before MapHealthChecksUI
+ // so the middleware wraps the UI endpoint's response body.
+ app.Use(async (ctx, next) =>
+ {
+ // StartsWithSegments handles trailing slashes ("/healthchecks/")
+ // without matching siblings like "/healthchecks-api"; we still
+ // gate on text/html below so JSON/asset responses aren't mangled.
+ if (!ctx.Request.Path.StartsWithSegments("/healthchecks"))
+ {
+ await next();
+ return;
+ }
+
+ var originalBody = ctx.Response.Body;
+ using var buffer = new MemoryStream();
+ ctx.Response.Body = buffer;
+ try
+ {
+ await next();
+ }
+ finally
+ {
+ // Restore before any downstream error handler runs, even if
+ // next() threw - leaving Response.Body pointing at the (soon
+ // disposed) MemoryStream breaks error-page middleware.
+ ctx.Response.Body = originalBody;
+ }
+ buffer.Seek(0, SeekOrigin.Begin);
+
+ if (ctx.Response.ContentType?.Contains("text/html", StringComparison.OrdinalIgnoreCase) == true)
+ {
+ using var reader = new StreamReader(buffer, leaveOpen: true);
+ var html = await reader.ReadToEndAsync();
+ var injected = html
+ .Replace("Health Checks UI", "Health Checks Status")
+ .Replace(
+ "