diff --git a/CHANGELOG.md b/CHANGELOG.md index 70022f903c..cf343e4f51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ All notable changes to this project will be documented in this file. From versio - Add `Vary` header to responses by @develop7 in #4609 - Add config `db-timezone-enabled` for optional querying of timezones by @taimoorzaeem in #4751 +- Introduced producing OpenTelemetry traces by @develop7 in #3140 + + Requires a new `server-otel-enabled` config parameter to be enabled first. + ### Changed - All responses now include a `Vary` header by @develop7 in #4609 diff --git a/default.nix b/default.nix index 09af0f1d01..40335884f6 100644 --- a/default.nix +++ b/default.nix @@ -150,6 +150,7 @@ rec { ghc = pkgs.haskell.compiler."${compiler}"; inherit (pkgs.haskell.packages."${compiler}") hpc-codecov; inherit (pkgs.haskell.packages."${compiler}") weeder; + otelcol = pkgs.opentelemetry-collector; }; } // pkgs.lib.optionalAttrs pkgs.stdenv.isLinux rec { # Static executable. diff --git a/docs/integrations/opentelemetry.rst b/docs/integrations/opentelemetry.rst new file mode 100644 index 0000000000..62b50beb4c --- /dev/null +++ b/docs/integrations/opentelemetry.rst @@ -0,0 +1,66 @@ +.. _opentelemetry: + +OpenTelemetry +------------- + +PostgREST is able to act as OpenTelemetry traces producer. OpenTelemetry is configured +using ``OTEL_*`` environment variables, per the `OpenTelemetry specification`_. + +The OpenTelemetry support is currently both experimental and in early stages of development, so expect some rough edges +or lack of functionality, such as metrics or logs. Since current OpenTelemetry implementation incurs a small +(~6% in our "Loadtest (mixed)" suite) performance hit, it is gated behind the :ref:`server-otel-enabled` +configuration option, disabled by default. + +Example configuration: + +.. code-block:: shell + + OTEL_EXPORTER_OTLP_ENDPOINT='https://api.honeycomb.io/' \ + OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=" \ + OTEL_SERVICE_NAME='PostgREST'\ + OTEL_LOG_LEVEL='debug'\ + OTEL_TRACES_SAMPLER='always_on' \ + postgrest + +Prometheus metrics through the OpenTelemetry Collector +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PostgREST currently exports traces through OpenTelemetry, but not metrics. But, +it's possible to scrape the :ref:`metrics` endpoint exposed by the +:ref:`admin_server` and relay it through the OpenTelemetry Collector. + +Example collector configuration: + +.. code-block:: yaml + + receivers: + prometheus: + config: + scrape_configs: + - job_name: postgrest + scrape_interval: 15s + metrics_path: /metrics + static_configs: + - targets: ["127.0.0.1:3001"] + + processors: + batch: + + exporters: + otlp: + endpoint: otel-collector:4317 + tls: + insecure: true + + service: + pipelines: + metrics: + receivers: [prometheus] + processors: [batch] + exporters: [otlp] + +This assumes the PostgREST :ref:`admin_server` is listening on port ``3001`` and that +its ``/metrics`` endpoint is reachable by the collector. Adjust the target address and +OTLP exporter settings for your deployment. + +.. _`OpenTelemetry specification`: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ diff --git a/docs/postgrest.dict b/docs/postgrest.dict index 355c72dccb..6db87212da 100644 --- a/docs/postgrest.dict +++ b/docs/postgrest.dict @@ -57,6 +57,7 @@ HMAC htmx Htmx Homebrew +hs hstore HTTP HTTPS @@ -84,6 +85,7 @@ localhost login lookups Logins +Loadtest LIBPQ logins lon @@ -107,6 +109,10 @@ Observability Okta OpenAPI openapi +OpenTelemetry +opentelemetry +otel +OTLP ov parametrized passphrase diff --git a/docs/references/configuration.rst b/docs/references/configuration.rst index 48e1a92753..77260d5ac5 100644 --- a/docs/references/configuration.rst +++ b/docs/references/configuration.rst @@ -930,6 +930,21 @@ server-timing-enabled Enables the `Server-Timing `_ header. See :ref:`server-timing_header`. +.. _server-otel-enabled: + +server-otel-enabled +------------------- + + =============== ================================= + **Type** Boolean + **Default** False + **Reloadable** N + **Environment** PGRST_SERVER_OTEL_ENABLED + **In-Database** `n/a` + =============== ================================= + + When this is set to :code:`true`, OpenTelemetry tracing is enabled. See :ref:`opentelemetry` for details and settings. + .. _server-unix-socket: server-unix-socket diff --git a/nix/tools/tests.nix b/nix/tools/tests.nix index 4e2bf0803a..f87d3be784 100644 --- a/nix/tools/tests.nix +++ b/nix/tools/tests.nix @@ -10,6 +10,7 @@ , hostPlatform , jq , lib +, otelcol , postgrest , python3 , runtimeShell @@ -40,6 +41,7 @@ let args = [ "ARG_LEFTOVERS([hspec arguments])" ]; workingDir = "/"; withEnv = postgrest.env; + withPath = [ otelcol ]; } '' ${withTools.withPg} -f test/observability/fixtures/load.sql \ diff --git a/postgrest.cabal b/postgrest.cabal index d88a035db2..19dfbe1f79 100644 --- a/postgrest.cabal +++ b/postgrest.cabal @@ -79,6 +79,7 @@ library PostgREST.Query.QueryBuilder PostgREST.Query.SqlFragment PostgREST.Query.Statements + PostgREST.OpenTelemetry PostgREST.Plan PostgREST.Plan.CallPlan PostgREST.Plan.MutatePlan @@ -112,6 +113,7 @@ library , cookie >= 0.4.2 && < 0.6 , directory >= 1.2.6 && < 1.4 , either >= 4.4.1 && < 5.1 + , exceptions >= 0.10 && < 0.12 , extra >= 1.7.0 && < 2.0 , fuzzyset >= 0.2.4 && < 0.3 , hasql >= 1.6.1.1 && < 1.7 @@ -121,6 +123,13 @@ library , hasql-transaction >= 1.0.1 && < 1.2 , http-client >= 0.7.19 && < 0.8 , http-types >= 0.12.2 && < 0.13 + , hs-opentelemetry-sdk >= 0.1.0.0 && < 0.2.0.0 + , hs-opentelemetry-instrumentation-wai + , hs-opentelemetry-api + -- ^ this is due to hs-otel-sdk is not reexporting getTracerTracerProvider + -- needed to initialize OpenTelemetry.middleware + , hs-opentelemetry-utils-exceptions + , hs-opentelemetry-propagator-w3c , insert-ordered-containers >= 0.2.2 && < 0.3 , jose-jwt >= 0.9.6 && < 0.11 , lens >= 4.14 && < 5.4 @@ -271,6 +280,8 @@ test-suite spec , hasql-pool >= 1.0.1 && < 1.1 , hasql-transaction >= 1.0.1 && < 1.2 , heredoc >= 0.2 && < 0.3 + , hs-opentelemetry-sdk >= 0.1.0.0 && < 0.2.0.0 + , hs-opentelemetry-instrumentation-hspec , hspec >= 2.3 && < 2.12 , hspec-expectations >= 0.8.4 && < 0.9 , hspec-wai >= 0.10 && < 0.12 @@ -306,11 +317,17 @@ test-suite observability hs-source-dirs: test/observability main-is: Main.hs other-modules: ObsHelper + OTelHelper Observation.JwtCache Observation.MetricsSpec + Observation.OpenTelemetry build-depends: base >= 4.9 && < 4.20 + , aeson >= 2.0.3 && < 2.3 , base64-bytestring >= 1 && < 1.3 , bytestring >= 0.10.8 && < 0.13 + , containers >= 0.5.7 && < 0.7 + , directory >= 1.2.6 && < 1.4 + , filepath >= 1.4.1 && < 1.6 , hasql-pool >= 1.0.1 && < 1.1 , hasql-transaction >= 1.0.1 && < 1.2 , hspec >= 2.3 && < 2.12 @@ -319,10 +336,15 @@ test-suite observability , hspec-wai-json >= 0.10 && < 0.12 , http-types >= 0.12.3 && < 0.13 , jose-jwt >= 0.9.6 && < 0.11 + , network >= 2.6 && < 3.3 , postgrest , prometheus-client >= 1.1.1 && < 1.2.0 + , process >= 1.4.2 && < 1.7 , protolude >= 0.3.1 && < 0.4 , text >= 1.2.2 && < 2.2 + , retry >= 0.7.4 && < 0.10 + , streaming-commons + , temporary >= 1.3 && < 1.4 , wai >= 3.2.1 && < 3.3 ghc-options: -threaded -O0 -Werror -Wall -fwarn-identities -fno-spec-constr -optP-Wno-nonportable-include-path diff --git a/src/PostgREST/App.hs b/src/PostgREST/App.hs index 2938e61843..ed5db47a34 100644 --- a/src/PostgREST/App.hs +++ b/src/PostgREST/App.hs @@ -33,22 +33,23 @@ import qualified Data.Text.Encoding as T import qualified Network.Wai as Wai import qualified Network.Wai.Handler.Warp as Warp -import qualified PostgREST.Admin as Admin -import qualified PostgREST.ApiRequest as ApiRequest -import qualified PostgREST.AppState as AppState -import qualified PostgREST.Auth as Auth -import qualified PostgREST.Cors as Cors -import qualified PostgREST.Error as Error -import qualified PostgREST.Listener as Listener -import qualified PostgREST.Logger as Logger -import qualified PostgREST.MainTx as MainTx -import qualified PostgREST.Plan as Plan -import qualified PostgREST.Query as Query -import qualified PostgREST.Response as Response -import qualified PostgREST.Unix as Unix (installSignalHandlers) +import qualified PostgREST.Admin as Admin +import qualified PostgREST.ApiRequest as ApiRequest +import qualified PostgREST.AppState as AppState +import qualified PostgREST.Auth as Auth +import qualified PostgREST.Cors as Cors +import qualified PostgREST.Error as Error +import qualified PostgREST.Listener as Listener +import qualified PostgREST.Logger as Logger +import qualified PostgREST.MainTx as MainTx +import qualified PostgREST.OpenTelemetry as OTel +import qualified PostgREST.Plan as Plan +import qualified PostgREST.Query as Query +import qualified PostgREST.Response as Response +import qualified PostgREST.Unix as Unix (installSignalHandlers) import PostgREST.ApiRequest (ApiRequest (..)) -import PostgREST.AppState (AppState) +import PostgREST.AppState (AppState, getOTelTracer) import PostgREST.Auth.Types (AuthResult (..)) import PostgREST.Config (AppConfig (..), LogLevel (..)) import PostgREST.Error (Error) @@ -68,12 +69,13 @@ import qualified Data.Text as T import qualified Network.HTTP.Types as HTTP import qualified Network.HTTP.Types.Header as HTTP (hVary) import qualified Network.Socket as NS +import OpenTelemetry.Trace (defaultSpanArguments) import PostgREST.Unix (createAndBindDomainSocket) import Protolude hiding (Handler) type Handler = ExceptT Error -run :: AppState -> IO () +run :: HasCallStack => AppState -> IO () run appState = do conf@AppConfig{..} <- AppState.getConfig appState @@ -119,15 +121,16 @@ serverSettings AppConfig{..} = & setServerName ("postgrest/" <> prettyVersion) -- | PostgREST application -postgrest :: LogLevel -> AppState.AppState -> IO () -> Wai.Application +postgrest :: HasCallStack => LogLevel -> AppState.AppState -> IO () -> Wai.Application postgrest logLevel appState connWorker = + OTel.middleware appState . traceHeaderMiddleware appState . Cors.middleware appState . Auth.middleware appState . Logger.middleware logLevel Auth.getRole $ -- fromJust can be used, because the auth middleware will **always** add -- some AuthResult to the vault. - \req respond -> do + \req respond -> OTel.inSpanM (getOTelTracer appState) "request" defaultSpanArguments $ do appConf@AppConfig{..} <- AppState.getConfig appState -- the config must be read again because it can reload case fromJust $ Auth.getResult req of Left err -> respond $ Error.errorResponseFor configClientErrorVerbosity err @@ -154,7 +157,8 @@ postgrest logLevel appState connWorker = respond resp postgrestResponse - :: AppState.AppState + :: HasCallStack + => AppState.AppState -> AppConfig -> Maybe SchemaCache -> AuthResult @@ -177,14 +181,16 @@ postgrestResponse appState conf@AppConfig{..} maybeSchemaCache authResult@AuthRe timezones = dbTimezones sCache prefs = ApiRequest.userPreferences conf req timezones - (parseTime, apiReq@ApiRequest{..}) <- withTiming $ liftEither . mapLeft Error.ApiRequestErr $ ApiRequest.userApiRequest conf prefs req body - (planTime, plan) <- withTiming $ liftEither $ Plan.actionPlan iAction conf apiReq sCache + (parseTime, apiReq@ApiRequest{..}) <- withOTel "parse" $ withTiming $ liftEither . mapLeft Error.ApiRequestErr $ ApiRequest.userApiRequest conf prefs req body + (planTime, plan) <- withOTel "plan" $ withTiming $ liftEither $ Plan.actionPlan iAction conf apiReq sCache - let mainQ = Query.mainQuery plan conf apiReq authResult configDbPreRequest + traceContext <- lift OTel.renderTraceContext + + let mainQ = Query.mainQuery plan conf apiReq authResult configDbPreRequest traceContext tx = MainTx.mainTx mainQ conf authResult apiReq plan sCache obsQuery s = when configLogQuery $ observer $ QueryObs mainQ s - (txTime, txResult) <- withTiming $ do + (txTime, txResult) <- withOTel "query" $ withTiming $ do case tx of MainTx.NoDbTx r -> pure r MainTx.DbTx{..} -> do @@ -197,7 +203,7 @@ postgrestResponse appState conf@AppConfig{..} maybeSchemaCache authResult@AuthRe lift $ whenLeft eitherResp $ obsQuery . Error.status liftEither eitherResp - (respTime, resp) <- withTiming $ do + (respTime, resp) <- withOTel "response" $ withTiming $ do let response = Response.actionResponse txResult apiReq (T.decodeUtf8 prettyVersion, docsVersion) conf sCache status' = either Error.status Response.pgrstStatus response @@ -208,7 +214,7 @@ postgrestResponse appState conf@AppConfig{..} maybeSchemaCache authResult@AuthRe return $ toWaiResponse (ServerTiming jwtTime parseTime planTime txTime respTime) resp where - toWaiResponse :: ServerTiming -> Response.PgrstResponse -> Wai.Response + toWaiResponse :: HasCallStack => ServerTiming -> Response.PgrstResponse -> Wai.Response toWaiResponse timing (Response.PgrstResponse st hdrs bod) = Wai.responseLBS st (hdrs ++ serverTimingHeaders timing ++ [varyHeader | not $ varyHeaderPresent hdrs]) bod @@ -221,7 +227,7 @@ postgrestResponse appState conf@AppConfig{..} maybeSchemaCache authResult@AuthRe varyHeaderPresent :: [HTTP.Header] -> Bool varyHeaderPresent = any (\(h, _v) -> h == HTTP.hVary) - withTiming :: Handler IO a -> Handler IO (Maybe Double, a) + withTiming :: HasCallStack => Handler IO a -> Handler IO (Maybe Double, a) withTiming f = if configServerTimingEnabled then do (t, r) <- timeItT f @@ -230,6 +236,10 @@ postgrestResponse appState conf@AppConfig{..} maybeSchemaCache authResult@AuthRe r <- f pure (Nothing, r) + withOTel :: HasCallStack => Text -> Handler IO a -> Handler IO a + withOTel label = do + OTel.inSpanM (getOTelTracer appState) label defaultSpanArguments + traceHeaderMiddleware :: AppState -> Wai.Middleware traceHeaderMiddleware appState app req respond = do conf <- AppState.getConfig appState diff --git a/src/PostgREST/AppState.hs b/src/PostgREST/AppState.hs index bed8e37b3a..2ecb89d064 100644 --- a/src/PostgREST/AppState.hs +++ b/src/PostgREST/AppState.hs @@ -13,6 +13,7 @@ module PostgREST.AppState , getNextListenerDelay , getTime , getJwtCacheState + , getOTelTracer , init , initWithPool , putConfig -- For tests TODO refactoring @@ -28,46 +29,52 @@ module PostgREST.AppState , isPending ) where -import qualified Data.ByteString.Char8 as BS -import Data.Either.Combinators (whenLeft) -import qualified Hasql.Pool as SQL -import qualified Hasql.Pool.Config as SQL -import qualified Hasql.Session as SQL -import qualified Hasql.Transaction.Sessions as SQL -import qualified Network.HTTP.Types.Status as HTTP -import qualified PostgREST.Auth.JwtCache as JwtCache -import qualified PostgREST.Error as Error -import qualified PostgREST.Logger as Logger -import qualified PostgREST.Metrics as Metrics +import Control.AutoUpdate (defaultUpdateSettings, + mkAutoUpdate, + updateAction) +import Control.Debounce +import Control.Retry (RetryPolicy, + RetryStatus (..), + capDelay, + exponentialBackoff, + retrying, + rsPreviousDelay) +import qualified Data.ByteString.Char8 as BS +import Data.Either.Combinators (whenLeft) +import Data.IORef (IORef, + atomicWriteIORef, + newIORef, + readIORef) +import Data.Time.Clock (UTCTime, + getCurrentTime) +import qualified Hasql.Pool as SQL +import qualified Hasql.Pool.Config as SQL +import qualified Hasql.Session as SQL +import qualified Hasql.Transaction.Sessions as SQL +import qualified Network.HTTP.Types.Status as HTTP +import OpenTelemetry.Trace (Tracer) +import PostgREST.Auth.JwtCache (JwtCacheState, + update) +import qualified PostgREST.Auth.JwtCache as JwtCache +import PostgREST.Config (AppConfig (..), + addFallbackAppName, + readAppConfig) +import PostgREST.Config.Database (queryDbSettings, + queryPgVersion, + queryRoleSettings) +import PostgREST.Config.PgVersion (PgVersion (..), + minimumPgVersion) +import qualified PostgREST.Error as Error +import qualified PostgREST.Logger as Logger +import qualified PostgREST.Metrics as Metrics import PostgREST.Observation -import PostgREST.TimeIt (timeItT) -import PostgREST.Version (prettyVersion) - -import Control.AutoUpdate (defaultUpdateSettings, mkAutoUpdate, - updateAction) -import Control.Debounce -import Control.Retry (RetryPolicy, RetryStatus (..), capDelay, - exponentialBackoff, retrying, - rsPreviousDelay) -import Data.IORef (IORef, atomicWriteIORef, newIORef, - readIORef) -import Data.Time.Clock (UTCTime, getCurrentTime) - -import PostgREST.Auth.JwtCache (JwtCacheState, update) -import PostgREST.Config (AppConfig (..), - addFallbackAppName, - readAppConfig) -import PostgREST.Config.Database (queryDbSettings, - queryPgVersion, - queryRoleSettings) -import PostgREST.Config.PgVersion (PgVersion (..), - minimumPgVersion) -import PostgREST.SchemaCache (SchemaCache (..), - querySchemaCache, - showSummary) -import PostgREST.SchemaCache.Identifiers (quoteQi) - -import Protolude +import PostgREST.SchemaCache (SchemaCache (..), + querySchemaCache, + showSummary) +import PostgREST.SchemaCache.Identifiers (quoteQi) +import PostgREST.TimeIt (timeItT) +import PostgREST.Version (prettyVersion) +import Protolude data AppState = AppState -- | Database connection pool @@ -98,6 +105,10 @@ data AppState = AppState , stateJwtCache :: JwtCache.JwtCacheState , stateLogger :: Logger.LoggerState , stateMetrics :: Metrics.MetricsState + -- | OpenTelemetry tracer. @Nothing@ represents disabled OTel SDK. + -- It's a workaround for now, as @hs-opentelemetry-api@ doesn't have @Tracer.tracerIsEnabled@ released yet. + -- Tracking issue: https://github.com/iand675/hs-opentelemetry/issues/212 + , stateOTelTracer :: Maybe Tracer } -- | Schema cache status. @@ -106,8 +117,8 @@ newtype SchemaCacheStatus = SchemaCacheStatus { getSCStatusMVar :: MVar () } -init :: AppConfig -> IO AppState -init conf@AppConfig{configLogLevel, configDbPoolSize} = do +init :: AppConfig -> Maybe Tracer -> IO AppState +init conf@AppConfig{configLogLevel, configDbPoolSize} tracer = do loggerState <- Logger.init metricsState <- Metrics.init configDbPoolSize let observer = liftA2 (>>) (Logger.observationLogger loggerState configLogLevel) (Metrics.observationMetrics metricsState) @@ -115,10 +126,10 @@ init conf@AppConfig{configLogLevel, configDbPoolSize} = do observer $ AppStartObs prettyVersion pool <- initPool conf observer - initWithPool pool conf loggerState metricsState observer --{ stateSocketREST = sock, stateSocketAdmin = adminSock} + initWithPool pool conf loggerState metricsState tracer observer -initWithPool :: SQL.Pool -> AppConfig -> Logger.LoggerState -> Metrics.MetricsState -> ObservationHandler -> IO AppState -initWithPool pool conf loggerState metricsState observer = do +initWithPool :: SQL.Pool -> AppConfig -> Logger.LoggerState -> Metrics.MetricsState -> Maybe Tracer -> ObservationHandler -> IO AppState +initWithPool pool conf loggerState metricsState tracer observer = do appState <- AppState pool <$> newIORef minimumPgVersion -- assume we're in a supported version when starting, this will be corrected on a later step @@ -135,6 +146,7 @@ initWithPool pool conf loggerState metricsState observer = do <*> JwtCache.init conf observer <*> pure loggerState <*> pure metricsState + <*> pure tracer deb <- let decisecond = 100000 in @@ -262,6 +274,9 @@ getTime = stateGetTime getJwtCacheState :: AppState -> JwtCacheState getJwtCacheState = stateJwtCache +getOTelTracer :: AppState -> Maybe Tracer +getOTelTracer = stateOTelTracer + getMainThreadId :: AppState -> ThreadId getMainThreadId = stateMainThreadId diff --git a/src/PostgREST/CLI.hs b/src/PostgREST/CLI.hs index a635aa0814..8b948fcd40 100644 --- a/src/PostgREST/CLI.hs +++ b/src/PostgREST/CLI.hs @@ -13,11 +13,12 @@ import qualified Data.ByteString.Lazy as LBS import qualified Hasql.Transaction.Sessions as SQL import qualified Options.Applicative as O -import PostgREST.AppState (AppState) -import PostgREST.Config (AppConfig (..)) -import PostgREST.Observation (Observation (..)) -import PostgREST.SchemaCache (querySchemaCache) -import PostgREST.Version (prettyVersion) +import PostgREST.AppState (AppState) +import PostgREST.Config (AppConfig (..)) +import PostgREST.Observation (Observation (..)) +import PostgREST.OpenTelemetry (Tracer, withTracer) +import PostgREST.SchemaCache (querySchemaCache) +import PostgREST.Version (prettyVersion) import qualified PostgREST.App as App import qualified PostgREST.AppState as AppState @@ -26,27 +27,31 @@ import qualified PostgREST.Config as Config import Protolude +main :: HasCallStack => CLI -> IO () +main CLI{cliCommand, cliPath} = withTracer $ \tracer -> do -main :: CLI -> IO () -main CLI{cliCommand, cliPath} = do conf <- either panic identity <$> Config.readAppConfig mempty cliPath Nothing mempty mempty + let tracer' = if configServerOtelEnabled conf + then Just tracer + else Nothing + case cliCommand of Client adminCmd -> runClientCommand conf adminCmd - Run runCmd -> runAppCommand conf runCmd + Run runCmd -> runAppCommand tracer' conf runCmd -- | Run command using http-client to communicate with an already running postgrest runClientCommand :: AppConfig -> ClientCommand -> IO () runClientCommand conf CmdReady = Client.ready conf -- | Run postgrest with command -runAppCommand :: AppConfig -> RunCommand -> IO () -runAppCommand conf@AppConfig{..} runCmd = do +runAppCommand :: Maybe Tracer -> AppConfig -> RunCommand -> IO () +runAppCommand tracer conf@AppConfig{..} runCmd = do -- Per https://github.com/PostgREST/postgrest/issues/268, we want to -- explicitly close the connections to PostgreSQL on shutdown. -- 'AppState.destroy' takes care of that. bracket - (AppState.init conf) + (AppState.init conf tracer) AppState.destroy (\appState -> case runCmd of CmdDumpConfig -> do diff --git a/src/PostgREST/Config.hs b/src/PostgREST/Config.hs index d18a8da57c..8065332489 100644 --- a/src/PostgREST/Config.hs +++ b/src/PostgREST/Config.hs @@ -124,6 +124,7 @@ data AppConfig = AppConfig , configInternalSCQuerySleep :: Maybe Int32 , configInternalSCLoadSleep :: Maybe Int32 , configInternalSCRelLoadSleep :: Maybe Int32 + , configServerOtelEnabled :: Bool } data LogLevel = LogCrit | LogError | LogWarn | LogInfo | LogDebug @@ -200,6 +201,7 @@ toText conf = ,("server-port", show . configServerPort) ,("server-trace-header", q . T.decodeUtf8 . maybe mempty CI.original . configServerTraceHeader) ,("server-timing-enabled", T.toLower . show . configServerTimingEnabled) + ,("server-otel-enabled", T.toLower . show . configServerOtelEnabled) ,("server-unix-socket", q . maybe mempty T.pack . configServerUnixSocket) ,("server-unix-socket-mode", q . T.pack . showSocketMode) ,("admin-server-host", q . configAdminServerHost) @@ -325,6 +327,7 @@ parser optPath env dbSettings roleSettings roleIsolationLvl = <*> optInt "internal-schema-cache-query-sleep" <*> optInt "internal-schema-cache-load-sleep" <*> optInt "internal-schema-cache-relationship-load-sleep" + <*> (fromMaybe False <$> optBool "server-otel-enabled") where parseErrorVerbosity :: C.Key -> C.Parser C.Config Verbosity parseErrorVerbosity k = @@ -567,6 +570,8 @@ readDbUriFile maybeDbUri conf = type Environment = M.Map [Char] Text -- | Read environment variables that start with PGRST_ +-- Note: `OTEL_*` environment variables, while being recornized by OpenTelemetry +-- subsystem, are specifically ignored here readPGRSTEnvironment :: IO Environment readPGRSTEnvironment = M.map T.pack . M.fromList . filter (isPrefixOf "PGRST_" . fst) <$> getEnvironment diff --git a/src/PostgREST/OpenTelemetry.hs b/src/PostgREST/OpenTelemetry.hs new file mode 100644 index 0000000000..f167c386d4 --- /dev/null +++ b/src/PostgREST/OpenTelemetry.hs @@ -0,0 +1,90 @@ +{- | +Module : PostgREST.OpenTelemetry +Description : OpenTelemetry integration +Maintains the OpenTelemetry Tracer and provides a function to run +PostgREST with it. + +Basically, you want to use `withTracer` in your main function, and then +use `getOTelTracer` in your application code to get the tracer and +create spans with `inSpanM`. + +At this moment trace spans have to be explicit, by wrapping the code in `inSpanM` calls. +In order produced spans to have correct code locations, all the functions across the call stack up to the +`inSpanM` call must have `HasCallStack` constraint, because +[GHC is never inferring it](https://downloads.haskell.org/ghc/9.8.4/docs/users_guide/exts/callstack.html) for us. +-} +module PostgREST.OpenTelemetry (Tracer, withTracer, middleware, inSpanM, renderTraceContext) where + +import Control.Monad.Catch (MonadMask) +import Network.Wai (Middleware) +import OpenTelemetry.Attributes (emptyAttributes) +import qualified OpenTelemetry.Context as Context +import OpenTelemetry.Context.ThreadLocal (getContext) +import OpenTelemetry.Instrumentation.Wai (newOpenTelemetryWaiMiddleware') +import OpenTelemetry.Propagator.W3CTraceContext (encodeSpanContext) +import OpenTelemetry.Trace (InstrumentationLibrary (..), + SpanArguments, + Tracer, + initializeGlobalTracerProvider, + makeTracer, + shutdownTracerProvider, + tracerOptions) +import OpenTelemetry.Trace.Core (getTracerTracerProvider) +import OpenTelemetry.Utils.Exceptions (inSpanM'') +import PostgREST.AppState (AppState, + getOTelTracer) +import PostgREST.Version (prettyVersion) +import Protolude + +{- | Wrap user's code with OpenTelemetry Tracer, initializing it with sensible defaults -} +withTracer :: (Tracer -> IO c) -> IO c +withTracer f = bracket + initializeGlobalTracerProvider + shutdownTracerProvider + (\tracerProvider -> f $ makeTracer tracerProvider instrumentationLibrary tracerOptions) + where + instrumentationLibrary = + InstrumentationLibrary + { libraryName = "PostgREST" + , libraryVersion = decodeUtf8 prettyVersion + , librarySchemaUrl = "" + , libraryAttributes = emptyAttributes} + +middleware :: AppState -> Network.Wai.Middleware +middleware s = case getOTelTracer s of + Just t -> newOpenTelemetryWaiMiddleware' $ getTracerTracerProvider t + -- Make sure OTel code is actually noop + Nothing -> identity + + +-- | The simplest function for annotating code with trace information. +-- In case the tracer is @Nothing@, i.e. disabled, the function is noop. +inSpanM + :: (MonadIO m, MonadMask m, HasCallStack) + => Maybe Tracer + -> Text + -- ^ The name of the span. This may be updated later via 'updateName' + -> SpanArguments + -- ^ Additional options for creating the span, such as 'SpanKind', + -- span links, starting attributes, etc. + -> m a + -- ^ The action to perform. 'inSpan' will record the time spent on the + -- action without forcing strict evaluation of the result. Any uncaught + -- exceptions will be recorded and rethrown. + -> m a +inSpanM (Just t) n args m = inSpanM'' t callStack n args (const m) +inSpanM Nothing _ _ m = m + +renderTraceContext :: IO (Maybe ByteString) +renderTraceContext = do + -- Render TraceContext in W3C Trace Context format + -- https://www.w3.org/TR/trace-context/#traceparent-header + -- https://www.w3.org/TR/trace-context/#tracestate-header + -- + ctx <- getContext + case Context.lookupSpan ctx of + Nothing -> pure Nothing + Just span -> do + (tParent, _tState) <- encodeSpanContext span + let traceContext = "traceparent='" <> tParent <> "'" + pure $ Just traceContext diff --git a/src/PostgREST/Query.hs b/src/PostgREST/Query.hs index b47abd8965..1e4ad5b461 100644 --- a/src/PostgREST/Query.hs +++ b/src/PostgREST/Query.hs @@ -41,10 +41,10 @@ data MainQuery = MainQuery , mqExplain :: Maybe SQL.Snippet -- ^ the explain query that gets generated for the "Prefer: count=estimated" case } -mainQuery :: ActionPlan -> AppConfig -> ApiRequest -> AuthResult -> Maybe QualifiedIdentifier -> MainQuery -mainQuery (NoDb _) _ _ _ _ = MainQuery mempty Nothing mempty (mempty, mempty, mempty) mempty -mainQuery (Db plan) conf@AppConfig{..} apiReq@ApiRequest{iTopLevelRange=range, iPreferences=Preferences{..}} authRes preReq = - let genQ = MainQuery (PreQuery.txVarQuery plan conf authRes apiReq) (PreQuery.preReqQuery <$> preReq) in +mainQuery :: ActionPlan -> AppConfig -> ApiRequest -> AuthResult -> Maybe QualifiedIdentifier -> Maybe ByteString -> MainQuery +mainQuery (NoDb _) _ _ _ _ _ = MainQuery mempty Nothing mempty (mempty, mempty, mempty) mempty +mainQuery (Db plan) conf@AppConfig{..} apiReq@ApiRequest{iTopLevelRange=range, iPreferences=Preferences{..}} authRes preReq traceContext = + let genQ = MainQuery (PreQuery.txVarQuery plan conf authRes apiReq traceContext ) (PreQuery.preReqQuery <$> preReq) in case plan of DbCrud _ WrappedReadPlan{..} -> let countQuery = QueryBuilder.readPlanToCountQuery wrReadPlan in diff --git a/src/PostgREST/Query/PreQuery.hs b/src/PostgREST/Query/PreQuery.hs index bb931ffb22..48fa9140c4 100644 --- a/src/PostgREST/Query/PreQuery.hs +++ b/src/PostgREST/Query/PreQuery.hs @@ -16,7 +16,6 @@ import qualified Data.HashMap.Strict as HM import qualified Hasql.DynamicStatements.Snippet as SQL hiding (sql) - import PostgREST.ApiRequest (ApiRequest (..)) import PostgREST.ApiRequest.Preferences (PreferTimezone (..), Preferences (..)) @@ -35,12 +34,12 @@ import PostgREST.SchemaCache.Routine (Routine (..)) import Protolude hiding (Handler) -- sets transaction variables -txVarQuery :: DbActionPlan -> AppConfig -> AuthResult -> ApiRequest -> SQL.Snippet -txVarQuery dbActPlan AppConfig{..} AuthResult{..} ApiRequest{..} = +txVarQuery :: DbActionPlan -> AppConfig -> AuthResult -> ApiRequest -> Maybe ByteString -> SQL.Snippet +txVarQuery dbActPlan AppConfig{..} AuthResult{..} ApiRequest{..} traceContext = -- To ensure `GRANT SET ON PARAMETER TO authenticator` works, the role settings must be set before the impersonated role. -- Otherwise the GRANT SET would have to be applied to the impersonated role. See https://github.com/PostgREST/postgrest/issues/3045 "select " <> intercalateSnippet ", " ( - searchPathSql : roleSettingsSql ++ roleSql ++ claimsSql ++ [methodSql, pathSql] ++ headersSql ++ cookiesSql ++ timezoneSql ++ funcSettingsSql ++ appSettingsSql + searchPathSql : roleSettingsSql ++ roleSql ++ claimsSql ++ [methodSql, pathSql] ++ headersSql ++ cookiesSql ++ timezoneSql ++ funcSettingsSql ++ appSettingsSql ++ traceContextSql ) where methodSql = setConfigWithConstantName ("request.method", iMethod) @@ -52,6 +51,7 @@ txVarQuery dbActPlan AppConfig{..} AuthResult{..} ApiRequest{..} = claims = authClaims & KM.insert "role" (JSON.String $ decodeUtf8 authRole) -- insert "role" to claims as well roleSql = [setConfigWithConstantName ("role", authRole)] + traceContextSql = maybe mempty (\tc -> [setConfigWithConstantName ("pg_tracing.trace_context", tc)]) traceContext roleSettingsSql = setConfigWithDynamicName <$> HM.toList (fromMaybe mempty $ HM.lookup authRole configRoleSettings) appSettingsSql = setConfigWithDynamicName . join bimap toUtf8 <$> configAppSettings timezoneSql = maybe mempty (\(PreferTimezone tz) -> [setConfigWithConstantName ("timezone", tz)]) $ preferTimezone iPreferences diff --git a/stack.yaml b/stack.yaml index c88035cee7..758a35fee7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -16,5 +16,18 @@ extra-deps: - postgresql-libpq-0.10.1.0 - streaming-commons-0.2.3.1 + - hs-opentelemetry-sdk-0.1.0.0@sha256:2642851866f11a494c99f15202d4bd9e75d4a5e1a7f3f172742a0676a33c664f,4059 + - hs-opentelemetry-api-0.2.0.0@sha256:bbdbe7e212e99f17a7e68d09b94c1a6613e50ce88b3cb1b68979bbb0221291ae,4051 + - hs-opentelemetry-exporter-otlp-0.1.0.0@sha256:4c908a7e2e5053879687b7a7ee6e40a8eb22868e1a0808cd0cfd6ac9905057b8,1526 + - hs-opentelemetry-instrumentation-wai-0.1.1.0@sha256:d97b4cb3870217e64e95da3f51db814eca62eb57484ee0a6f747366da5940bc2,1371 + - hs-opentelemetry-propagator-b3-0.0.1.2@sha256:8815dd74f27a908b5be0729cc09a3bf9f3049481c982252bbd6c3f6b908ecfcd,1340 + - hs-opentelemetry-propagator-datadog-0.0.1.0@sha256:c85de95e3c33b3ffcf980f560166e960cab0888e0741315f487288b3653c007c,2950 + - hs-opentelemetry-propagator-w3c-0.0.1.4@sha256:251428754454fbaf71d9b6acbbea473014b1ab50bdcda8bc8fe1532e63193374,1382 + - hs-opentelemetry-utils-exceptions-0.2.0.1@sha256:b32c3109b896dbab67c74c28e8ffcfe6e7f86aa29454fc6a31c06a671246e78b,1477 + - hs-opentelemetry-otlp-0.1.0.0@sha256:5cd096b15f26f51ffae4c18f6a26794daef801acc9e13033db8b21a7606336d4,2533 + - hs-opentelemetry-instrumentation-hspec-0.0.1.2@sha256:cba36dc9a8fed4288a1b9d071b869f5b3382451fe37821e369628e2761834fb4,1191 + - thread-utils-context-0.3.0.4@sha256:e763da1c6cab3b6d378fb670ca74aa9bf03c9b61b6fcf7628c56363fb0e3e71e,1671 + - thread-utils-finalizers-0.1.1.0@sha256:24944b71d9f1d01695a5908b4a3b44838fab870883114a323336d537995e0a5b,1381 + # fix build with GCC 15-ish; https://github.com/gregorycollins/hashtables/issues/98 for details - hashtables-1.4.2@sha256:4940cab94a15d469845ccf5225f9cb3d354c15e8127ebb58425c8b681f7721d9,10386 diff --git a/stack.yaml.lock b/stack.yaml.lock index ae2cbfd7ce..0f875245c9 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -46,6 +46,90 @@ packages: size: 2374 original: hackage: streaming-commons-0.2.3.1 +- completed: + hackage: hs-opentelemetry-sdk-0.1.0.0@sha256:2642851866f11a494c99f15202d4bd9e75d4a5e1a7f3f172742a0676a33c664f,4059 + pantry-tree: + sha256: c0868a6eb3d6add84df1ad32cdb0ebdbebe41205897e16ae8b30e96f205a8fe0 + size: 1934 + original: + hackage: hs-opentelemetry-sdk-0.1.0.0@sha256:2642851866f11a494c99f15202d4bd9e75d4a5e1a7f3f172742a0676a33c664f,4059 +- completed: + hackage: hs-opentelemetry-api-0.2.0.0@sha256:bbdbe7e212e99f17a7e68d09b94c1a6613e50ce88b3cb1b68979bbb0221291ae,4051 + pantry-tree: + sha256: fcb11b19fa633afb8c34e002e6b8e8927d20fc2332d4234cc10a0b6e3dbe6022 + size: 4396 + original: + hackage: hs-opentelemetry-api-0.2.0.0@sha256:bbdbe7e212e99f17a7e68d09b94c1a6613e50ce88b3cb1b68979bbb0221291ae,4051 +- completed: + hackage: hs-opentelemetry-exporter-otlp-0.1.0.0@sha256:4c908a7e2e5053879687b7a7ee6e40a8eb22868e1a0808cd0cfd6ac9905057b8,1526 + pantry-tree: + sha256: dd22c915f65b1ca76c6130cfb39ce666376d4813c267e12dd59be61a914bb264 + size: 511 + original: + hackage: hs-opentelemetry-exporter-otlp-0.1.0.0@sha256:4c908a7e2e5053879687b7a7ee6e40a8eb22868e1a0808cd0cfd6ac9905057b8,1526 +- completed: + hackage: hs-opentelemetry-instrumentation-wai-0.1.1.0@sha256:d97b4cb3870217e64e95da3f51db814eca62eb57484ee0a6f747366da5940bc2,1371 + pantry-tree: + sha256: 23bbd4e58ba48b0ec3541a494d02e08e6b934d7173523be8aab04c6b2c7bb98b + size: 360 + original: + hackage: hs-opentelemetry-instrumentation-wai-0.1.1.0@sha256:d97b4cb3870217e64e95da3f51db814eca62eb57484ee0a6f747366da5940bc2,1371 +- completed: + hackage: hs-opentelemetry-propagator-b3-0.0.1.2@sha256:8815dd74f27a908b5be0729cc09a3bf9f3049481c982252bbd6c3f6b908ecfcd,1340 + pantry-tree: + sha256: fc71f8b7dc25625af6b81c1b3c1c5d808b682e2a7c1daf8e23f2af45ab9dc123 + size: 431 + original: + hackage: hs-opentelemetry-propagator-b3-0.0.1.2@sha256:8815dd74f27a908b5be0729cc09a3bf9f3049481c982252bbd6c3f6b908ecfcd,1340 +- completed: + hackage: hs-opentelemetry-propagator-datadog-0.0.1.0@sha256:c85de95e3c33b3ffcf980f560166e960cab0888e0741315f487288b3653c007c,2950 + pantry-tree: + sha256: 04c10d8901e506c8c7662c8ce549a152118303fd2d0354a887bede4e73f0a8ee + size: 730 + original: + hackage: hs-opentelemetry-propagator-datadog-0.0.1.0@sha256:c85de95e3c33b3ffcf980f560166e960cab0888e0741315f487288b3653c007c,2950 +- completed: + hackage: hs-opentelemetry-propagator-w3c-0.0.1.4@sha256:251428754454fbaf71d9b6acbbea473014b1ab50bdcda8bc8fe1532e63193374,1382 + pantry-tree: + sha256: 5f7ff3fd37b7f720064193f02c84e8af6b554f8a7a2b7702a4bcd34fe576f721 + size: 445 + original: + hackage: hs-opentelemetry-propagator-w3c-0.0.1.4@sha256:251428754454fbaf71d9b6acbbea473014b1ab50bdcda8bc8fe1532e63193374,1382 +- completed: + hackage: hs-opentelemetry-utils-exceptions-0.2.0.1@sha256:b32c3109b896dbab67c74c28e8ffcfe6e7f86aa29454fc6a31c06a671246e78b,1477 + pantry-tree: + sha256: 7829e2f06282a2ca913ab46ac98a3dd5b0b89b1189d5eb071f250b641115e548 + size: 406 + original: + hackage: hs-opentelemetry-utils-exceptions-0.2.0.1@sha256:b32c3109b896dbab67c74c28e8ffcfe6e7f86aa29454fc6a31c06a671246e78b,1477 +- completed: + hackage: hs-opentelemetry-otlp-0.1.0.0@sha256:5cd096b15f26f51ffae4c18f6a26794daef801acc9e13033db8b21a7606336d4,2533 + pantry-tree: + sha256: 618a513764a7ae9995fc4f8b8ee5cec731a8759ac8c5df8e9553171abd3ff97d + size: 2585 + original: + hackage: hs-opentelemetry-otlp-0.1.0.0@sha256:5cd096b15f26f51ffae4c18f6a26794daef801acc9e13033db8b21a7606336d4,2533 +- completed: + hackage: hs-opentelemetry-instrumentation-hspec-0.0.1.2@sha256:cba36dc9a8fed4288a1b9d071b869f5b3382451fe37821e369628e2761834fb4,1191 + pantry-tree: + sha256: 04ca1dab12e22a1a43f491568674470e49ad29a4d2796af87c41a6e354c49293 + size: 365 + original: + hackage: hs-opentelemetry-instrumentation-hspec-0.0.1.2@sha256:cba36dc9a8fed4288a1b9d071b869f5b3382451fe37821e369628e2761834fb4,1191 +- completed: + hackage: thread-utils-context-0.3.0.4@sha256:e763da1c6cab3b6d378fb670ca74aa9bf03c9b61b6fcf7628c56363fb0e3e71e,1671 + pantry-tree: + sha256: 57d909a991b5e0b4c7a28121cb52ee9c2db6c09e0419b89af6c82fae52be88d4 + size: 397 + original: + hackage: thread-utils-context-0.3.0.4@sha256:e763da1c6cab3b6d378fb670ca74aa9bf03c9b61b6fcf7628c56363fb0e3e71e,1671 +- completed: + hackage: thread-utils-finalizers-0.1.1.0@sha256:24944b71d9f1d01695a5908b4a3b44838fab870883114a323336d537995e0a5b,1381 + pantry-tree: + sha256: 8c2c2e2e22c20bf3696ee6f30b50b3a9eeae187a22beb536441eefb0a3f9c549 + size: 400 + original: + hackage: thread-utils-finalizers-0.1.1.0@sha256:24944b71d9f1d01695a5908b4a3b44838fab870883114a323336d537995e0a5b,1381 - completed: hackage: hashtables-1.4.2@sha256:4940cab94a15d469845ccf5225f9cb3d354c15e8127ebb58425c8b681f7721d9,10386 pantry-tree: diff --git a/test/io/configs/expected/aliases.config b/test/io/configs/expected/aliases.config index d280c6254c..ba8217b889 100644 --- a/test/io/configs/expected/aliases.config +++ b/test/io/configs/expected/aliases.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/boolean-numeric.config b/test/io/configs/expected/boolean-numeric.config index 7f1ac07a14..e8a17b60d5 100644 --- a/test/io/configs/expected/boolean-numeric.config +++ b/test/io/configs/expected/boolean-numeric.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/boolean-string.config b/test/io/configs/expected/boolean-string.config index 7f1ac07a14..e8a17b60d5 100644 --- a/test/io/configs/expected/boolean-string.config +++ b/test/io/configs/expected/boolean-string.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/defaults.config b/test/io/configs/expected/defaults.config index ddd9364c28..dce0e1c079 100644 --- a/test/io/configs/expected/defaults.config +++ b/test/io/configs/expected/defaults.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/jspath-str-op-dump1.config b/test/io/configs/expected/jspath-str-op-dump1.config index 25fd233463..2a1461333d 100644 --- a/test/io/configs/expected/jspath-str-op-dump1.config +++ b/test/io/configs/expected/jspath-str-op-dump1.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/jspath-str-op-dump2.config b/test/io/configs/expected/jspath-str-op-dump2.config index b53bf827ed..6a212ad886 100644 --- a/test/io/configs/expected/jspath-str-op-dump2.config +++ b/test/io/configs/expected/jspath-str-op-dump2.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/jspath-str-op-dump3.config b/test/io/configs/expected/jspath-str-op-dump3.config index bd1bed9936..10454ef1a6 100644 --- a/test/io/configs/expected/jspath-str-op-dump3.config +++ b/test/io/configs/expected/jspath-str-op-dump3.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/jspath-str-op-dump4.config b/test/io/configs/expected/jspath-str-op-dump4.config index b169f03a22..f60b9bc862 100644 --- a/test/io/configs/expected/jspath-str-op-dump4.config +++ b/test/io/configs/expected/jspath-str-op-dump4.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/jspath-str-op-dump5.config b/test/io/configs/expected/jspath-str-op-dump5.config index 13596cc813..78b0f2d6b1 100644 --- a/test/io/configs/expected/jspath-str-op-dump5.config +++ b/test/io/configs/expected/jspath-str-op-dump5.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/no-defaults-with-db-other-authenticator.config b/test/io/configs/expected/no-defaults-with-db-other-authenticator.config index c47bb402ed..53344cfd08 100644 --- a/test/io/configs/expected/no-defaults-with-db-other-authenticator.config +++ b/test/io/configs/expected/no-defaults-with-db-other-authenticator.config @@ -36,6 +36,7 @@ server-host = "0.0.0.0" server-port = 80 server-trace-header = "traceparent" server-timing-enabled = true +server-otel-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" server-unix-socket-mode = "777" admin-server-host = "127.0.0.1" diff --git a/test/io/configs/expected/no-defaults-with-db.config b/test/io/configs/expected/no-defaults-with-db.config index 4363f3262c..690f60c01a 100644 --- a/test/io/configs/expected/no-defaults-with-db.config +++ b/test/io/configs/expected/no-defaults-with-db.config @@ -36,6 +36,7 @@ server-host = "0.0.0.0" server-port = 80 server-trace-header = "CF-Ray" server-timing-enabled = false +server-otel-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" server-unix-socket-mode = "777" admin-server-host = "127.0.0.1" diff --git a/test/io/configs/expected/no-defaults.config b/test/io/configs/expected/no-defaults.config index 3156287737..e59862880c 100644 --- a/test/io/configs/expected/no-defaults.config +++ b/test/io/configs/expected/no-defaults.config @@ -36,6 +36,7 @@ server-host = "0.0.0.0" server-port = 80 server-trace-header = "X-Request-Id" server-timing-enabled = true +server-otel-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" server-unix-socket-mode = "777" admin-server-host = "127.0.0.1" diff --git a/test/io/configs/expected/types.config b/test/io/configs/expected/types.config index fac4d596df..f2cf6f7761 100644 --- a/test/io/configs/expected/types.config +++ b/test/io/configs/expected/types.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/expected/utf-8.config b/test/io/configs/expected/utf-8.config index 7f29a498c9..98b1e059ae 100644 --- a/test/io/configs/expected/utf-8.config +++ b/test/io/configs/expected/utf-8.config @@ -36,6 +36,7 @@ server-host = "!4" server-port = 3000 server-trace-header = "" server-timing-enabled = false +server-otel-enabled = false server-unix-socket = "" server-unix-socket-mode = "660" admin-server-host = "!4" diff --git a/test/io/configs/no-defaults-env.yaml b/test/io/configs/no-defaults-env.yaml index ce9280e1e5..b05ac3ae23 100644 --- a/test/io/configs/no-defaults-env.yaml +++ b/test/io/configs/no-defaults-env.yaml @@ -39,6 +39,7 @@ PGRST_SERVER_HOST: 0.0.0.0 PGRST_SERVER_PORT: 80 PGRST_SERVER_TRACE_HEADER: X-Request-Id PGRST_SERVER_TIMING_ENABLED: true +PGRST_SERVER_OTEL_ENABLED: true PGRST_SERVER_UNIX_SOCKET: /tmp/pgrst_io_test.sock PGRST_SERVER_UNIX_SOCKET_MODE: 777 PGRST_ADMIN_SERVER_HOST: 127.0.0.1 diff --git a/test/io/configs/no-defaults.config b/test/io/configs/no-defaults.config index 6bb1cec158..61e1d95c8c 100644 --- a/test/io/configs/no-defaults.config +++ b/test/io/configs/no-defaults.config @@ -36,6 +36,7 @@ server-host = "0.0.0.0" server-port = 80 server-trace-header = "X-Request-Id" server-timing-enabled = true +server-otel-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" server-unix-socket-mode = "777" admin-server-port = 3001 diff --git a/test/io/test_io.py b/test/io/test_io.py index 6dc81845c8..4e53ed3174 100644 --- a/test/io/test_io.py +++ b/test/io/test_io.py @@ -1916,3 +1916,17 @@ def test_vary_default_header_set(defaultenv): response = postgrest.session.get("/projects") assert response.headers["Vary"] == "Accept, Prefer, Range" + + +def test_server_otel_enabled(defaultenv): + "Enable OpenTelemetry and check that the code branch is covered" + + env = { + **defaultenv, + "PGRST_SERVER_OTEL_ENABLED": "true", + } + + with run(env=env) as postgrest: + response = postgrest.session.get("/projects") + + assert response.status_code == 200 diff --git a/test/observability/Main.hs b/test/observability/Main.hs index 7a38b51c81..ff63d82cfe 100644 --- a/test/observability/Main.hs +++ b/test/observability/Main.hs @@ -4,22 +4,28 @@ import qualified Hasql.Pool as P import qualified Hasql.Pool.Config as P import qualified Hasql.Transaction.Sessions as HT -import Data.Function (id) +import Data.Function (id) +import System.Directory (findExecutable) +import Network.Wai (Application) import PostgREST.App (postgrest) import qualified PostgREST.AppState as AppState import PostgREST.Config (AppConfig (..)) import PostgREST.Config.Database (queryPgVersion) import qualified PostgREST.Logger as Logger import qualified PostgREST.Metrics as Metrics +import PostgREST.OpenTelemetry (withTracer) import PostgREST.SchemaCache (querySchemaCache) import qualified Observation.JwtCache import qualified Observation.MetricsSpec +import qualified Observation.OpenTelemetry import ObsHelper +import OTelHelper import PostgREST.Observation (Observation (HasqlPoolObs)) -import Protolude hiding (toList, toS) +import Protolude hiding (toList) +import System.IO.Temp (withSystemTempDirectory) import Test.Hspec main :: IO () @@ -49,22 +55,50 @@ main = do loggerState <- Logger.init let - initApp sCache config = do + initApp sCache config tracer = do -- duplicate poolChan as a starting point obsChan <- dupChan poolChan stateObsChan <- newObsChan obsChan - appState <- AppState.initWithPool pool config loggerState metricsState (Metrics.observationMetrics metricsState <> writeChan obsChan) + appState <- AppState.initWithPool pool config loggerState metricsState tracer (Metrics.observationMetrics metricsState <> writeChan obsChan) AppState.putPgVersion appState actualPgVersion AppState.putSchemaCache appState (Just sCache) return (SpecState appState metricsState stateObsChan, postgrest (configLogLevel config) appState (pure ())) + initJwtApp = initApp baseSchemaCache testCfgJwtCache Nothing + + initMetricsApp = initApp baseSchemaCache testCfg Nothing + + -- Dedicated initializer for the OTel spec: start collector, configure env, then create OTel-enabled app. + initOTelApp :: ActionWith (Maybe Collector, Application) -> IO () + initOTelApp action = do + mCollectorBin <- findExecutable "otelcol" + case mCollectorBin of + Nothing -> do + -- app is still initialized so the state type remains consistent; the spec will mark itself pending. + (_, app) <- initApp baseSchemaCache testCfgOTel Nothing + action (Nothing, app) + Just collectorBin -> + withSystemTempDirectory "postgrest-otel-" $ \tmpDir -> + bracket + (startCollector tmpDir collectorBin) + stopCollector + (\collector -> do + configureOTelEnv (collectorEndpoint collector) + withTracer $ \tracer -> do + (_, app) <- initApp baseSchemaCache testCfgOTel (Just tracer) + action (Just collector, app) + ) + -- Run all test modules hspec $ do - before (initApp baseSchemaCache testCfgJwtCache) $ + before initJwtApp $ describe "Observation.JwtCacheObs" Observation.JwtCache.spec - before (initApp baseSchemaCache testCfg) $ + before initMetricsApp $ describe "Feature.MetricsSpec" Observation.MetricsSpec.spec + around initOTelApp $ + describe "Observation.OpenTelemetry" Observation.OpenTelemetry.spec + where loadSCache pool conf = either (panic.show) id <$> P.use pool (HT.transaction HT.ReadCommitted HT.Read $ querySchemaCache conf) diff --git a/test/observability/OTelHelper.hs b/test/observability/OTelHelper.hs new file mode 100644 index 0000000000..a49bcdc9cc --- /dev/null +++ b/test/observability/OTelHelper.hs @@ -0,0 +1,193 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RecordWildCards #-} + +module OTelHelper where + +import Control.Retry (constantDelay, limitRetries, + retrying) +import qualified Data.Aeson as JSON +import qualified Data.Aeson.KeyMap as KM +import qualified Data.Aeson.Types as JSONT +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.ByteString.Lazy as BL +import qualified Data.List as L +import qualified Data.Map.Strict as M +import Data.Streaming.Network (getUnassignedPort) +import System.Directory (doesFileExist) +import System.Environment (setEnv) +import System.FilePath (()) +import System.IO (BufferMode (LineBuffering), + hClose, hSetBuffering) +import System.Process (CreateProcess (..), + ProcessHandle, + StdStream (UseHandle), + createProcess, proc, + terminateProcess, + waitForProcess) + +import Protolude + +data Collector = Collector + { collectorHandle :: ProcessHandle + , collectorLogHandle :: Handle + , collectorLogPath :: FilePath + , collectorTracesPath :: FilePath + , collectorPort :: Int + } + +data NormalizedSpan = NormalizedSpan + { spanName :: Text + , spanParent :: Maybe Text + } deriving (Eq, Ord, Show) + +instance JSON.FromJSON NormalizedSpan where + parseJSON = JSON.withObject "NormalizedSpan" $ \obj -> + NormalizedSpan <$> obj JSON..: "name" <*> obj JSON..:? "parent" + +instance JSON.ToJSON NormalizedSpan where + toJSON NormalizedSpan{..} = + JSON.object ["name" JSON..= spanName, "parent" JSON..= spanParent] + +type RawSpan = (Text, Maybe Text, Maybe Text) + +collectorEndpoint :: Collector -> Text +collectorEndpoint Collector{..} = "http://127.0.0.1:" <> show collectorPort + +startCollector :: FilePath -> FilePath -> IO Collector +startCollector tmpDir collectorBin = do + collectorPort <- getUnassignedPort + let configPath = tmpDir "otelcol-config.yaml" + collectorTracesPath = tmpDir "traces.json" + collectorLogPath = tmpDir "otelcol.log" + openLog = do + h <- openFile collectorLogPath WriteMode + hSetBuffering h LineBuffering + pure h + + writeFile configPath . toS $ collectorConfig collectorPort collectorTracesPath + bracketOnError openLog cleanupLogHandle $ \collectorLogHandle -> do + (_, _, _, collectorHandle) <- createProcess (proc collectorBin ["--config", configPath]) + { std_out = UseHandle collectorLogHandle + , std_err = UseHandle collectorLogHandle + } + pure Collector{..} + +cleanupLogHandle :: Handle -> IO () +cleanupLogHandle h = void (try (hClose h) :: IO (Either SomeException ())) + +stopCollector :: Collector -> IO () +stopCollector Collector{..} = do + void (try (terminateProcess collectorHandle) :: IO (Either SomeException ())) + void (try (waitForProcess collectorHandle) :: IO (Either SomeException ExitCode)) + cleanupLogHandle collectorLogHandle + +waitForNormalizedSpans :: Collector -> IO [NormalizedSpan] +waitForNormalizedSpans collector@Collector{..} = + retrying (constantDelay 200_000 <> limitRetries 49) shouldRetry (const pollSpans) + >>= either timeout pure + where + pollSpans = do + exists <- doesFileExist collectorTracesPath + if not exists + then pure pending + else do + result <- decodeNormalizedSpans <$> BL.readFile collectorTracesPath + pure $ case result of + Right spans -> bool pending (Right spans) (not $ null spans) + Left err -> Left $ Just err + + pending = Left Nothing + shouldRetry _ result = pure $ isLeft result + timeout lastErr = + failCollector collector $ + "Timed out waiting for exported traces at " + <> toS collectorTracesPath + <> maybe mempty ("\nLast decode error:\n" <>) lastErr + +decodeNormalizedSpans :: BL.ByteString -> Either Text [NormalizedSpan] +decodeNormalizedSpans = fmap (normalizeSpans . concatMap extractSpans) . decodeValues + +normalizeSpans :: [RawSpan] -> [NormalizedSpan] +normalizeSpans spans = + L.sortOn (\NormalizedSpan{..} -> (spanParent, spanName)) + [ NormalizedSpan spanName (parentSpanId >>= (`M.lookup` spanIdsToNames)) + | (spanName, _, parentSpanId) <- interesting + ] + where + interesting = filter (\(spanName, _, _) -> spanName `elem` ["request", "parse", "plan", "query", "response"]) spans + spanIdsToNames = M.fromList [(spanId, spanName) | (spanName, Just spanId, _) <- interesting] + +decodeValues :: BL.ByteString -> Either Text [JSON.Value] +decodeValues bytes + | BL.null bytes = Right [] + | otherwise = + case JSON.eitherDecode bytes of + Right value -> Right [value] + Left jsonErr -> + first + (\ndjsonErr -> + "Failed to decode collector payload as JSON (" + <> toS jsonErr + <> ") or newline-delimited JSON (" + <> toS ndjsonErr + <> ")" + ) + ( traverse JSON.eitherDecodeStrict' + . filter (not . BS.null) + . BSC.lines + $ BL.toStrict bytes + ) + +extractSpans :: JSON.Value -> [RawSpan] +extractSpans = \case + JSON.Array arr -> concatMap extractSpans (toList arr) + JSON.Object obj -> + let directSpans = case KM.lookup "spans" obj of + Just (JSON.Array spans) -> mapMaybe parseSpan (toList spans) + _ -> [] + in directSpans <> concatMap extractSpans (KM.elems obj) + _ -> [] + +parseSpan :: JSON.Value -> Maybe RawSpan +parseSpan = JSONT.parseMaybe $ JSON.withObject "Span" $ \obj -> + (,,) <$> obj JSON..: "name" <*> obj JSON..:? "spanId" <*> obj JSON..:? "parentSpanId" + +failCollector :: Collector -> Text -> IO a +failCollector Collector{..} message = do + logs <- bool (pure "") (readFile collectorLogPath) =<< doesFileExist collectorLogPath + panic . toS $ message <> bool "" ("\nCollector logs:\n" <> logs) (logs /= "") + +collectorConfig :: Int -> FilePath -> Text +collectorConfig port tracesPath = + unlines + [ "receivers:" + , " otlp:" + , " protocols:" + , " http:" + , " endpoint: 127.0.0.1:" <> show port + , "exporters:" + , " file:" + , " path: \"" <> toS tracesPath <> "\"" + , "service:" + , " pipelines:" + , " traces:" + , " receivers: [otlp]" + , " exporters: [file]" + ] + +configureOTelEnv :: Text -> IO () +configureOTelEnv otlpEndpoint = do + -- Configure the SDK once so spans from the OTel test go to the local collector. + mapM_ + (uncurry setEnv) + [ ("OTEL_TRACES_EXPORTER", "otlp"), + ("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf"), + ("OTEL_EXPORTER_OTLP_ENDPOINT", toS otlpEndpoint), + ("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", toS $ otlpEndpoint <> "/v1/traces"), + ("OTEL_TRACES_SAMPLER", "always_on"), + ("OTEL_BSP_SCHEDULE_DELAY", "100"), + ("OTEL_BSP_EXPORT_TIMEOUT", "30000"), + ("OTEL_SERVICE_NAME", "postgrest-observability-tests") + ] diff --git a/test/observability/ObsHelper.hs b/test/observability/ObsHelper.hs index fb897390e8..429ac72aee 100644 --- a/test/observability/ObsHelper.hs +++ b/test/observability/ObsHelper.hs @@ -121,6 +121,7 @@ baseCfg = let secret = encodeUtf8 "reallyreallyreallyreallyverysafe" in , configInternalSCLoadSleep = Nothing , configInternalSCRelLoadSleep = Nothing , configServerTimingEnabled = True + , configServerOtelEnabled = False } testCfg :: AppConfig @@ -134,6 +135,9 @@ testCfgJwtCache = , configJwtCacheMaxEntries = 2 } +testCfgOTel :: AppConfig +testCfgOTel = baseCfg { configServerOtelEnabled = True } + authHeader :: BS.ByteString -> BS.ByteString -> Header authHeader typ creds = (hAuthorization, typ <> " " <> creds) diff --git a/test/observability/Observation/OpenTelemetry.hs b/test/observability/Observation/OpenTelemetry.hs new file mode 100644 index 0000000000..4b656be4fd --- /dev/null +++ b/test/observability/Observation/OpenTelemetry.hs @@ -0,0 +1,44 @@ +module Observation.OpenTelemetry where + +import qualified Data.Aeson as JSON +import qualified Data.ByteString.Lazy as BL +import Network.HTTP.Types (methodGet) +import Network.Wai (Application) +import Test.Hspec (SpecWith, describe, it, + shouldBe) +import Test.Hspec.Wai + +import Protolude + +import ObsHelper (authHeaderJWT, generateJWT) +import OTelHelper (Collector, NormalizedSpan, waitForNormalizedSpans) + +spec :: SpecWith (Maybe Collector, Application) +spec = + describe "OpenTelemetry traces" $ + it "exports normalized request spans and matches snapshot output" $ do + mCollector <- getState + case mCollector of + Nothing -> + pendingWith "OTel collector binary not found. Expected otelcol in PATH." + Just collector -> do + expected <- liftIO loadSnapshotSpans + let claims :: BL.ByteString + claims = "{\"exp\":9999999999,\"role\":\"postgrest_test_author\",\"id\":\"otel\"}" + auth = authHeaderJWT (generateJWT claims) + + request methodGet "/authors_only" [auth] "" + `shouldRespondWith` 200 + + actual <- liftIO $ waitForNormalizedSpans collector + liftIO $ actual `shouldBe` expected + +loadSnapshotSpans :: IO [NormalizedSpan] +loadSnapshotSpans = do + res <- JSON.eitherDecodeFileStrict' snapshotTracePath + case res of + Left err -> panic $ "Failed to decode snapshot traces from " <> show snapshotTracePath <> ": " <> toS err + Right spans -> pure spans + where + snapshotTracePath :: FilePath + snapshotTracePath = "test/observability/fixtures/otel-traces.snapshot.json" diff --git a/test/observability/fixtures/otel-traces.snapshot.json b/test/observability/fixtures/otel-traces.snapshot.json new file mode 100644 index 0000000000..ca764d11df --- /dev/null +++ b/test/observability/fixtures/otel-traces.snapshot.json @@ -0,0 +1,22 @@ +[ + { + "name": "request", + "parent": null + }, + { + "name": "parse", + "parent": "request" + }, + { + "name": "plan", + "parent": "request" + }, + { + "name": "query", + "parent": "request" + }, + { + "name": "response", + "parent": "request" + } +] diff --git a/test/spec/Main.hs b/test/spec/Main.hs index a7f9ee7aa0..04ba55ae39 100644 --- a/test/spec/Main.hs +++ b/test/spec/Main.hs @@ -6,11 +6,14 @@ import qualified Hasql.Transaction.Sessions as HT import Data.Function (id) +import OpenTelemetry.Context.ThreadLocal (getContext) +import OpenTelemetry.Instrumentation.Hspec import Test.Hspec import PostgREST.App (postgrest) import PostgREST.Config (AppConfig (..)) import PostgREST.Config.Database (queryPgVersion) +import PostgREST.OpenTelemetry (withTracer) import PostgREST.SchemaCache (querySchemaCache) import Protolude hiding (toList, toS) import SpecHelper @@ -72,7 +75,7 @@ import qualified Feature.RpcPreRequestGucsSpec main :: IO () -main = do +main = withTracer $ \tracer -> do pool <- P.acquire $ P.settings [ P.size 3 , P.acquisitionTimeout 10 @@ -90,7 +93,10 @@ main = do let initApp sCache st config = do - appState <- AppState.initWithPool pool config loggerState metricsState (Metrics.observationMetrics metricsState) + let tracer' = if configServerOtelEnabled config + then Just tracer + else Nothing + appState <- AppState.initWithPool pool config loggerState metricsState tracer' (Metrics.observationMetrics metricsState) AppState.putPgVersion appState actualPgVersion AppState.putSchemaCache appState (Just sCache) return (st, postgrest (configLogLevel config) appState (pure ())) @@ -168,7 +174,8 @@ main = do , ("Feature.Query.UpsertSpec" , Feature.Query.UpsertSpec.spec) ] - hspec $ do + ctxt <- getContext + hspec $ instrumentSpec tracer ctxt $ do mapM_ (parallel . before withApp) specs -- we analyze to get accurate results from EXPLAIN diff --git a/test/spec/SpecHelper.hs b/test/spec/SpecHelper.hs index 6b50ec5a4d..116ac3a35f 100644 --- a/test/spec/SpecHelper.hs +++ b/test/spec/SpecHelper.hs @@ -162,6 +162,7 @@ baseCfg = let secret = encodeUtf8 "reallyreallyreallyreallyverysafe" in , configInternalSCLoadSleep = Nothing , configInternalSCRelLoadSleep = Nothing , configServerTimingEnabled = True + , configServerOtelEnabled = False } testCfg :: AppConfig