Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions cliv2/cmd/cliv2/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@ package main
import _ "github.com/snyk/go-application-framework/pkg/networking/fips_enable"

import (
"context"
"encoding/json"
"os/exec"
"strconv"
"strings"
"time"

"github.com/rs/zerolog"
"github.com/snyk/go-application-framework/pkg/analytics"
"github.com/snyk/go-application-framework/pkg/configuration"
"github.com/snyk/go-application-framework/pkg/instrumentation"

"github.com/snyk/cli/cliv2/internal/constants"
cli_utils "github.com/snyk/cli/cliv2/internal/utils"

localworkflows "github.com/snyk/go-application-framework/pkg/local_workflows"
Expand Down Expand Up @@ -74,3 +79,76 @@ func updateInstrumentationDataBeforeSending(cliAnalytics analytics.Analytics, st
cliAnalytics.GetInstrumentation().SetStatus(analytics.Failure)
}
}

// sendAnalytics delivers the collected CLI analytics payload to the configured
// API endpoint. The given ctx bounds the request (e.g. the teardown timeout).
// Failures are logged via debugLogger and never propagated: analytics delivery
// must not influence the CLI's exit behavior.
func sendAnalytics(ctx context.Context, a analytics.Analytics, debugLogger *zerolog.Logger) {
	debugLogger.Print("Sending Analytics")

	a.SetApiUrl(globalConfiguration.GetString(configuration.API_URL))

	request, err := a.GetRequest()
	if err != nil {
		debugLogger.Err(err).Msg("Failed to create Analytics request")
		return
	}

	// Use context to respect teardown timeout
	request = request.WithContext(ctx)

	client := globalEngine.GetNetworkAccess().GetHttpClient()
	res, err := client.Do(request)
	if err != nil {
		debugLogger.Err(err).Msg("Failed to send Analytics")
		return
	}
	defer func() {
		_ = res.Body.Close()
	}()

	// Any 2xx response counts as a successful delivery.
	successfullySent := 200 <= res.StatusCode && res.StatusCode < 300
	if successfullySent {
		debugLogger.Print("Analytics successfully sent")
	} else {
		debugLogger.Print("Failed to send Analytics:", res.Status)
	}
}

// sendInstrumentation reports V2 instrumentation data for this CLI invocation
// through the report-analytics workflow. The given ctx bounds the workflow
// invocation (e.g. the teardown timeout). Failures are logged and never
// propagated; instrumentation must not influence the CLI's exit behavior.
func sendInstrumentation(ctx context.Context, eng workflow.Engine, instrumentor analytics.InstrumentationCollector, logger *zerolog.Logger) {
	// Avoid duplicate data to be sent for IDE integrations that use the CLI
	if !shallSendInstrumentation(eng.GetConfiguration(), instrumentor) {
		logger.Print("This CLI call is not instrumented!")
		return
	}

	// add temporary static nodejs binary flag, remove once linuxstatic is official
	staticNodeJsBinaryBool, parseErr := strconv.ParseBool(constants.StaticNodeJsBinary)
	if parseErr != nil {
		logger.Print("Failed to parse staticNodeJsBinary:", parseErr)
	} else {
		// the legacycli:: prefix is added to maintain compatibility with our monitoring dashboard
		instrumentor.AddExtension("legacycli::static-nodejs-binary", staticNodeJsBinaryBool)
	}

	logger.Print("Sending Instrumentation")
	data, err := analytics.GetV2InstrumentationObject(instrumentor, analytics.WithLogger(logger))
	if err != nil {
		logger.Err(err).Msg("Failed to derive data object")
		// Without a data object there is nothing meaningful to report;
		// previously execution fell through and sent a "null" payload.
		return
	}

	v2InstrumentationData := utils.ValueOf(json.Marshal(data))
	localConfiguration := globalConfiguration.Clone()
	// the report analytics workflow needs --experimental to run
	// we pass the flag here so that we report at every interaction
	localConfiguration.Set(configuration.FLAG_EXPERIMENTAL, true)
	localConfiguration.Set("inputData", string(v2InstrumentationData))
	_, err = eng.Invoke(
		localworkflows.WORKFLOWID_REPORT_ANALYTICS,
		workflow.WithConfig(localConfiguration),
		workflow.WithContext(ctx),
	)

	if err != nil {
		logger.Err(err).Msg("Failed to send Instrumentation")
	} else {
		logger.Print("Instrumentation successfully sent")
	}
}
204 changes: 103 additions & 101 deletions cliv2/cmd/cliv2/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import (
"io"
"os"
"os/exec"
"strconv"
"os/signal"
"strings"
"sync"
"syscall"
"time"

"github.com/google/uuid"
Expand Down Expand Up @@ -75,6 +76,7 @@ import (
var internalOS string
var globalEngine workflow.Engine
var globalConfiguration configuration.Configuration
var globalContext context.Context
var helpProvided bool

var noopLogger zerolog.Logger = zerolog.New(io.Discard)
Expand All @@ -88,6 +90,7 @@ const (
debug_level_flag string = "log-level"
integrationNameFlag string = "integration-name"
maxNetworkRequestAttempts string = "max-attempts"
teardownTimeout = 5 * time.Second
)

type JsonErrorStruct struct {
Expand Down Expand Up @@ -194,98 +197,33 @@ func runMainWorkflow(config configuration.Configuration, cmd *cobra.Command, arg
globalLogger.Print("Running ", name)
globalEngine.GetAnalytics().SetCommand(name)

err = runWorkflowAndProcessData(globalEngine, globalLogger, name)
err = runWorkflowAndProcessData(globalContext, globalEngine, globalLogger, name)

return err
}

func runWorkflowAndProcessData(engine workflow.Engine, logger *zerolog.Logger, name string) error {
func runWorkflowAndProcessData(ctx context.Context, engine workflow.Engine, logger *zerolog.Logger, name string) error {
ic := engine.GetAnalytics().GetInstrumentation()

output, err := engine.Invoke(workflow.NewWorkflowIdentifier(name), workflow.WithInstrumentationCollector(ic))
output, err := engine.Invoke(workflow.NewWorkflowIdentifier(name), workflow.WithContext(ctx), workflow.WithInstrumentationCollector(ic))
if err != nil {
logger.Print("Failed to execute the command! ", err)
return err
}

outputFiltered, err := engine.Invoke(localworkflows.WORKFLOWID_FILTER_FINDINGS, workflow.WithInput(output), workflow.WithInstrumentationCollector(ic))
outputFiltered, err := engine.Invoke(localworkflows.WORKFLOWID_FILTER_FINDINGS, workflow.WithContext(ctx), workflow.WithInput(output), workflow.WithInstrumentationCollector(ic))
if err != nil {
logger.Err(err).Msg(err.Error())
return err
}

_, err = engine.Invoke(localworkflows.WORKFLOWID_OUTPUT_WORKFLOW, workflow.WithInput(outputFiltered), workflow.WithInstrumentationCollector(ic))
_, err = engine.Invoke(localworkflows.WORKFLOWID_OUTPUT_WORKFLOW, workflow.WithContext(ctx), workflow.WithInput(outputFiltered), workflow.WithInstrumentationCollector(ic))
if err == nil {
err = getErrorFromWorkFlowData(engine, outputFiltered)
}
return err
}

// sendAnalytics delivers the collected CLI analytics payload to the configured
// API endpoint. Failures are logged via debugLogger and never propagated:
// analytics delivery must not influence the CLI's exit behavior.
func sendAnalytics(a analytics.Analytics, debugLogger *zerolog.Logger) {
	debugLogger.Print("Sending Analytics")

	a.SetApiUrl(globalConfiguration.GetString(configuration.API_URL))

	res, err := a.Send()
	if err != nil {
		debugLogger.Err(err).Msg("Failed to send Analytics")
		return
	}
	defer func() { _ = res.Body.Close() }()

	// A nil error from Send guarantees a non-nil response, so the status
	// can be inspected directly (the previous res != nil check was dead
	// code: res.StatusCode had already been dereferenced above it).
	successfullySent := 200 <= res.StatusCode && res.StatusCode < 300
	if successfullySent {
		debugLogger.Print("Analytics successfully sent")
	} else {
		debugLogger.Print("Failed to send Analytics:", res.Status)
	}
}

// sendInstrumentation reports V2 instrumentation data for this CLI invocation
// through the report-analytics workflow. Failures are logged and never
// propagated; instrumentation must not influence the CLI's exit behavior.
func sendInstrumentation(eng workflow.Engine, instrumentor analytics.InstrumentationCollector, logger *zerolog.Logger) {
	// Avoid duplicate data to be sent for IDE integrations that use the CLI
	if !shallSendInstrumentation(eng.GetConfiguration(), instrumentor) {
		logger.Print("This CLI call is not instrumented!")
		return
	}

	// add temporary static nodejs binary flag, remove once linuxstatic is official
	staticNodeJsBinaryBool, parseErr := strconv.ParseBool(constants.StaticNodeJsBinary)
	if parseErr != nil {
		logger.Print("Failed to parse staticNodeJsBinary:", parseErr)
	} else {
		// the legacycli:: prefix is added to maintain compatibility with our monitoring dashboard
		instrumentor.AddExtension("legacycli::static-nodejs-binary", staticNodeJsBinaryBool)
	}

	logger.Print("Sending Instrumentation")
	data, err := analytics.GetV2InstrumentationObject(instrumentor, analytics.WithLogger(logger))
	if err != nil {
		logger.Err(err).Msg("Failed to derive data object")
		// Without a data object there is nothing meaningful to report;
		// previously execution fell through and sent a "null" payload.
		return
	}

	v2InstrumentationData := utils.ValueOf(json.Marshal(data))
	localConfiguration := globalConfiguration.Clone()
	// the report analytics workflow needs --experimental to run
	// we pass the flag here so that we report at every interaction
	localConfiguration.Set(configuration.FLAG_EXPERIMENTAL, true)
	localConfiguration.Set("inputData", string(v2InstrumentationData))
	_, err = eng.InvokeWithConfig(
		localworkflows.WORKFLOWID_REPORT_ANALYTICS,
		localConfiguration,
	)

	if err != nil {
		logger.Err(err).Msg("Failed to send Instrumentation")
	} else {
		logger.Print("Instrumentation successfully sent")
	}
}

func help(_ *cobra.Command, _ []string) error {
helpProvided = true
args := utils.RemoveSimilar(os.Args[1:], "--") // remove all double dash arguments to avoid issues with the help command
Expand Down Expand Up @@ -548,11 +486,55 @@ func initExtensions(engine workflow.Engine, config configuration.Configuration)
}
}

// tearDown handles sending analytics and instrumentation
// It is used both for normal exit and signal-triggered exit
func tearDown(ctx context.Context, err error, errorList []error, startTime time.Time, ua networking.UserAgentInfo, cliAnalytics analytics.Analytics, networkAccess networking.NetworkAccess) int {
// Create a context with timeout for teardown operations to ensure we don't hang indefinitely
teardownCtx, cancel := context.WithTimeout(ctx, teardownTimeout)
Comment on lines +491 to +493
Copy link
Copy Markdown
Contributor

@danskmt danskmt Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The context ctx here can happen to be already-cancelled as it is derived from the parent context. Testing locally, I saw this issue happening:

Image

Here I suppose you should use a fresh context for tearing down:

teardownCtx, cancel := context.WithTimeout(context.Background(), teardownTimeout)

Also, the context parameter we can use ctxCancel, updating the function signature:
func tearDown(ctx context.Context, ... -> func tearDown(ctxCancel context.CancelFunc, ...

in the calls of tearDown (L664 and L716), we provide ctxCancel:

finalExitCode = tearDown(ctxCancel, signalError, errorListCopy, startTime, ua, cliAnalytics, networkAccess)
...
finalExitCode = tearDown(ctxCancel, err, errorListCopy, startTime, ua, cliAnalytics, networkAccess)

And after sendInstrumentation call (line 516) can terminate child processes:

sendInstrumentation(teardownCtx, globalEngine, cliAnalytics.GetInstrumentation(), globalLogger)
ctxCancel()

i.e. instead of calling ctxCancel in line 655, we do it here after sendInstrumentation, wdyt?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @danskmt ! Good catches! I marked the PR as draft as it is far from ready. There are plenty of local changes as well, so I prematurely marked this as to review. Sorry for that!

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries! Let me know when you want me to review it again.

defer cancel()

if err != nil {
errorList, err = processError(err, errorList)

for _, tempError := range errorList {
if tempError != nil {
cliAnalytics.AddError(tempError)
}
}
}

exitCode := cliv2.DeriveExitCode(err)
globalLogger.Printf("Deriving Exit Code %d (cause: %v)", exitCode, err)

displayError(err, globalEngine.GetUserInterface(), globalConfiguration, teardownCtx)

updateInstrumentationDataBeforeSending(cliAnalytics, startTime, ua, exitCode)

if !globalConfiguration.GetBool(configuration.ANALYTICS_DISABLED) {
sendAnalytics(teardownCtx, cliAnalytics, globalLogger)
}
sendInstrumentation(teardownCtx, globalEngine, cliAnalytics.GetInstrumentation(), globalLogger)

// cleanup resources in use
// WARNING: deferred actions will execute AFTER cleanup; only defer if not impacted by this
if _, cleanupErr := globalEngine.Invoke(basic_workflows.WORKFLOWID_GLOBAL_CLEANUP, workflow.WithContext(teardownCtx)); cleanupErr != nil {
globalLogger.Printf("Failed to cleanup %v", cleanupErr)
}

if globalConfiguration.GetBool(configuration.DEBUG) {
writeLogFooter(exitCode, errorList, globalConfiguration, networkAccess)
}

return exitCode
}

func MainWithErrorCode() int {
initDebugBuild()

errorList := []error{}
errorListMutex := sync.Mutex{}
var tearDownOnce sync.Once
var finalExitCode int

startTime := time.Now()
var err error
Expand Down Expand Up @@ -633,9 +615,11 @@ func MainWithErrorCode() int {
return constants.SNYK_EXIT_CODE_ERROR
}

// init context
ctx := context.Background()
// init context with cancel function for signal handling
ctx, ctxCancel := context.WithCancel(context.Background())
defer ctxCancel() // ensure context is canceled on exit
ctx = context.WithValue(ctx, networking.InteractionIdKey, instrumentation.AssembleUrnFromUUID(interactionId))
globalContext = ctx

// add output flags as persistent flags
outputWorkflow, _ := globalEngine.GetWorkflow(localworkflows.WORKFLOWID_OUTPUT_WORKFLOW)
Expand All @@ -656,6 +640,35 @@ func MainWithErrorCode() int {
cliAnalytics.GetInstrumentation().SetStage(instrumentation.DetermineStage(cliAnalytics.IsCiEnvironment()))
cliAnalytics.GetInstrumentation().SetStatus(analytics.Success)

// prepare for signal handling
signalChan := make(chan os.Signal, 1)
exitCodeChan := make(chan int, 1)

if globalConfiguration.GetBool(configuration.PREVIEW_FEATURES_ENABLED) {
// Set up signal handling to send instrumentation on premature termination
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
sig := <-signalChan
globalLogger.Printf("Received signal %v, attempting to send instrumentation before exit", sig)

// Cancel the context to terminate any running child processes
ctxCancel()

tearDownOnce.Do(func() {
signalError := cli.NewTerminatedBySignalError(fmt.Sprintf("Signal: %v", sig))

errorListMutex.Lock()
errorListCopy := append([]error{}, errorList...)
errorListMutex.Unlock()

finalExitCode = tearDown(ctx, signalError, errorListCopy, startTime, ua, cliAnalytics, networkAccess)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: if teardown hangs (e.g. when making the analytics API call), can we still SIGINT the process? Or does teardownOnce prevent this? Basically, is it possible to be a hanging state?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned before, I'm working on a solution.

})
// Send exit code to main goroutine instead of calling os.Exit directly
// This allows deferred functions (like lock cleanup) to run
exitCodeChan <- finalExitCode
}()
}

setTimeout(globalConfiguration, func() {
os.Exit(constants.SNYK_EXIT_CODE_EX_UNAVAILABLE)
})
Expand All @@ -681,40 +694,29 @@ func MainWithErrorCode() int {
// ignore
}

if err != nil {
errorList, err = processError(err, errorList)

for _, tempError := range errorList {
if tempError != nil {
cliAnalytics.AddError(tempError)
}
}
// Check if signal handler already ran teardown
select {
case code := <-exitCodeChan:
// Signal was received and teardown completed - return its exit code
return code
default:
// No signal received - run normal teardown
}

displayError(err, globalEngine.GetUserInterface(), globalConfiguration, ctx)

exitCode := cliv2.DeriveExitCode(err)
globalLogger.Printf("Deriving Exit Code %d (cause: %v)", exitCode, err)

updateInstrumentationDataBeforeSending(cliAnalytics, startTime, ua, exitCode)

if !globalConfiguration.GetBool(configuration.ANALYTICS_DISABLED) {
sendAnalytics(cliAnalytics, globalLogger)
if globalConfiguration.GetBool(configuration.PREVIEW_FEATURES_ENABLED) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question(0): what's the reason for putting this behind preview features?
question(1): what's the condition for removing this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q1: This could easily have side effects that I want to carefully roll out. Using preview only is a simple straight forward way to collect feedback from internal and preview usage in general.
Q2: That is a good question. I would say if we think it creates value and if concerns are mitigated.

// Stop signal handling before cleanup to prevent race conditions
signal.Stop(signalChan)
}
sendInstrumentation(globalEngine, cliAnalytics.GetInstrumentation(), globalLogger)

// cleanup resources in use
// WARNING: deferred actions will execute AFTER cleanup; only defer if not impacted by this
_, err = globalEngine.Invoke(basic_workflows.WORKFLOWID_GLOBAL_CLEANUP)
if err != nil {
globalLogger.Printf("Failed to cleanup %v", err)
}
tearDownOnce.Do(func() {
errorListMutex.Lock()
errorListCopy := append([]error{}, errorList...)
errorListMutex.Unlock()

if debugEnabled {
writeLogFooter(exitCode, errorList, globalConfiguration, networkAccess)
}
finalExitCode = tearDown(ctx, err, errorListCopy, startTime, ua, cliAnalytics, networkAccess)
})

return exitCode
return finalExitCode
}

func processError(err error, errorList []error) ([]error, error) {
Expand Down
Loading
Loading