Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/frontend/graphqlbackend/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ go_library(
"//internal/embeddings/background/repo",
"//internal/encryption",
"//internal/encryption/keyring",
"//internal/endpoint",
"//internal/env",
"//internal/errcode",
"//internal/executor",
Expand Down Expand Up @@ -378,6 +379,8 @@ go_library(
"@com_github_sourcegraph_zoekt//stream",
"@com_github_stretchr_testify//require",
"@com_github_throttled_throttled_v2//:throttled",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
"@io_k8s_apimachinery//pkg/types",
"@io_opentelemetry_go_otel//:otel",
"@io_opentelemetry_go_otel//attribute",
],
Expand Down
2 changes: 1 addition & 1 deletion cmd/frontend/graphqlbackend/site.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (r *siteResolver) SettingsURL() *string { return strptr("/site-admin/global

// CanReloadSite reports whether the current actor may reload the site:
// the deployment must be goreman-managed (see isGoremanSite) and the
// actor must be a site admin.
func (r *siteResolver) CanReloadSite(ctx context.Context) bool {
	if err := auth.CheckCurrentUserIsSiteAdmin(ctx, r.db); err != nil {
		return false
	}
	return isGoremanSite
}

func (r *siteResolver) BuildVersion() string { return version.Version() }
Expand Down
30 changes: 27 additions & 3 deletions cmd/frontend/graphqlbackend/site_reload.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,53 @@ package graphqlbackend

import (
"context"
"fmt"
"time"

"github.com/inconshreveable/log15" //nolint:logging // TODO move all logging to sourcegraph/log
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8stypes "k8s.io/apimachinery/pkg/types"

"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/processrestart"
"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/auth"
"github.com/sourcegraph/sourcegraph/internal/cloud"
"github.com/sourcegraph/sourcegraph/internal/endpoint"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

// isGoremanSite is whether the current site can be reloaded via the API. Currently
// only goreman-managed sites can be reloaded. Callers must also check if the actor
// is an admin before actually reloading the site.
var canReloadSite = processrestart.CanRestart()
var isGoremanSite = processrestart.CanRestart()

func (r *schemaResolver) ReloadSite(ctx context.Context) (*EmptyResponse, error) {
// 🚨 SECURITY: Reloading the site is an interruptive action, so only admins
// 🚨 SECURITY: Reloading the site is a disruptive action, so only admins
// may do it.
if err := auth.CheckCurrentUserIsSiteAdmin(ctx, r.db); err != nil {
return nil, err
}
if cloud.SiteConfig().SourcegraphOperatorAuthProviderEnabled() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cloud only - nice 👍

// use k8s client to restart the frontend deployment rollout
client, err := endpoint.LoadClient()
if err != nil {
return nil, err
}
ns := endpoint.Namespace(r.logger)
data := fmt.Sprintf(`{"spec": {"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "%s"}}}}}`, time.Now().Format("20060102150405"))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will this cause drift with the deployment manifests? e.g., kustomize, helm.

I would like to see an E2E test against a locally deployed (with kind) sourcegraph k8s deployment.

you can build images from this branch with:

sg ci build docker-images-candidates-notest

Then in your helm overrides:

storageClass:
  create: false
  name: standard

sourcegraph: 
  localDevMode: true
  image:
    # you can find the tag from buildkite logs
    defaultTag: docker-images-candidates-notest-<replace_md>
    useGlobalTagAsDefault: true

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will this cause drift with the deployment manifests? e.g., kustomize, helm.

I would like to see an E2E test against a locally deployed (with kind) sourcegraph k8s deployment.

you can build images from this branch with:

sg ci build docker-images-candidates-notest

Then in your helm overrides:

storageClass:
  create: false
  name: standard

sourcegraph: 
  localDevMode: true
  image:
    # you can find the tag from buildkite logs
    defaultTag: docker-images-candidates-notest-<replace_md>
    useGlobalTagAsDefault: true

@michaellzc I do not expect any drift, b/c I just did kubectl rollout restart deployment sourcegraph-frontend and it resulted in these annotations being added to deployment:

  template:
    metadata:
      annotations:
        checksum/auth: 5dda2c15191bf709225a311e8b732e6253d7b1c9afcf89e6ec994511cbbd2cf4
        checksum/redis: 63b58e05a2640417d599c4aee6d866cb9063e3a9aa452dc08dbfff836b7781b7
        kubectl.kubernetes.io/default-container: frontend
        kubectl.kubernetes.io/restartedAt: "2024-02-29T00:51:29+01:00" <-- new one just added by kubectl

so this action is same as normal operation done by our self-service
but +1 for test results.

Copy link
Copy Markdown
Member

@michaellzc michaellzc Feb 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, didn't know that's how rollout restart work underneath.

on that notes, a few suggestions:

  • replace the annotation namespace from kubectl.kubernetes.io/restartedAt to something like sourcegraph.com/restartedAt. just makes it clear we're not trying to impersonate as kubectl, and leave a proper fingerprint.
  • add some basic sanity checks to prevent mis-used, e.g., check if all pods are healthy, check if there's restart in progress, etc.
  • record this action in audit logs
  • add a flag (e.g., SRC_ENABLE_FRONTEND_SITE_RELOAD) so it's possible to disable this feature if we want, especially frontend availability affects Cloud uptime SLA.

deploymentClient := client.AppsV1().Deployments(ns)

r.logger.Info("Restarting k8s deployment")

_, err = deploymentClient.Patch(ctx, "sourcegraph-frontend",
k8stypes.StrategicMergePatchType, []byte(data), metav1.PatchOptions{})
if err != nil {
return nil, err
}
return &EmptyResponse{}, nil
}

if !canReloadSite {
if !isGoremanSite {
return nil, errors.New("reloading site is not supported")
}

Expand Down
2 changes: 1 addition & 1 deletion internal/endpoint/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type endpoints struct {
// New creates a new Map for the URL specifier.
//
// If the scheme is prefixed with "k8s+", one URL is expected and the format is
// expected to match e.g. k8s+http://service.namespace:port/path. namespace,
// expected to match e.g. k8s+http://service.namespace:port/path. Namespace,
// port and path are optional. URLs of this form will consistently hash among
// the endpoints for the Kubernetes service. The values returned by Get will
// look like http://endpoint:port/path.
Expand Down
12 changes: 6 additions & 6 deletions internal/endpoint/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func K8S(logger log.Logger, urlspec string) *Map {
logger = logger.Scoped("k8s")
return &Map{
urlspec: urlspec,
discofunk: k8sDiscovery(logger, urlspec, namespace(logger), loadClient),
discofunk: k8sDiscovery(logger, urlspec, Namespace(logger), LoadClient),
}
}

Expand Down Expand Up @@ -189,10 +189,10 @@ func parseURL(rawurl string) (*k8sURL, error) {
}, nil
}

// namespace returns the namespace the pod is currently running in
// this is done because the k8s client we previously used set the namespace
// Namespace returns the namespace the pod is currently running in.
// This is done because the k8s client we previously used set the namespace
// when the client was created; the official k8s client does not.
func namespace(logger log.Logger) string {
func Namespace(logger log.Logger) string {
logger = logger.Scoped("namespace")
const filename = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
data, err := os.ReadFile(filename)
Expand All @@ -209,7 +209,7 @@ func namespace(logger log.Logger) string {
return ns
}

func loadClient() (client *kubernetes.Clientset, err error) {
func LoadClient() (client *kubernetes.Clientset, err error) {
// Uncomment below to test against a real cluster. This is only important
// when you are changing how we interact with the k8s API and you want to
// test against the real thing.
Expand All @@ -226,7 +226,7 @@ func loadClient() (client *kubernetes.Clientset, err error) {
}
clientConfig := clientcmd.NewDefaultClientConfig(*c, nil)
config, err = clientConfig.ClientConfig()
namespace = "prod"
ns := "prod" // NOTE(review): the old `namespace = "prod"` line was mechanically renamed to `Namespace = "prod"`, but Namespace is now a function — this commented-out example would no longer compile if uncommented; assign to a local variable instead
*/

config, err := rest.InClusterConfig()
Expand Down