Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 107 additions & 82 deletions experimental/ir/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,25 @@ import (
"strings"

"github.com/bufbuild/protocompile/experimental/id"
"github.com/bufbuild/protocompile/experimental/report"
"github.com/bufbuild/protocompile/internal/arena"
"github.com/bufbuild/protocompile/internal/intern"
)

// builtinIDs contains [intern.ID]s for symbols with special meaning in the
// language.
// builtins contains those symbols that are built into the language, and which the compiler cannot
// handle not being present. This field is only present in the Context
// for descriptor.proto.
// builtins contains those symbols that are built into the language, referenced
// by the compiler for lowering. This field is only present in the Context for
// descriptor.proto.
//
// This is resolved using reflection in [resolveLangSymbols]. The names of the
// fields of this type must match those in builtinIDs that names its symbol.
// Fields are resolved using reflection in [resolveBuiltins]. The names of the
// fields of this type must match the corresponding entries in [builtinIDs].
//
// Fields without a tag are required: any descriptor.proto missing one of them
// is considered genuinely broken, and [resolveBuiltins] emits an error
// diagnostic for each missing required symbol. Fields tagged
// `builtin:"optional"` may be absent without diagnostic — they correspond to
// post-proto2 or editions-only features, and older vendored copies of
// descriptor.proto will legitimately not contain them.
type builtins struct {
// This indicates whether the descriptor.proto file used for compilation is
// valid.
//
// An invalid descriptor.proto file will be missing non-optional fields.
valid bool `builtin:"ignore"`

FileOptions Member
MessageOptions Member
FieldOptions Member
Expand All @@ -48,25 +48,26 @@ type builtins struct {
ServiceOptions Member
MethodOptions Member

JavaUTF8 Member
JavaMultipleFiles Member
OptimizeFor Member
MapEntry Member
Packed Member
OptionTargets Member
CType, JSType Member
Lazy, UnverifiedLazy Member
AllowAlias Member
MessageSet Member
JSONName Member
JavaUTF8 Member
JavaMultipleFiles Member
OptimizeFor Member
MapEntry Member
Packed Member
OptionTargets Member `builtin:"optional"`
CType, JSType Member
Lazy Member
UnverifiedLazy Member `builtin:"optional"`
AllowAlias Member
MessageSet Member
JSONName Member

ExtnDecls Member
ExtnVerification Member
ExtnDeclNumber Member
ExtnDeclName Member
ExtnDeclType Member
ExtnDeclReserved Member
ExtnDeclRepeated Member
ExtnDecls Member `builtin:"optional"`
ExtnVerification Member `builtin:"optional"`
ExtnDeclNumber Member `builtin:"optional"`
ExtnDeclName Member `builtin:"optional"`
ExtnDeclType Member `builtin:"optional"`
ExtnDeclReserved Member `builtin:"optional"`
ExtnDeclRepeated Member `builtin:"optional"`

FileDeprecated Member
MessageDeprecated Member
Expand All @@ -76,34 +77,36 @@ type builtins struct {
ServiceDeprecated Member
MethodDeprecated Member

EditionDefaults, EditionDefaultsKey, EditionDefaultsValue Member
EditionDefaults Member `builtin:"optional"`
EditionDefaultsKey Member `builtin:"optional"`
EditionDefaultsValue Member `builtin:"optional"`

EditionSupport Member
EditionSupportIntroduced Member
EditionSupportDeprecated Member
EditionSupportWarning Member
EditionSupportRemoved Member
EditionSupport Member `builtin:"optional"`
EditionSupportIntroduced Member `builtin:"optional"`
EditionSupportDeprecated Member `builtin:"optional"`
EditionSupportWarning Member `builtin:"optional"`
EditionSupportRemoved Member `builtin:"optional"`

FeatureSet Type
FeaturePresence Member
FeatureEnumType Member
FeaturePacked Member
FeatureUTF8 Member
FeatureGroup Member
FeatureEnum Member
FeatureJSON Member
FeatureSet Type `builtin:"optional"`
FeaturePresence Member `builtin:"optional"`
FeatureEnumType Member `builtin:"optional"`
FeaturePacked Member `builtin:"optional"`
FeatureUTF8 Member `builtin:"optional"`
FeatureGroup Member `builtin:"optional"`
FeatureEnum Member `builtin:"optional"`
FeatureJSON Member `builtin:"optional"`
FeatureVisibility Member `builtin:"optional"`
FeatureNamingStyle Member `builtin:"optional"`

FileFeatures Member
MessageFeatures Member
FieldFeatures Member
OneofFeatures Member
RangeFeatures Member
EnumFeatures Member
EnumValueFeatures Member
ServiceFeatures Member
MethodFeatures Member
FileFeatures Member `builtin:"optional"`
MessageFeatures Member `builtin:"optional"`
FieldFeatures Member `builtin:"optional"`
OneofFeatures Member `builtin:"optional"`
RangeFeatures Member `builtin:"optional"`
EnumFeatures Member `builtin:"optional"`
EnumValueFeatures Member `builtin:"optional"`
ServiceFeatures Member `builtin:"optional"`
MethodFeatures Member `builtin:"optional"`
}

// builtinIDs is all of the interning IDs of names in [builtins], plus some
Expand Down Expand Up @@ -194,11 +197,16 @@ type builtinIDs struct {
MethodFeatures intern.ID `intern:"google.protobuf.MethodOptions.features"`
}

// resolveBuiltins resolves the symbols from descriptor.proto and returns whether the
// builtins resolved are valid.
func resolveBuiltins(file *File) bool {
// resolveBuiltins resolves the symbols from descriptor.proto.
//
// For each required field (untagged in [builtins]) that cannot be resolved,
// an error diagnostic is emitted on the descriptor.proto file. Optional
// fields (tagged `builtin:"optional"`) silently remain zero when absent.
// Downstream accessors handle zero members gracefully, so non-editions files
// continue to compile against older vendored copies of descriptor.proto.
func resolveBuiltins(file *File, r *report.Report) {
if !file.IsDescriptorProto() {
return file.builtins().valid
return
}

// If adding a new kind of symbol to resolve, add it to this map.
Expand All @@ -217,45 +225,30 @@ func resolveBuiltins(file *File) bool {
}

file.dpBuiltins = new(builtins)
file.dpBuiltins.valid = true
v := reflect.ValueOf(file.dpBuiltins).Elem()
ids := reflect.ValueOf(file.session.builtins)

for i := range v.NumField() {
field := v.Field(i)
tyField := v.Type().Field(i)
if tyField.Name == "valid" {
continue
}

id := ids.FieldByName(tyField.Name).Interface().(intern.ID) //nolint:errcheck
kind := kinds[field.Type()]
var optional bool
for option := range strings.SplitSeq(tyField.Tag.Get("builtin"), ",") {
if option == "optional" {
optional = true
}
}

ref := file.exported.lookup(id)
sym := GetRef(file, ref)
if sym.IsZero() && optional {
continue
}

if sym.Kind() != kind.kind {
// There is a missing field on descriptor.proto, so we mark the builtins as invalid,
// and stop resolving.
//
// TODO: There is no trivial way to ascertain whether the invalid descriptor.proto
// was provided by the compiler or vendored in from a third-party source. Ideally,
// we would crash if the compiler is misbehaving.
file.dpBuiltins.valid = false
} else {
kind.wrap(sym.Raw().data, field)
if !isOptionalBuiltinField(tyField) {
r.Errorf("`%s` is missing required symbol `%s`", file.Path(), file.session.intern.Value(id)).Apply(
report.Snippet(file.AST()),
report.Helpf("the descriptor.proto supplied to the compiler does not declare this %s; "+
"it may be vendored from a version that predates this symbol, or may be genuinely corrupt", kind.kind.noun()),
)
}
continue
}
kind.wrap(sym.Raw().data, field)
}
return file.dpBuiltins.valid
}

// makeBuiltinWrapper helps construct reflection shims for resolveBuiltins.
Expand All @@ -267,3 +260,35 @@ func makeBuiltinWrapper[T ~id.Node[T, *File, Raw], Raw any](
out.Set(reflect.ValueOf(x))
}
}

// isOptionalBuiltinField reports whether the struct field f carries the
// "optional" option in its `builtin` struct tag.
//
// The tag value is treated as a comma-separated list of options, and an option
// only counts if it is exactly the string "optional" (no trimming, no prefix
// matching). A field with no `builtin` tag is not optional.
func isOptionalBuiltinField(f reflect.StructField) bool {
	remaining := f.Tag.Get("builtin")
	for {
		// Peel one option off the front of the list at a time.
		option, tail, more := strings.Cut(remaining, ",")
		if option == "optional" {
			return true
		}
		if !more {
			// That was the last (or only) option in the list.
			return false
		}
		remaining = tail
	}
}

// optionalBuiltinIDs builds the set of intern IDs whose corresponding field in
// [builtins] is tagged `builtin:"optional"`.
//
// Fields are matched by name: for each optional field of [builtins], the
// same-named field of ids supplies the ID added to the set. Optional fields
// with no same-named field in ids are skipped.
func optionalBuiltinIDs(ids *builtinIDs) map[intern.ID]struct{} {
	var (
		fields = reflect.TypeFor[builtins]()
		idVals = reflect.ValueOf(*ids)
		set    = make(map[intern.ID]struct{})
	)

	for i := range fields.NumField() {
		field := fields.Field(i)
		idVal := idVals.FieldByName(field.Name)
		if isOptionalBuiltinField(field) && idVal.IsValid() {
			set[idVal.Interface().(intern.ID)] = struct{}{} //nolint:errcheck
		}
	}
	return set
}
11 changes: 0 additions & 11 deletions experimental/ir/ir_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,6 @@ func (f *File) AST() *ast.File {
return f.ast
}

// Lowered reports whether this file is considered to have completed lowering.
//
// A nil file is never lowered. Otherwise the file counts as lowered when its
// builtins are marked valid, or when the file is descriptor.proto itself.
func (f *File) Lowered() bool {
	switch {
	case f == nil:
		return false
	case f.builtins().valid:
		return true
	default:
		return f.IsDescriptorProto()
	}
}

// Syntax returns the syntax pragma that applies to this file.
func (f *File) Syntax() syntax.Syntax {
if f == nil {
Expand Down
28 changes: 24 additions & 4 deletions experimental/ir/ir_member.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/bufbuild/protocompile/experimental/ast"
"github.com/bufbuild/protocompile/experimental/ast/predeclared"
"github.com/bufbuild/protocompile/experimental/ast/syntax"
"github.com/bufbuild/protocompile/experimental/id"
"github.com/bufbuild/protocompile/experimental/internal/taxa"
"github.com/bufbuild/protocompile/experimental/ir/presence"
Expand Down Expand Up @@ -122,8 +123,17 @@ func (m Member) IsPacked() bool {
return packed
}

feature := m.FeatureSet().Lookup(builtins.FeaturePacked).Value()
value, _ := feature.AsInt()
// Syntax dictates default packing for proto2/proto3: proto2 repeated
// fields are expanded, proto3 are packed. The repeated_field_encoding
// feature only takes effect in editions.
switch s := m.Context().Syntax(); {
case s == syntax.Proto2:
return false
case s == syntax.Proto3:
return true
}

value, _ := m.FeatureSet().Lookup(builtins.FeaturePacked).Value().AsInt()
return value == tags.FeatureSet_RepeatedFieldEncoding_Packed
}

Expand All @@ -134,9 +144,19 @@ func (m Member) IsUnicode() bool {
return false
}

// Syntax dictates UTF-8 validation for proto2/proto3: proto2 does not
// validate string fields, proto3 does. The utf8_validation feature only
// takes effect in editions.
switch s := m.Context().Syntax(); {
case s == syntax.Proto2:
return false
case s == syntax.Proto3:
return true
}

builtins := m.Context().builtins()
utf8Feature, _ := m.FeatureSet().Lookup(builtins.FeatureUTF8).Value().AsInt()
return utf8Feature == tags.FeatureSet_Utf8Validation_Verify
value, _ := m.FeatureSet().Lookup(builtins.FeatureUTF8).Value().AsInt()
return value == tags.FeatureSet_Utf8Validation_Verify
}

// AsTagRange wraps this member in a TagRange.
Expand Down
11 changes: 11 additions & 0 deletions experimental/ir/ir_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/bufbuild/protocompile/experimental/ast"
"github.com/bufbuild/protocompile/experimental/ast/predeclared"
"github.com/bufbuild/protocompile/experimental/ast/syntax"
"github.com/bufbuild/protocompile/experimental/id"
"github.com/bufbuild/protocompile/experimental/internal/taxa"
"github.com/bufbuild/protocompile/experimental/seq"
Expand Down Expand Up @@ -180,6 +181,16 @@ func (t Type) IsClosedEnum() bool {
return false
}

// Syntax dictates enum closedness for proto2/proto3: proto2 enums are
// always closed, proto3 enums are always open. Feature overrides only
// apply in editions, where the enum_type feature is the source of truth.
switch s := t.Context().Syntax(); {
case s == syntax.Proto2:
return true
case s == syntax.Proto3:
return false
}

builtins := t.Context().builtins()
n, _ := t.FeatureSet().Lookup(builtins.FeatureEnum).Value().AsInt()
return n == tags.FeatureSet_EnumType_Closed
Expand Down
5 changes: 5 additions & 0 deletions experimental/ir/ir_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,12 @@ func (v MessageValue) IsEmpty() bool {
}

// Field returns the field corresponding with the given member, if it is set.
//
// Returns a zero [Value] if either the member or the message value is zero.
func (v MessageValue) Field(field Member) Value {
if field.IsZero() || v.IsZero() {
return Value{}
}
if field.Container() != v.Type() {
return Value{}
}
Expand Down
17 changes: 10 additions & 7 deletions experimental/ir/lower.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ import (
type Session struct {
	// intern is the session's intern table.
	intern intern.Table

	// once guards the one-time initialization performed by init.
	once sync.Once
	// builtins holds the intern IDs preloaded into the intern table by init.
	builtins builtinIDs
	// optionalBuiltins is the set of builtin intern IDs computed by
	// optionalBuiltinIDs during init.
	optionalBuiltins map[intern.ID]struct{}
}

// RecordInternStats enables instrumentation of the session's intern table.
Expand Down Expand Up @@ -78,7 +79,10 @@ func (s *Session) Lower(source *ast.File, errs *report.Report, importer Importer
}

// init performs the session's one-time setup: it preloads the builtin intern
// IDs into the intern table and then derives the set of optional builtin IDs
// from them.
//
// Both steps run inside a single once.Do call. A sync.Once runs only the
// first function it is handed, so splitting the work across two Do calls
// would leave optionalBuiltins unset.
func (s *Session) init() {
	s.once.Do(func() {
		s.intern.Preload(&s.builtins)
		s.optionalBuiltins = optionalBuiltinIDs(&s.builtins)
	})
}

func lower(file *File, r *report.Report, importer Importer) {
Expand All @@ -100,10 +104,9 @@ func lower(file *File, r *report.Report, importer Importer) {
mergeImportedSymbolTables(file, r)

// Perform "early" name resolution, i.e. field names and extension types.
if !resolveNames(file, r) {
// An invalid descriptor.proto was found, stop lowering the file.
return
}
// Name resolution always proceeds regardless of builtin validity; field
// types, method types, and extensions use the symbol table, not builtins.
resolveNames(file, r)
resolveEarlyOptions(file)

// Perform constant evaluation.
Expand Down
Loading
Loading