Skip to content

Commit 84a7435

Browse files
rustyconover and claude committed
Improve Setting class DX with shorter names and type inference
- Rename `description` to `desc` and `default_value` to `default`
- Type is now inferred from default value for common Python types (bool, int, float, str, bytes)
- Raise TypeError if neither type nor default is provided

Before:

    Setting(
        name="vgi_debug",
        description="Enable debug mode",
        type=pa.bool_(),
        default_value=False,
    )

After:

    Setting("vgi_debug", "Enable debug mode", default=False)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b23df87 commit 84a7435

3 files changed

Lines changed: 118 additions & 70 deletions

File tree

tests/catalog/test_serialization.py

Lines changed: 48 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -511,70 +511,75 @@ def test_none_invocation_id(self) -> None:
511511
class TestSettingSerialization:
512512
"""Test Setting serialization round-trip."""
513513

514-
def test_basic_round_trip(self) -> None:
515-
"""Test basic serialization and deserialization."""
516-
original = Setting(
517-
name="vgi_verbose_mode",
518-
description="Enable verbose output",
519-
type=pa.bool_(),
520-
default_value=None,
521-
)
514+
def test_basic_round_trip_explicit_type(self) -> None:
515+
"""Test with explicit type and no default (required setting)."""
516+
original = Setting("vgi_api_key", "API key for auth", type=pa.string())
522517
serialized = original.serialize()
523518
batch = deserialize_record_batch(serialized)
524519
restored = Setting.deserialize(batch)
525520

526521
assert restored.name == original.name
527-
assert restored.description == original.description
528-
assert restored.type == pa.bool_()
529-
assert restored.default_value is None
530-
531-
def test_with_default_value(self) -> None:
532-
"""Test with a default value."""
533-
original = Setting(
534-
name="vgi_log_level",
535-
description="Logging level",
536-
type=pa.string(),
537-
default_value="info",
538-
)
522+
assert restored.desc == original.desc
523+
assert restored.type == pa.string()
524+
assert restored.default is None
525+
526+
def test_type_inferred_from_string_default(self) -> None:
527+
"""Test type inference from string default."""
528+
original = Setting("vgi_log_level", "Logging level", default="info")
539529
serialized = original.serialize()
540530
batch = deserialize_record_batch(serialized)
541531
restored = Setting.deserialize(batch)
542532

543533
assert restored.name == original.name
544-
assert restored.description == original.description
545534
assert restored.type == pa.string()
546-
assert restored.default_value == "info"
547-
548-
def test_integer_type(self) -> None:
549-
"""Test with integer type."""
550-
original = Setting(
551-
name="vgi_max_workers",
552-
description="Maximum worker count",
553-
type=pa.int64(),
554-
default_value=4,
555-
)
535+
assert restored.default == "info"
536+
537+
def test_type_inferred_from_int_default(self) -> None:
538+
"""Test type inference from int default."""
539+
original = Setting("vgi_max_workers", "Maximum worker count", default=4)
556540
serialized = original.serialize()
557541
batch = deserialize_record_batch(serialized)
558542
restored = Setting.deserialize(batch)
559543

560-
assert restored.name == original.name
561544
assert restored.type == pa.int64()
562-
assert restored.default_value == 4
563-
564-
def test_bool_default_value(self) -> None:
565-
"""Test with boolean default value."""
566-
original = Setting(
567-
name="vgi_debug",
568-
description="Enable debug mode",
569-
type=pa.bool_(),
570-
default_value=False,
571-
)
545+
assert restored.default == 4
546+
547+
def test_type_inferred_from_bool_default(self) -> None:
548+
"""Test type inference from bool default."""
549+
original = Setting("vgi_debug", "Enable debug mode", default=False)
572550
serialized = original.serialize()
573551
batch = deserialize_record_batch(serialized)
574552
restored = Setting.deserialize(batch)
575553

576554
assert restored.type == pa.bool_()
577-
assert restored.default_value is False
555+
assert restored.default is False
556+
557+
def test_type_inferred_from_float_default(self) -> None:
558+
"""Test type inference from float default."""
559+
original = Setting("vgi_timeout", "Timeout in seconds", default=30.5)
560+
serialized = original.serialize()
561+
batch = deserialize_record_batch(serialized)
562+
restored = Setting.deserialize(batch)
563+
564+
assert restored.type == pa.float64()
565+
assert restored.default == 30.5
566+
567+
def test_explicit_type_overrides_inference(self) -> None:
568+
"""Test that explicit type is used even when default is provided."""
569+
original = Setting("vgi_port", "Port number", type=pa.int32(), default=8080)
570+
serialized = original.serialize()
571+
batch = deserialize_record_batch(serialized)
572+
restored = Setting.deserialize(batch)
573+
574+
assert restored.type == pa.int32()
575+
assert restored.default == 8080
576+
577+
def test_missing_type_and_default_raises(self) -> None:
578+
"""Test that missing type and default raises TypeError."""
579+
import pytest
580+
581+
with pytest.raises(TypeError, match="type must be specified"):
582+
Setting("vgi_api_key", "API key")
578583

579584

580585
class TestFunctionInfoRequiredSettings:

vgi/catalog/catalog_interface.py

Lines changed: 69 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -147,31 +147,68 @@ def deserialize(cls, batch: pa.RecordBatch) -> Self:
147147
)
148148

149149

150+
def _infer_arrow_type(value: Any) -> pa.DataType:
151+
"""Infer Arrow type from a Python value.
152+
153+
Args:
154+
value: Python value to infer type from.
155+
156+
Returns:
157+
The inferred Arrow DataType.
158+
159+
Raises:
160+
TypeError: If the type cannot be inferred.
161+
162+
"""
163+
if isinstance(value, bool):
164+
return pa.bool_()
165+
if isinstance(value, int):
166+
return pa.int64()
167+
if isinstance(value, float):
168+
return pa.float64()
169+
if isinstance(value, str):
170+
return pa.string()
171+
if isinstance(value, bytes):
172+
return pa.binary()
173+
raise TypeError(
174+
f"Cannot infer Arrow type from {type(value).__name__}. "
175+
f"Please specify type explicitly."
176+
)
177+
178+
150179
@dataclass(frozen=True)
151180
class Setting:
152181
"""A setting exposed by a VGI worker.
153182
154183
Settings can be configured via DuckDB's SET command and are passed
155184
to VGI functions via the settings parameter in the Invocation.
156185
157-
Example:
158-
Setting(
159-
name="vgi_verbose_mode",
160-
description="Enable verbose output with extra columns",
161-
type=pa.bool_(),
162-
default_value=False,
163-
)
186+
The type can be inferred from the default value for common Python types
187+
(bool, int, float, str, bytes). If no default is provided, type must
188+
be specified explicitly.
189+
190+
Examples:
191+
# Type inferred from default
192+
Setting("vgi_debug", "Enable debug mode", default=False)
193+
Setting("vgi_workers", "Max workers", default=4)
194+
Setting("vgi_log_level", "Log level", default="info")
195+
196+
# Explicit type (required when no default)
197+
Setting("vgi_api_key", "API key", type=pa.string())
198+
199+
# Explicit type with default
200+
Setting("vgi_timeout", "Timeout in ms", type=pa.int32(), default=5000)
164201
165202
"""
166203

167204
# Setting name (e.g., "vgi_verbose_mode")
168205
name: str
169206
# Human-readable description
170-
description: str
171-
# Arrow data type for this setting
172-
type: pa.DataType
173-
# Default value (None if required, otherwise the Python value)
174-
default_value: Any = None
207+
desc: str
208+
# Arrow data type (inferred from default if not provided)
209+
type: pa.DataType | None = None
210+
# Default value (None means the setting is required)
211+
default: Any = None
175212

176213
ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema(
177214
[
@@ -182,25 +219,38 @@ class Setting:
182219
] # type: ignore[arg-type]
183220
)
184221

222+
def __post_init__(self) -> None:
223+
"""Infer type from default value if not provided."""
224+
if self.type is None:
225+
if self.default is None:
226+
raise TypeError(
227+
f"Setting '{self.name}': type must be specified when no default "
228+
f"is provided. Use type=pa.string() or similar."
229+
)
230+
inferred_type = _infer_arrow_type(self.default)
231+
object.__setattr__(self, "type", inferred_type)
232+
185233
def serialize(self) -> bytes:
186234
"""Serialize to Arrow IPC bytes."""
235+
assert self.type is not None # Guaranteed by __post_init__
236+
187237
# Serialize type as a single-field schema
188238
type_schema = pa.schema([pa.field("value", self.type)])
189239
type_bytes = type_schema.serialize().to_pybytes()
190240

191241
# Serialize default value if present
192242
default_bytes: bytes | None = None
193-
if self.default_value is not None:
243+
if self.default is not None:
194244
default_batch = pa.RecordBatch.from_pydict(
195-
{"value": [self.default_value]}, schema=type_schema
245+
{"value": [self.default]}, schema=type_schema
196246
)
197247
default_bytes = vgi.ipc_utils.serialize_record_batch(default_batch)
198248

199249
batch = pa.RecordBatch.from_pylist(
200250
[
201251
{
202252
"name": self.name,
203-
"description": self.description,
253+
"description": self.desc,
204254
"type": type_bytes,
205255
"default_value": default_bytes,
206256
}
@@ -222,16 +272,16 @@ def deserialize(cls, batch: pa.RecordBatch) -> Self:
222272
data_type = type_schema.field("value").type
223273

224274
# Deserialize default value if present
225-
default_value: Any = None
275+
default: Any = None
226276
if row["default_value"] is not None:
227277
default_batch = vgi.ipc_utils.deserialize_record_batch(row["default_value"])
228-
default_value = default_batch.column("value")[0].as_py()
278+
default = default_batch.column("value")[0].as_py()
229279

230280
return cls(
231281
name=row["name"],
232-
description=row["description"],
282+
desc=row["description"],
233283
type=data_type,
234-
default_value=default_value,
284+
default=default,
235285
)
236286

237287

vgi/examples/worker.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,6 @@
1616
vgi-example-worker
1717
"""
1818

19-
import pyarrow as pa
20-
2119
from vgi.catalog import Setting
2220
from vgi.examples.scalar import (
2321
AddNumericColumnsFunction,
@@ -62,12 +60,7 @@ class ExampleWorker(Worker):
6260
catalog_name = "example"
6361

6462
settings = [
65-
Setting(
66-
name="vgi_verbose_mode",
67-
description="Enable verbose output with extra columns",
68-
type=pa.bool_(),
69-
default_value=False,
70-
),
63+
Setting("vgi_verbose_mode", "Enable verbose output", default=False),
7164
]
7265

7366
functions = [

0 commit comments

Comments
 (0)