diff --git a/README.md b/README.md index f53daec..e197ca9 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ Other commands: - [`avrotize pcf`](#create-the-parsing-canonical-form-pcf-of-an-avrotize-schema) - Create the Parsing Canonical Form (PCF) of an Avrotize Schema. - [`avrotize validate`](#validate-json-instances-against-schemas) - Validate JSON instances against Avro or JSON Structure schemas. +- [`avrotize validate-tmsl`](#validate-tmsl-scripts-locally) - Validate TMSL scripts locally against documented object structure. JSON Structure conversions: @@ -1484,6 +1485,35 @@ avrotize validate events.jsonl --schema events.jstruct.json avrotize validate data.json --schema schema.avsc --quiet ``` +### Validate TMSL scripts locally + +```bash +avrotize validate-tmsl [input] [--quiet] +``` + +Parameters: + +- `[input]`: Path to the TMSL JSON file. If omitted, the file is read from stdin. +- `--quiet`: (optional) Suppress output. Exit code 0 if valid, 1 if invalid. + +Validation notes: + +- Performs local structural validation aligned with Microsoft TMSL object definitions for compatibility level 1200+. +- Validates `createOrReplace` command payload shape for the database/model/table/column path. +- Enforces documented column `dataType` enum values (`automatic`, `string`, `int64`, `double`, `dateTime`, `decimal`, `boolean`, `binary`, `unknown`, `variant`). +- Enforces strict object property checks (`additionalProperties: false`) for the validated subset. +- This is not a semantic engine validation; semantic checks still require execution against an XMLA endpoint. 
+ +Example: + +```bash +# Validate a generated TMSL file +avrotize validate-tmsl model.tmsl.json + +# CI mode with exit code only +avrotize validate-tmsl model.tmsl.json --quiet +``` + ### Convert JSON Structure schema to GraphQL schema ```bash diff --git a/avrotize/avrotojstruct.py b/avrotize/avrotojstruct.py index bfa8dd7..5ef06d1 100644 --- a/avrotize/avrotojstruct.py +++ b/avrotize/avrotojstruct.py @@ -132,6 +132,10 @@ def build_type_definition(self, avro_schema, namespace, definitions): props = {"name": name, "type": "object", "properties": {}, "required": []} if "doc" in avro_schema: props["description"] = avro_schema["doc"] + if isinstance(avro_schema.get("unique"), list): + props["x-avrotize-unique"] = avro_schema["unique"] + if isinstance(avro_schema.get("foreignKeys"), list): + props["x-avrotize-foreignKeys"] = avro_schema["foreignKeys"] # Namespace for resolving field types within this record record_fields_namespace = avro_schema.get("namespace", namespace) diff --git a/avrotize/avrototsml.py b/avrotize/avrototsml.py new file mode 100644 index 0000000..f8232cc --- /dev/null +++ b/avrotize/avrototsml.py @@ -0,0 +1,349 @@ +"""Convert an Avro schema to a Tabular Model Scripting Language (TMSL) schema.""" + +import json +import sys +from typing import Any, Dict, List, Optional, Tuple + +JsonNode = Dict[str, "JsonNode"] | List["JsonNode"] | str | bool | int | float | None + + +class AvroToTmslConverter: + """Class to convert Avro schema to TMSL schema.""" + + def __init__(self: "AvroToTmslConverter") -> None: + self.named_type_cache: Dict[str, Dict[str, Any]] = {} + + def get_fullname(self, namespace: str, name: str) -> str: + """Get fully-qualified type name.""" + return f"{namespace}.{name}" if namespace else name + + def cache_named_types(self, avro_type: JsonNode, namespace: str = "") -> None: + """Cache named Avro types for reference resolution.""" + if isinstance(avro_type, list): + for item in avro_type: + self.cache_named_types(item, namespace) + 
return + + if isinstance(avro_type, dict): + current_namespace = str(avro_type.get("namespace", namespace)) + type_name = avro_type.get("name") + if isinstance(type_name, str): + fullname = self.get_fullname(current_namespace, type_name) + self.named_type_cache[fullname] = avro_type + self.named_type_cache[type_name] = avro_type + + avro_kind = avro_type.get("type") + if avro_kind == "record": + for field in avro_type.get("fields", []): + if isinstance(field, dict) and "type" in field: + self.cache_named_types(field["type"], current_namespace) + elif avro_kind == "array": + self.cache_named_types(avro_type.get("items"), current_namespace) + elif avro_kind == "map": + self.cache_named_types(avro_type.get("values"), current_namespace) + + def map_avro_type_to_tmsl(self, avro_type: JsonNode) -> Tuple[str, bool]: + """Map an Avro field type to a TMSL data type and nullability.""" + if isinstance(avro_type, list): + non_null_types = [item for item in avro_type if item != "null"] + nullable = len(non_null_types) != len(avro_type) + if not non_null_types: + return "string", True + if len(non_null_types) == 1: + mapped_type, _ = self.map_avro_type_to_tmsl(non_null_types[0]) + return mapped_type, True if nullable else False + return "variant", True + + if isinstance(avro_type, dict): + avro_kind = avro_type.get("type") + + if avro_kind == "record": + return "variant", False + if avro_kind in ["array", "map"]: + return "variant", False + if avro_kind == "enum": + return "string", False + if avro_kind == "fixed": + return "binary", False + + logical_type = avro_type.get("logicalType") + if logical_type in ["timestamp-millis", "timestamp-micros", "date", "time-millis", "time-micros"]: + return "dateTime", False + if logical_type == "decimal": + return "decimal", False + + if isinstance(avro_kind, (str, dict, list)): + return self.map_avro_type_to_tmsl(avro_kind) + return "string", False + + if isinstance(avro_type, str): + if avro_type in ["boolean"]: + return "boolean", False 
+ if avro_type in ["int", "long"]: + return "int64", False + if avro_type in ["float", "double"]: + return "double", False + if avro_type == "bytes": + return "binary", False + if avro_type in ["string", "null"]: + return "string", avro_type == "null" + + referenced = self.named_type_cache.get(avro_type) + if referenced is not None: + return self.map_avro_type_to_tmsl(referenced) + + return "variant", False + + return "string", False + + def resolve_root_record(self, schema: JsonNode, avro_record_type: Optional[str]) -> Dict[str, Any]: + """Resolve the root record from a schema document.""" + if isinstance(schema, dict): + if schema.get("type") != "record": + print("Expected an Avro schema with a root type of 'record'") + sys.exit(1) + return schema + + if isinstance(schema, list): + if avro_record_type: + for candidate in schema: + if not isinstance(candidate, dict): + continue + if candidate.get("type") != "record": + continue + record_name = str(candidate.get("name", "")) + namespace = str(candidate.get("namespace", "")) + fullname = self.get_fullname(namespace, record_name) + if avro_record_type in [record_name, fullname]: + return candidate + print(f"No top-level record type {avro_record_type} found in the Avro schema") + sys.exit(1) + + for candidate in schema: + if isinstance(candidate, dict) and candidate.get("type") == "record": + return candidate + + print("Expected at least one Avro 'record' schema in the schema list") + sys.exit(1) + + print("Expected an Avro schema as a JSON object or a list of schema records") + sys.exit(1) + + def resolve_records(self, schema: JsonNode, avro_record_type: Optional[str]) -> List[Dict[str, Any]]: + """Resolve one or more record schemas from the input document.""" + if isinstance(schema, dict): + return [self.resolve_root_record(schema, avro_record_type)] + + if isinstance(schema, list): + if avro_record_type: + return [self.resolve_root_record(schema, avro_record_type)] + + records = [item for item in schema if 
isinstance(item, dict) and item.get("type") == "record"] + if records: + return records + + print("Expected one or more Avro 'record' schemas") + sys.exit(1) + + def build_table(self, record: Dict[str, Any], emit_cloudevents_columns: bool) -> Dict[str, Any]: + """Build a TMSL table object from an Avro record.""" + table_name = str(record.get("name", "Table")) + unique_columns = set(str(column) for column in record.get("unique", []) if isinstance(column, str)) + + columns: List[Dict[str, Any]] = [] + for field in record.get("fields", []): + if not isinstance(field, dict): + continue + field_name = str(field.get("name", "")) + if not field_name: + continue + data_type, nullable = self.map_avro_type_to_tmsl(field.get("type")) + column: Dict[str, Any] = { + "name": field_name, + "dataType": data_type, + "sourceColumn": field_name, + } + if field_name in unique_columns: + column["isKey"] = True + if nullable: + column["isNullable"] = True + columns.append(column) + + if emit_cloudevents_columns: + columns.extend([ + {"name": "___type", "dataType": "string", "sourceColumn": "___type", "isNullable": True}, + {"name": "___source", "dataType": "string", "sourceColumn": "___source", "isNullable": True}, + {"name": "___id", "dataType": "string", "sourceColumn": "___id", "isNullable": True}, + {"name": "___time", "dataType": "dateTime", "sourceColumn": "___time", "isNullable": True}, + {"name": "___subject", "dataType": "string", "sourceColumn": "___subject", "isNullable": True}, + ]) + + return { + "name": table_name, + "columns": columns, + } + + def _record_sql_identifier(self, record: Dict[str, Any]) -> str | None: + """Get SQL table identifier from Avro altnames metadata, if available.""" + altnames = record.get("altnames") + if isinstance(altnames, dict): + sql_name = altnames.get("sql") + if isinstance(sql_name, str) and sql_name: + return sql_name + return None + + def build_relationships(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Build TMSL 
relationships from Avro foreignKeys metadata.""" + table_by_name = { + str(record.get("name")): str(record.get("name")) + for record in records + if isinstance(record.get("name"), str) + } + table_by_sql = {} + for record in records: + record_name = record.get("name") + if not isinstance(record_name, str) or not record_name: + continue + sql_identifier = self._record_sql_identifier(record) + if sql_identifier: + table_by_sql[sql_identifier] = record_name + + relationships: List[Dict[str, Any]] = [] + relationship_names: set[str] = set() + + for record in records: + from_table = record.get("name") + if not isinstance(from_table, str) or not from_table: + continue + + foreign_keys = record.get("foreignKeys") + if not isinstance(foreign_keys, list): + continue + + for fk in foreign_keys: + if not isinstance(fk, dict): + continue + + columns = fk.get("columns") + referenced_columns = fk.get("referencedColumns") + if not isinstance(columns, list) or not isinstance(referenced_columns, list): + continue + if len(columns) != len(referenced_columns) or len(columns) == 0: + continue + + target_table = None + referenced_table_sql = fk.get("referencedTableSql") + if isinstance(referenced_table_sql, str): + target_table = table_by_sql.get(referenced_table_sql) + + if not target_table: + referenced_table = fk.get("referencedTable") + if isinstance(referenced_table, str): + target_table = table_by_name.get(referenced_table) + + if not target_table: + continue + + for from_column, to_column in zip(columns, referenced_columns): + if not isinstance(from_column, str) or not isinstance(to_column, str): + continue + + relationship_name = f"{from_table}_{from_column}_to_{target_table}_{to_column}" + if relationship_name in relationship_names: + continue + + relationships.append( + { + "name": relationship_name, + "fromTable": from_table, + "fromColumn": from_column, + "toTable": target_table, + "toColumn": to_column, + } + ) + relationship_names.add(relationship_name) + + return 
relationships + + def build_tmsl_schema( + self, + avro_schema: JsonNode, + avro_record_type: Optional[str] = None, + database_name: str = "", + compatibility_level: int = 1605, + emit_cloudevents_columns: bool = False, + ) -> Dict[str, Any]: + """Build a TMSL JSON document from an Avro schema document.""" + self.cache_named_types(avro_schema) + records = self.resolve_records(avro_schema, avro_record_type) + + tables = [self.build_table(record, emit_cloudevents_columns) for record in records] + first_table_name = str(tables[0].get("name", "Database")) if tables else "Database" + database = database_name or first_table_name + relationships = self.build_relationships(records) + + model: Dict[str, Any] = { + "culture": "en-US", + "tables": tables, + } + if relationships: + model["relationships"] = relationships + + return { + "createOrReplace": { + "object": {"database": database}, + "database": { + "name": database, + "compatibilityLevel": compatibility_level, + "model": model, + }, + } + } + + def convert_avro_to_tmsl( + self, + avro_schema_path: str, + avro_record_type: Optional[str], + tmsl_file_path: str, + database_name: str = "", + compatibility_level: int = 1605, + emit_cloudevents_columns: bool = False, + ) -> None: + """Convert an Avro schema file to a TMSL JSON file.""" + if not avro_schema_path: + print("Please specify the avro schema file") + sys.exit(1) + + with open(avro_schema_path, "r", encoding="utf-8") as f: + schema = json.load(f) + + tmsl_schema = self.build_tmsl_schema( + schema, + avro_record_type=avro_record_type, + database_name=database_name, + compatibility_level=compatibility_level, + emit_cloudevents_columns=emit_cloudevents_columns, + ) + + with open(tmsl_file_path, "w", encoding="utf-8") as f: + json.dump(tmsl_schema, f, indent=2) + + +def convert_avro_to_tmsl( + avro_schema_path: str, + avro_record_type: Optional[str], + tmsl_file_path: str, + database_name: str = "", + compatibility_level: int = 1605, + emit_cloudevents_columns: bool = 
def convert_avro_to_tmsl(
    avro_schema_path: str,
    avro_record_type: Optional[str],
    tmsl_file_path: str,
    database_name: str = "",
    compatibility_level: int = 1605,
    emit_cloudevents_columns: bool = False,
) -> None:
    """Convert an Avro schema file to a TMSL JSON file.

    Thin module-level wrapper around :class:`AvroToTmslConverter` so the CLI
    can invoke the conversion as a plain function.
    """
    AvroToTmslConverter().convert_avro_to_tmsl(
        avro_schema_path,
        avro_record_type,
        tmsl_file_path,
        database_name,
        compatibility_level,
        emit_cloudevents_columns,
    )
Exit code 0 if valid, 1 if invalid.", + "default": false, + "required": false + } + ], + "prompts": [] + }, { "command": "a2mongo", "description": "Convert Avrotize schema to MongoDB schema", @@ -1915,6 +1948,153 @@ } ] }, + { + "command": "a2tsml", + "description": "Convert Avrotize schema to Tabular Model Scripting Language (TMSL) schema", + "group": "3_Datalake", + "function": { + "name": "avrotize.avrototsml.convert_avro_to_tmsl", + "args": { + "avro_schema_path": "input_file_path", + "tmsl_file_path": "output_file_path", + "avro_record_type": "args.record_type", + "database_name": "args.database_name", + "compatibility_level": "args.compatibility_level", + "emit_cloudevents_columns": "args.emit_cloudevents_columns" + } + }, + "extensions": [ + ".avsc" + ], + "args": [ + { + "name": "input", + "type": "str", + "nargs": "?", + "help": "Path to the Avrotize schema file (or read from stdin if omitted)", + "required": false + }, + { + "name": "--out", + "type": "str", + "help": "Path to the TMSL schema JSON file", + "required": false + }, + { + "name": "--avsc", + "type": "str", + "help": "Deprecated: Path to the Avrotize schema file (for backcompat)", + "required": false + }, + { + "name": "--record-type", + "type": "str", + "help": "Record type in the Avrotize schema", + "required": false + }, + { + "name": "--database-name", + "type": "str", + "help": "Tabular model database name (defaults to the selected record type name)", + "required": false, + "default": "" + }, + { + "name": "--compatibility-level", + "type": "int", + "help": "Tabular model compatibility level", + "required": false, + "default": 1605 + }, + { + "name": "--emit-cloudevents-columns", + "type": "bool", + "help": "Add CloudEvents columns to the TMSL table", + "default": false, + "required": false + } + ], + "suggested_output_file_path": "{input_file_name}.tmsl.json", + "prompts": [ + { + "name": "--emit-cloudevents-columns", + "message": "Add CloudEvents columns to the TMSL schema?", + "type": 
"bool", + "default": false + } + ] + }, + { + "command": "s2tsml", + "description": "Convert JSON Structure to Tabular Model Scripting Language (TMSL) schema", + "group": "3_Datalake", + "function": { + "name": "avrotize.structuretotsml.convert_structure_to_tmsl", + "args": { + "structure_schema_path": "input_file_path", + "tmsl_file_path": "output_file_path", + "structure_record_type": "args.record_type", + "database_name": "args.database_name", + "compatibility_level": "args.compatibility_level", + "emit_cloudevents_columns": "args.emit_cloudevents_columns" + } + }, + "extensions": [ + ".struct.json", + ".json" + ], + "args": [ + { + "name": "input", + "type": "str", + "nargs": "?", + "help": "Path to the JSON Structure schema file (or read from stdin if omitted)", + "required": false + }, + { + "name": "--out", + "type": "str", + "help": "Path to the TMSL schema JSON file", + "required": false + }, + { + "name": "--record-type", + "type": "str", + "help": "Record type in the JSON Structure schema", + "required": false + }, + { + "name": "--database-name", + "type": "str", + "help": "Tabular model database name (defaults to the selected record type name)", + "required": false, + "default": "" + }, + { + "name": "--compatibility-level", + "type": "int", + "help": "Tabular model compatibility level", + "required": false, + "default": 1605 + }, + { + "name": "--emit-cloudevents-columns", + "type": "bool", + "help": "Add CloudEvents columns to the TMSL table", + "default": false, + "required": false + } + ], + "suggested_output_file_path": "{input_file_name}.tmsl.json", + "prompts": [ + { + "name": "--emit-cloudevents-columns", + "message": "Add CloudEvents columns to the TMSL schema?", + "type": "bool", + "default": false + } + ] + }, { "command": "pq2a", "description": "Convert Parquet schema to Avrotize schema", diff --git a/avrotize/jstructtoavro.py b/avrotize/jstructtoavro.py index b38774c..487de2b 100644 --- a/avrotize/jstructtoavro.py +++ 
b/avrotize/jstructtoavro.py @@ -364,6 +364,11 @@ def _convert_object(self, schema: Dict[str, Any], namespace: Optional[str], name if 'description' in merged_schema: avro_record['doc'] = merged_schema['description'] + + if isinstance(merged_schema.get('x-avrotize-unique'), list): + avro_record['unique'] = merged_schema['x-avrotize-unique'] + if isinstance(merged_schema.get('x-avrotize-foreignKeys'), list): + avro_record['foreignKeys'] = merged_schema['x-avrotize-foreignKeys'] # Convert properties to fields properties = merged_schema.get('properties', {}) diff --git a/avrotize/sqltoavro.py b/avrotize/sqltoavro.py index 71735d9..6c0a8d5 100644 --- a/avrotize/sqltoavro.py +++ b/avrotize/sqltoavro.py @@ -578,6 +578,90 @@ def fetch_primary_keys(self, table_name: str, table_schema: str = 'public') -> L cursor.close() return pk_columns + def fetch_foreign_keys(self, table_name: str, table_schema: str = 'public') -> List[Dict[str, Any]]: + """Fetches foreign key mappings for a table.""" + cursor = self.connection.cursor() + + if self.dialect == 'postgres': + query = """ + SELECT + tc.constraint_name, + kcu.column_name, + ccu.table_schema AS foreign_table_schema, + ccu.table_name AS foreign_table_name, + ccu.column_name AS foreign_column_name, + kcu.ordinal_position + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage ccu + ON tc.constraint_name = ccu.constraint_name + AND tc.table_schema = ccu.table_schema + WHERE tc.table_name = %s + AND tc.table_schema = %s + AND tc.constraint_type = 'FOREIGN KEY' + ORDER BY tc.constraint_name, kcu.ordinal_position + """ + cursor.execute(query, (table_name, table_schema)) + elif self.dialect == 'mysql': + query = """ + SELECT + constraint_name, + column_name, + referenced_table_schema, + referenced_table_name, + referenced_column_name, + ordinal_position + FROM 
information_schema.key_column_usage + WHERE table_name = %s + AND table_schema = %s + AND referenced_table_name IS NOT NULL + ORDER BY constraint_name, ordinal_position + """ + cursor.execute(query, (table_name, table_schema)) + elif self.dialect == 'sqlserver': + query = """ + SELECT + fk.name AS constraint_name, + cp.name AS column_name, + sr.name AS foreign_table_schema, + tr.name AS foreign_table_name, + cr.name AS foreign_column_name, + fkc.constraint_column_id AS ordinal_position + FROM sys.foreign_keys fk + INNER JOIN sys.foreign_key_columns fkc ON fk.object_id = fkc.constraint_object_id + INNER JOIN sys.tables tp ON fk.parent_object_id = tp.object_id + INNER JOIN sys.schemas sp ON tp.schema_id = sp.schema_id + INNER JOIN sys.columns cp ON fkc.parent_object_id = cp.object_id AND fkc.parent_column_id = cp.column_id + INNER JOIN sys.tables tr ON fk.referenced_object_id = tr.object_id + INNER JOIN sys.schemas sr ON tr.schema_id = sr.schema_id + INNER JOIN sys.columns cr ON fkc.referenced_object_id = cr.object_id AND fkc.referenced_column_id = cr.column_id + WHERE tp.name = %s AND sp.name = %s + ORDER BY fk.name, fkc.constraint_column_id + """ + cursor.execute(query, (table_name, table_schema)) + else: + cursor.close() + return [] + + grouped: Dict[str, Dict[str, Any]] = {} + for row in cursor.fetchall(): + constraint_name = row[0] + if constraint_name not in grouped: + grouped[constraint_name] = { + 'constraint_name': constraint_name, + 'columns': [], + 'foreign_table_schema': row[2], + 'foreign_table_name': row[3], + 'foreign_columns': [] + } + grouped[constraint_name]['columns'].append(row[1]) + grouped[constraint_name]['foreign_columns'].append(row[4]) + + cursor.close() + return list(grouped.values()) + def fetch_table_comment(self, table_name: str, table_schema: str = 'public') -> str | None: """Fetches table comment/description.""" cursor = self.connection.cursor() @@ -809,6 +893,7 @@ def table_to_avro_schema( """Converts a SQL table to Avro schema.""" 
def convert_structure_to_tmsl(
    structure_schema_path: str,
    tmsl_file_path: str,
    structure_record_type: Optional[str] = None,
    database_name: str = "",
    compatibility_level: int = 1605,
    emit_cloudevents_columns: bool = False,
) -> None:
    """Convert a JSON Structure schema file to a TMSL JSON file.

    The JSON Structure document is first lowered to an Avro schema and then
    handed to :class:`AvroToTmslConverter` for TMSL generation.
    """
    with open(structure_schema_path, "r", encoding="utf-8") as source:
        structure_schema = json.load(source)

    avro_schema = JsonStructureToAvro().convert(structure_schema)

    tmsl_document = AvroToTmslConverter().build_tmsl_schema(
        avro_schema,
        avro_record_type=structure_record_type,
        database_name=database_name,
        compatibility_level=compatibility_level,
        emit_cloudevents_columns=emit_cloudevents_columns,
    )

    with open(tmsl_file_path, "w", encoding="utf-8") as sink:
        json.dump(tmsl_document, sink, indent=2)
+""" + +import json +from typing import Any, Dict, List + + +ALLOWED_TMSL_COMMANDS = { + "create", + "createOrReplace", + "alter", + "delete", + "refresh", + "mergePartitions", + "sequence", +} + +ALLOWED_READ_WRITE_MODES = {"readWrite", "readOnly", "readOnlyExclusive"} +ALLOWED_MODEL_DEFAULT_MODES = {"import", "directQuery", "default"} +ALLOWED_ALIGNMENT = {"default", "left", "right", "center"} +ALLOWED_SUMMARIZE_BY = {"default", "none", "sum", "min", "max", "count", "average", "distinctCount"} +ALLOWED_COLUMN_TYPES = {"data", "calculated", "rowNumber", "calculatedTableColumn"} +ALLOWED_COLUMN_DATA_TYPES = { + "automatic", + "string", + "int64", + "double", + "dateTime", + "decimal", + "boolean", + "binary", + "unknown", + "variant", +} + + +class TmslValidationError(Exception): + """Raised when TMSL validation fails.""" + + +class TmslValidator: + """Local structural validator for TMSL scripts.""" + + def __init__(self) -> None: + self.errors: List[str] = [] + + def _err(self, path: str, message: str) -> None: + self.errors.append(f"{path}: {message}") + + def _expect_type(self, value: Any, expected_type: type, path: str) -> bool: + if not isinstance(value, expected_type): + self._err(path, f"Expected {expected_type.__name__}, got {type(value).__name__}") + return False + return True + + def _check_allowed_keys(self, obj: Dict[str, Any], allowed_keys: set[str], path: str) -> None: + for key in obj.keys(): + if key not in allowed_keys: + self._err(path, f"Unexpected property '{key}'") + + def _validate_annotation(self, annotation: Any, path: str) -> None: + if not self._expect_type(annotation, dict, path): + return + self._check_allowed_keys(annotation, {"name", "value"}, path) + if not isinstance(annotation.get("name"), str) or not annotation.get("name"): + self._err(f"{path}.name", "Annotation name must be a non-empty string") + if "value" in annotation: + value = annotation["value"] + if isinstance(value, str): + return + if isinstance(value, list) and 
all(isinstance(item, str) for item in value): + return + self._err(f"{path}.value", "Annotation value must be a string or an array of strings") + + def _validate_annotations(self, annotations: Any, path: str) -> None: + if not self._expect_type(annotations, list, path): + return + for i, annotation in enumerate(annotations): + self._validate_annotation(annotation, f"{path}[{i}]") + + def _validate_column(self, column: Any, path: str) -> None: + if not self._expect_type(column, dict, path): + return + + allowed_keys = { + "name", + "dataType", + "dataCategory", + "description", + "isHidden", + "isUnique", + "isKey", + "isNullable", + "alignment", + "tableDetailPosition", + "isDefaultLabel", + "isDefaultImage", + "summarizeBy", + "type", + "formatString", + "isAvailableInMdx", + "keepUniqueRows", + "displayOrdinal", + "sourceProviderType", + "displayFolder", + "sourceColumn", + "sortByColumn", + "isNameInferred", + "isDataTypeInferred", + "columnOriginTable", + "columnOriginColumn", + "expression", + "annotations", + } + self._check_allowed_keys(column, allowed_keys, path) + + if not isinstance(column.get("name"), str) or not column.get("name"): + self._err(f"{path}.name", "Column name must be a non-empty string") + + data_type = column.get("dataType") + if not isinstance(data_type, str) or data_type not in ALLOWED_COLUMN_DATA_TYPES: + self._err( + f"{path}.dataType", + "Column dataType must be one of: " + ", ".join(sorted(ALLOWED_COLUMN_DATA_TYPES)), + ) + + if "alignment" in column and column["alignment"] not in ALLOWED_ALIGNMENT: + self._err(f"{path}.alignment", "Invalid alignment value") + + if "summarizeBy" in column and column["summarizeBy"] not in ALLOWED_SUMMARIZE_BY: + self._err(f"{path}.summarizeBy", "Invalid summarizeBy value") + + if "type" in column and column["type"] not in ALLOWED_COLUMN_TYPES: + self._err(f"{path}.type", "Invalid column type value") + + if "annotations" in column: + self._validate_annotations(column["annotations"], 
f"{path}.annotations") + + def _validate_table(self, table: Any, path: str) -> None: + if not self._expect_type(table, dict, path): + return + + allowed_keys = { + "name", + "dataCategory", + "description", + "isHidden", + "partitions", + "annotations", + "columns", + "measures", + "hierarchies", + } + self._check_allowed_keys(table, allowed_keys, path) + + if not isinstance(table.get("name"), str) or not table.get("name"): + self._err(f"{path}.name", "Table name must be a non-empty string") + + columns = table.get("columns") + if columns is None: + self._err(f"{path}.columns", "Missing 'columns' collection") + elif self._expect_type(columns, list, f"{path}.columns"): + for i, column in enumerate(columns): + self._validate_column(column, f"{path}.columns[{i}]") + + if "annotations" in table: + self._validate_annotations(table["annotations"], f"{path}.annotations") + + def _validate_model(self, model: Any, path: str) -> None: + if not self._expect_type(model, dict, path): + return + + # Per TMSL reference schema, model has additionalProperties: false. 
+ allowed_keys = { + "name", + "description", + "storageLocation", + "defaultMode", + "defaultDataView", + "culture", + "collation", + "annotations", + "tables", + "relationships", + "dataSources", + "perspectives", + "cultures", + "roles", + "functions", + } + self._check_allowed_keys(model, allowed_keys, path) + + if "defaultMode" in model and model["defaultMode"] not in ALLOWED_MODEL_DEFAULT_MODES: + self._err(f"{path}.defaultMode", "Invalid defaultMode value") + + if "culture" in model and not isinstance(model["culture"], str): + self._err(f"{path}.culture", "culture must be a string") + + tables = model.get("tables") + if tables is None: + self._err(f"{path}.tables", "Missing 'tables' collection") + elif self._expect_type(tables, list, f"{path}.tables"): + for i, table in enumerate(tables): + self._validate_table(table, f"{path}.tables[{i}]") + + if "annotations" in model: + self._validate_annotations(model["annotations"], f"{path}.annotations") + + def _validate_database(self, database: Any, path: str) -> None: + if not self._expect_type(database, dict, path): + return + + # Database schema also uses additionalProperties: false. 
+ allowed_keys = { + "name", + "id", + "description", + "compatibilityLevel", + "readWriteMode", + "model", + "annotations", + } + self._check_allowed_keys(database, allowed_keys, path) + + if not isinstance(database.get("name"), str) or not database.get("name"): + self._err(f"{path}.name", "Database name must be a non-empty string") + + compatibility_level = database.get("compatibilityLevel") + if compatibility_level is not None: + if not isinstance(compatibility_level, int): + self._err(f"{path}.compatibilityLevel", "compatibilityLevel must be an integer") + elif compatibility_level < 1200: + self._err(f"{path}.compatibilityLevel", "compatibilityLevel must be 1200 or higher for TMSL") + + if "readWriteMode" in database and database["readWriteMode"] not in ALLOWED_READ_WRITE_MODES: + self._err(f"{path}.readWriteMode", "Invalid readWriteMode value") + + if "model" in database: + self._validate_model(database["model"], f"{path}.model") + + if "annotations" in database: + self._validate_annotations(database["annotations"], f"{path}.annotations") + + def _validate_create_or_replace(self, command: Any, path: str) -> None: + if not self._expect_type(command, dict, path): + return + + allowed_keys = {"object", "database", "dataSource", "table", "partition", "role"} + self._check_allowed_keys(command, allowed_keys, path) + + target = command.get("object") + if not self._expect_type(target, dict, f"{path}.object"): + return + if not isinstance(target.get("database"), str) or not target.get("database"): + self._err(f"{path}.object.database", "Target database must be a non-empty string") + + if "database" in command: + self._validate_database(command["database"], f"{path}.database") + + def validate_tmsl(self, document: Any) -> List[str]: + """Validate a parsed TMSL document and return a list of errors.""" + self.errors = [] + + if not self._expect_type(document, dict, "$"): + return self.errors + + command_keys = [key for key in document.keys() if key in 
ALLOWED_TMSL_COMMANDS] + if len(command_keys) != 1: + self._err("$", "TMSL must contain exactly one top-level command") + return self.errors + + command_name = command_keys[0] + for key in document.keys(): + if key != command_name: + self._err("$", f"Unexpected top-level property '{key}'") + + if command_name == "createOrReplace": + self._validate_create_or_replace(document[command_name], f"$.{command_name}") + + return self.errors + + def validate_file(self, tmsl_file_path: str) -> List[str]: + """Validate a TMSL JSON file and return a list of errors.""" + with open(tmsl_file_path, "r", encoding="utf-8") as f: + document = json.load(f) + return self.validate_tmsl(document) + + +def validate_tmsl_file(tmsl_file_path: str) -> List[str]: + """Validate a TMSL JSON file and return a list of errors.""" + validator = TmslValidator() + return validator.validate_file(tmsl_file_path) + + +def validate_tmsl(tmsl_file_path: str, quiet: bool = False) -> None: + """CLI command entrypoint for local TMSL validation.""" + errors = validate_tmsl_file(tmsl_file_path) + + if not quiet: + if errors: + for error in errors: + print(f"✗ {error}") + print(f"\nValidation summary: {len(errors)} error(s)") + else: + print("✓ Valid TMSL") + + if errors: + exit(1) diff --git a/test/test_avrototsml.py b/test/test_avrototsml.py new file mode 100644 index 0000000..e545da4 --- /dev/null +++ b/test/test_avrototsml.py @@ -0,0 +1,137 @@ +"""Tests for Avro to TMSL conversion.""" + +import json +import os +import sys +import tempfile +import unittest + +# Ensure the project root is in the system path for imports +current_script_path = os.path.abspath(__file__) +project_root = os.path.dirname(os.path.dirname(current_script_path)) +sys.path.append(project_root) + +from avrotize.avrototsml import convert_avro_to_tmsl + + +class TestAvroToTmsl(unittest.TestCase): + """Test cases for Avro to TMSL conversion.""" + + def _load_reference(self, file_name: str): + cwd = os.getcwd() + ref_path = os.path.join(cwd, 
"test", "tsml", file_name) + with open(ref_path, "r", encoding="utf-8") as f: + return json.load(f) + + def test_convert_address_avsc_to_tmsl(self): + """Test converting address.avsc to a TMSL schema.""" + cwd = os.getcwd() + avro_path = os.path.join(cwd, "test", "avsc", "address.avsc") + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "address.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + + convert_avro_to_tmsl(avro_path, None, tmsl_path) + + self.assertTrue(os.path.exists(tmsl_path)) + with open(tmsl_path, "r", encoding="utf-8") as f: + tmsl = json.load(f) + + self.assertIn("createOrReplace", tmsl) + expected = self._load_reference("address-ref.tmsl.json") + self.assertEqual(expected, tmsl) + database = tmsl["createOrReplace"]["database"] + self.assertIn("model", database) + self.assertIn("tables", database["model"]) + self.assertEqual(len(database["model"]["tables"]), 1) + + table = database["model"]["tables"][0] + columns = table.get("columns", []) + column_names = {column["name"] for column in columns} + self.assertIn("streetAddress", column_names) + self.assertIn("postOfficeBox", column_names) + + nullable_column = next(column for column in columns if column["name"] == "postOfficeBox") + self.assertTrue(nullable_column.get("isNullable", False)) + + def test_convert_with_cloudevents_columns(self): + """Test conversion with CloudEvents columns enabled.""" + cwd = os.getcwd() + avro_path = os.path.join(cwd, "test", "avsc", "address.avsc") + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "address-ce.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + + convert_avro_to_tmsl(avro_path, None, tmsl_path, emit_cloudevents_columns=True) + + with open(tmsl_path, "r", encoding="utf-8") as f: + tmsl = json.load(f) + + columns = tmsl["createOrReplace"]["database"]["model"]["tables"][0]["columns"] + column_names = {column["name"] for column in columns} + self.assertIn("___type", column_names) + 
self.assertIn("___source", column_names) + self.assertIn("___id", column_names) + self.assertIn("___time", column_names) + self.assertIn("___subject", column_names) + + def test_convert_with_relationships_from_foreign_keys(self): + """Test relationship generation from Avro foreignKeys metadata.""" + avro_schema = [ + { + "type": "record", + "name": "Customer", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "name", "type": "string"} + ], + "unique": ["id"], + "altnames": {"sql": "public.customers"} + }, + { + "type": "record", + "name": "Order", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "customer_id", "type": "long"} + ], + "unique": ["id"], + "foreignKeys": [ + { + "name": "fk_orders_customers", + "columns": ["customer_id"], + "referencedTable": "Customer", + "referencedColumns": ["id"], + "referencedTableSql": "public.customers" + } + ], + "altnames": {"sql": "public.orders"} + } + ] + + avro_path = os.path.join(tempfile.gettempdir(), "avrotize", "relational.avsc") + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "relational.tmsl.json") + os.makedirs(os.path.dirname(avro_path), exist_ok=True) + + with open(avro_path, "w", encoding="utf-8") as f: + json.dump(avro_schema, f, indent=2) + + convert_avro_to_tmsl(avro_path, None, tmsl_path) + + with open(tmsl_path, "r", encoding="utf-8") as f: + tmsl = json.load(f) + + expected = self._load_reference("relational-ref.tmsl.json") + self.assertEqual(expected, tmsl) + + model = tmsl["createOrReplace"]["database"]["model"] + self.assertIn("relationships", model) + self.assertEqual(1, len(model["relationships"])) + + relationship = model["relationships"][0] + self.assertEqual("Order", relationship["fromTable"]) + self.assertEqual("customer_id", relationship["fromColumn"]) + self.assertEqual("Customer", relationship["toTable"]) + self.assertEqual("id", relationship["toColumn"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_jstructtoavro.py 
b/test/test_jstructtoavro.py index a73764c..5ba7b27 100644 --- a/test/test_jstructtoavro.py +++ b/test/test_jstructtoavro.py @@ -272,6 +272,37 @@ def test_round_trip_conversion(self): self.assertEqual(result['namespace'], 'com.example') self.assertEqual(len(result['fields']), 3) + def test_round_trip_preserves_relational_metadata(self): + """Test that unique/foreignKeys metadata is preserved across A2S/S2A.""" + original_avro = { + "type": "record", + "name": "Order", + "namespace": "com.example", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "customer_id", "type": "long"} + ], + "unique": ["id"], + "foreignKeys": [ + { + "name": "fk_orders_customers", + "columns": ["customer_id"], + "referencedTable": "Customer", + "referencedColumns": ["id"], + "referencedTableSql": "public.customers" + } + ] + } + + structure = AvroToJsonStructure().convert(original_avro) + round_tripped = JsonStructureToAvro().convert(structure) + + self.assertEqual(["id"], round_tripped.get("unique")) + self.assertEqual(1, len(round_tripped.get("foreignKeys", []))) + fk = round_tripped["foreignKeys"][0] + self.assertEqual(["customer_id"], fk.get("columns")) + self.assertEqual("Customer", fk.get("referencedTable")) + def test_file_conversion(self): """Test file-based conversion.""" structure = { diff --git a/test/test_main.py b/test/test_main.py index 558aee7..02c5618 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -37,6 +37,14 @@ def get_struct(): """Provides the JSON Structure input file path.""" return os.path.join(os.path.dirname(__file__), 'jsons', 'address-ref.struct.json') +def get_struct_basic_types(): + """Provides a basic JSON Structure input file path.""" + return os.path.join(os.path.dirname(__file__), 'struct', 'basic-types.struct.json') + +def get_tmsl(): + """Provides a TMSL file path for validation command tests.""" + return os.path.join(tempfile.gettempdir(), 'output.tmsl.json') + class TestMain(unittest.TestCase): 
@patch('argparse.ArgumentParser.parse_args', return_value=argparse.Namespace(command=None)) @@ -112,6 +120,25 @@ def test_main_a2ib_command(self, mock_parse_args): main() assert os.path.exists(tempfile.gettempdir() + '/output.iceberg') # Add assertion for file existence + @patch('argparse.ArgumentParser.parse_args', return_value=argparse.Namespace(command='a2tsml', input=get_avsc(), out=tempfile.gettempdir() + '/output.tmsl.json', record_type='Northwind.Order', database_name='Northwind', compatibility_level=1605, emit_cloudevents_columns=True)) + def test_main_a2tsml_command(self, mock_parse_args): + """Test main function with a2tsml command.""" + main() + assert os.path.exists(tempfile.gettempdir() + '/output.tmsl.json') # Add assertion for file existence + + @patch('argparse.ArgumentParser.parse_args', return_value=argparse.Namespace(command='validate-tmsl', input=get_tmsl(), quiet=True)) + def test_main_validate_tmsl_command(self, mock_parse_args): + """Test main function with validate-tmsl command.""" + with open(get_tmsl(), 'w', encoding='utf-8') as f: + f.write('{"createOrReplace":{"object":{"database":"db"},"database":{"name":"db","compatibilityLevel":1605,"model":{"culture":"en-US","tables":[{"name":"t","columns":[{"name":"c","dataType":"string","sourceColumn":"c"}]}]}}}}') + main() + + @patch('argparse.ArgumentParser.parse_args', return_value=argparse.Namespace(command='s2tsml', input=get_struct_basic_types(), out=tempfile.gettempdir() + '/output-struct.tmsl.json', record_type=None, database_name='BasicTypes', compatibility_level=1605, emit_cloudevents_columns=False)) + def test_main_s2tsml_command(self, mock_parse_args): + """Test main function with s2tsml command.""" + main() + assert os.path.exists(tempfile.gettempdir() + '/output-struct.tmsl.json') # Add assertion for file existence + @patch('argparse.ArgumentParser.parse_args', return_value=argparse.Namespace(command='pq2a', input=get_parquet(), out=tempfile.gettempdir() + '/output.avsc', 
namespace='com.example')) def test_main_pq2a_command(self, mock_parse_args): """Test main function with pq2a command.""" diff --git a/test/test_structuretotsml.py b/test/test_structuretotsml.py new file mode 100644 index 0000000..3328b70 --- /dev/null +++ b/test/test_structuretotsml.py @@ -0,0 +1,40 @@ +"""Tests for JSON Structure to TMSL conversion.""" + +import json +import os +import sys +import tempfile +import unittest + +# Ensure the project root is in the system path for imports +current_script_path = os.path.abspath(__file__) +project_root = os.path.dirname(os.path.dirname(current_script_path)) +sys.path.append(project_root) + +from avrotize.structuretotsml import convert_structure_to_tmsl + + +class TestStructureToTmsl(unittest.TestCase): + """Test cases for JSON Structure to TMSL conversion.""" + + def test_convert_basic_types_structure_to_tmsl(self): + """Test converting basic-types.struct.json to a TMSL schema.""" + cwd = os.getcwd() + structure_path = os.path.join(cwd, "test", "struct", "basic-types.struct.json") + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "basic-types.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + + convert_structure_to_tmsl(structure_path, tmsl_path) + + self.assertTrue(os.path.exists(tmsl_path)) + with open(tmsl_path, "r", encoding="utf-8") as f: + tmsl = json.load(f) + + self.assertIn("createOrReplace", tmsl) + table = tmsl["createOrReplace"]["database"]["model"]["tables"][0] + self.assertIn("columns", table) + self.assertGreater(len(table["columns"]), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_tmslvalidate.py b/test/test_tmslvalidate.py new file mode 100644 index 0000000..a5b4a77 --- /dev/null +++ b/test/test_tmslvalidate.py @@ -0,0 +1,105 @@ +"""Tests for local TMSL validation.""" + +import json +import os +import sys +import tempfile +import unittest + +current_script_path = os.path.abspath(__file__) +project_root = 
os.path.dirname(os.path.dirname(current_script_path)) +sys.path.append(project_root) + +from avrotize.avrototsml import convert_avro_to_tmsl +from avrotize.tmslvalidate import validate_tmsl_file + + +class TestTmslValidate(unittest.TestCase): + """Test cases for TMSL validation.""" + + def test_validate_generated_tmsl(self): + """Generated TMSL from a2tsml should validate locally.""" + cwd = os.getcwd() + avro_path = os.path.join(cwd, "test", "avsc", "address.avsc") + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "address.validate.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + + convert_avro_to_tmsl(avro_path, None, tmsl_path) + + errors = validate_tmsl_file(tmsl_path) + self.assertEqual([], errors) + + def test_rejects_invalid_datatype(self): + """Invalid column dataType should fail validation.""" + tmsl_doc = { + "createOrReplace": { + "object": {"database": "db"}, + "database": { + "name": "db", + "compatibilityLevel": 1605, + "model": { + "culture": "en-US", + "tables": [ + { + "name": "T1", + "columns": [ + { + "name": "c1", + "dataType": "notAType", + "sourceColumn": "c1", + } + ], + } + ], + }, + }, + } + } + + tmsl_path = os.path.join(tempfile.gettempdir(), "avrotize", "invalid-datatype.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + with open(tmsl_path, "w", encoding="utf-8") as f: + json.dump(tmsl_doc, f, indent=2) + + errors = validate_tmsl_file(tmsl_path) + self.assertTrue(any("dataType" in error for error in errors)) + + def test_rejects_unexpected_property(self): + """Unexpected property should fail validation for strict object shapes.""" + tmsl_doc = { + "createOrReplace": { + "object": {"database": "db"}, + "database": { + "name": "db", + "compatibilityLevel": 1605, + "model": { + "culture": "en-US", + "tables": [ + { + "name": "T1", + "columns": [ + { + "name": "c1", + "dataType": "string", + "sourceColumn": "c1", + "unexpected": True, + } + ], + } + ], + }, + }, + } + } + + tmsl_path = 
os.path.join(tempfile.gettempdir(), "avrotize", "invalid-extra-prop.tmsl.json") + os.makedirs(os.path.dirname(tmsl_path), exist_ok=True) + with open(tmsl_path, "w", encoding="utf-8") as f: + json.dump(tmsl_doc, f, indent=2) + + errors = validate_tmsl_file(tmsl_path) + self.assertTrue(any("Unexpected property 'unexpected'" in error for error in errors)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/tsml/address-ref.tmsl.json b/test/tsml/address-ref.tmsl.json new file mode 100644 index 0000000..9cbf3c0 --- /dev/null +++ b/test/tsml/address-ref.tmsl.json @@ -0,0 +1,65 @@ +{ + "createOrReplace": { + "object": { + "database": "record" + }, + "database": { + "name": "record", + "compatibilityLevel": 1605, + "model": { + "culture": "en-US", + "tables": [ + { + "name": "record", + "columns": [ + { + "name": "type", + "dataType": "string", + "sourceColumn": "type" + }, + { + "name": "postOfficeBox", + "dataType": "string", + "sourceColumn": "postOfficeBox", + "isNullable": true + }, + { + "name": "extendedAddress", + "dataType": "string", + "sourceColumn": "extendedAddress", + "isNullable": true + }, + { + "name": "streetAddress", + "dataType": "string", + "sourceColumn": "streetAddress", + "isNullable": true + }, + { + "name": "locality", + "dataType": "string", + "sourceColumn": "locality" + }, + { + "name": "region", + "dataType": "string", + "sourceColumn": "region" + }, + { + "name": "postalCode", + "dataType": "string", + "sourceColumn": "postalCode", + "isNullable": true + }, + { + "name": "countryName", + "dataType": "string", + "sourceColumn": "countryName" + } + ] + } + ] + } + } + } +} diff --git a/test/tsml/relational-ref.tmsl.json b/test/tsml/relational-ref.tmsl.json new file mode 100644 index 0000000..d13fb7e --- /dev/null +++ b/test/tsml/relational-ref.tmsl.json @@ -0,0 +1,57 @@ +{ + "createOrReplace": { + "object": { + "database": "Customer" + }, + "database": { + "name": "Customer", + "compatibilityLevel": 1605, + "model": { + 
"culture": "en-US", + "tables": [ + { + "name": "Customer", + "columns": [ + { + "name": "id", + "dataType": "int64", + "sourceColumn": "id", + "isKey": true + }, + { + "name": "name", + "dataType": "string", + "sourceColumn": "name" + } + ] + }, + { + "name": "Order", + "columns": [ + { + "name": "id", + "dataType": "int64", + "sourceColumn": "id", + "isKey": true + }, + { + "name": "customer_id", + "dataType": "int64", + "sourceColumn": "customer_id" + } + ] + } + ], + "relationships": [ + { + "name": "Order_customer_id_to_Customer_id", + "fromTable": "Order", + "fromColumn": "customer_id", + "toTable": "Customer", + "toColumn": "id" + } + ] + } + } + } +}