diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0cc1f5ef5..a64081ee2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,8 +86,8 @@ jobs: env: ASCEND_HOME_PATH: /usr/local/Ascend/cann-8.5.0 PTOAS_ROOT: ${{ github.workspace }}/ptoas-bin - PTOAS_VERSION: v0.26 - PTOAS_SHA256: 8e8239f92b169d88fd117f7d4841739c129ecbdd8d3a01f96087df576ecd7814 + PTOAS_VERSION: v0.29 + PTOAS_SHA256: 9487c3f3366636f097076c57528781e2018856a2d2ec24f3e7b99c543286843c CMAKE_BUILD_PARALLEL_LEVEL: 16 CMAKE_C_COMPILER_LAUNCHER: ccache CMAKE_CXX_COMPILER_LAUNCHER: ccache @@ -169,8 +169,8 @@ jobs: runs-on: ubuntu-latest env: PTOAS_ROOT: ${{ github.workspace }}/ptoas-bin - PTOAS_VERSION: v0.26 - PTOAS_SHA256: 5abf99f9729997e171bee08bdb25662a7a24b62921c5ecfc05790a56a6e1b076 + PTOAS_VERSION: v0.29 + PTOAS_SHA256: 6f33b8e89d995470fc0c81dd3ae025a69c63c8a432f341bdaef0eabc3831645b container: image: ghcr.io/hw-native-sys/pypto/github-ci:latest steps: diff --git a/docs/en/dev/ir/05-operators.md b/docs/en/dev/ir/05-operators.md index 7f0339563..23d54ed56 100644 --- a/docs/en/dev/ir/05-operators.md +++ b/docs/en/dev/ir/05-operators.md @@ -219,7 +219,7 @@ UINT32 + INT32 → INT32 (signed precedence) **Location**: `src/ir/op/tensor_ops/` **Python API**: `from pypto.ir.op import tensor` -**Operations:** `tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`) +**Operations:** 
`tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`), `tensor.ci` / `tensor.arange` (contiguous integer sequence generation; lowers to `tile.ci`) **Example:** @@ -258,6 +258,7 @@ with ib.function("tensor_example") as f: | - | `tile.reshape` | Reshape tile to new dimensions (element count must match) | | - | `tile.transpose` | Swap two axes of a tile | | - | `tile.set_validshape` | Update valid-shape metadata without data movement | +| - | `tile.ci` | Generate contiguous integer sequence (start + k / start - k); dtype ∈ {INT16, INT32, UINT16, UINT32}; leading dims must be 1; innermost dim != 1 | | **Reduction** | `tile.sum` | Reduction along axis (axis, keepdim) | **Data Flow:** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)` diff --git a/docs/zh-cn/dev/ir/05-operators.md b/docs/zh-cn/dev/ir/05-operators.md index 9ca8e9529..0df51021a 100644 --- a/docs/zh-cn/dev/ir/05-operators.md +++ b/docs/zh-cn/dev/ir/05-operators.md @@ -216,7 +216,7 @@ UINT32 + INT32 → INT32 (signed precedence) **位置**:`src/ir/op/tensor_ops/` **Python API**:`from pypto.ir.op import tensor` -**操作:** `tensor.add/sub/mul/div`(逐元素,支持完整 N 维广播),`tensor.set_validshape`(内部 API,更新 valid_shape 元数据,不搬移数据 — 仅供编译器生成代码使用),`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`(排序;分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口,由 `ConvertTensorToTileOps` 转换为 tile 
操作),`tensor.gather`(按维索引;MVP 仅支持 2D 输入 + `dim=-1`,由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环),`tensor.gather_mask`(掩码模式选择;对应 `tile.gather_mask`,支持可选同位宽 `output_dtype`) +**操作:** `tensor.add/sub/mul/div`(逐元素,支持完整 N 维广播),`tensor.set_validshape`(内部 API,更新 valid_shape 元数据,不搬移数据 — 仅供编译器生成代码使用),`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`(排序;分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口,由 `ConvertTensorToTileOps` 转换为 tile 操作),`tensor.gather`(按维索引;MVP 仅支持 2D 输入 + `dim=-1`,由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环),`tensor.gather_mask`(掩码模式选择;对应 `tile.gather_mask`,支持可选同位宽 `output_dtype`),`tensor.ci` / `tensor.arange`(生成连续整数序列,下层降到 `tile.ci`) **示例:** @@ -255,6 +255,7 @@ with ib.function("tensor_example") as f: | - | `tile.reshape` | 重塑 tile 维度(元素总数须一致) | | - | `tile.transpose` | 交换 tile 的两个轴 | | - | `tile.set_validshape` | 更新 valid_shape 元数据,不搬移数据 | +| - | `tile.ci` | 生成连续整数序列(升序 start+k 或降序 start-k);dtype ∈ {INT16, INT32, UINT16, UINT32};前导维必须为 1;最内维 != 1 | | **规约** | `tile.sum` | 沿轴规约(axis, keepdim) | **数据流:** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)` diff --git a/python/pypto/ir/op/tensor_ops.py b/python/pypto/ir/op/tensor_ops.py index b18c7be77..3c943a94e 100644 --- a/python/pypto/ir/op/tensor_ops.py +++ b/python/pypto/ir/op/tensor_ops.py @@ -84,6 +84,45 @@ def full( return _ir_core.create_op_call("tensor.full", [shape_tuple, value_expr], kwargs, actual_span) +def ci( + start: int | Expr, + shape: Sequence[int | Expr] | _ir_core.MakeTuple, + dtype: DataType = DataType.INT32, + descending: bool = False, + span: Span | None = None, +) -> Call: + """Generate a contiguous integer sequence into a tensor (lowers to tile.ci). + + Note: + Lowers to ``pto.tci`` which only populates the first row. Leading + dimensions must be 1 — prefer shapes of the form ``[1, N]``. + + Args: + start: Starting integer (plain int or scalar Expr). Must match ``dtype``. 
+ shape: Destination shape (leading dims must be 1, innermost dim != 1). + dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}. + descending: If True, generate a descending sequence. + span: Optional source span for debugging (auto-captured if not provided). + + Returns: + Call expression that returns a TensorType. + """ + actual_span = _get_span_or_capture(span) + if isinstance(start, Expr): + if isinstance(start, ConstInt) and start.dtype != dtype: + start_expr = ConstInt(start.value, dtype, actual_span) + else: + start_expr = start + else: + start_expr = ConstInt(start, dtype, actual_span) + shape_tuple = _to_make_tuple(shape, actual_span) + kwargs: dict[str, Any] = {"dtype": dtype, "descending": descending} + return _ir_core.create_op_call("tensor.ci", [start_expr, shape_tuple], kwargs, actual_span) + + +arange = ci + + def read( tensor: Expr, indices: Expr | list[int | Expr] | _ir_core.MakeTuple, span: Span | None = None ) -> Call: diff --git a/python/pypto/ir/op/tile_ops.py b/python/pypto/ir/op/tile_ops.py index 2b6af86ff..1314067bb 100644 --- a/python/pypto/ir/op/tile_ops.py +++ b/python/pypto/ir/op/tile_ops.py @@ -480,6 +480,50 @@ def full( return _ir_core.create_op_call("tile.full", [shape_tuple, value_expr], kwargs, actual_span) +def ci( + start: int | Expr, + shape: Sequence[int | Expr] | _ir_core.MakeTuple, + dtype: DataType = DataType.INT32, + descending: bool = False, + span: Span | None = None, +) -> Call: + """Generate a contiguous integer sequence into a tile (pto.tci). + + For a column index ``k`` in the first row of the destination tile: + - Ascending: ``dst[0, k] = start + k`` + - Descending: ``dst[0, k] = start - k`` + + Note: + ``pto.tci`` uses the destination's valid-column count as the sequence + length and does NOT populate additional rows. Leading dimensions must + be 1 — prefer shapes of the form ``[1, N]``. + + Args: + start: Starting integer (plain int or a scalar Expr). Its dtype must match ``dtype``. 
+ shape: Destination tile shape (static, leading dims must be 1, innermost dim != 1). + dtype: Destination dtype. Must be one of {INT16, INT32, UINT16, UINT32}. + descending: If True, generate a descending sequence. + span: Optional source span for debugging (auto-captured if not provided). + + Returns: + Call expression that returns a TileType with the generated sequence. + """ + actual_span = _get_span_or_capture(span) + if isinstance(start, Expr): + if isinstance(start, ConstInt) and start.dtype != dtype: + start_expr = ConstInt(start.value, dtype, actual_span) + else: + start_expr = start + else: + start_expr = ConstInt(start, dtype, actual_span) + shape_tuple = _to_make_tuple(shape, actual_span) + kwargs: dict[str, Any] = {"dtype": dtype, "descending": descending} + return _ir_core.create_op_call("tile.ci", [start_expr, shape_tuple], kwargs, actual_span) + + +arange = ci + + def fillpad(tile: Expr, pad_value: PadValue | int | float = PadValue.zero, span: Span | None = None) -> Call: """Fill remaining tile elements with specified padding value. diff --git a/python/pypto/language/op/tensor_ops.py b/python/pypto/language/op/tensor_ops.py index a12a59e0a..6087a7e4b 100644 --- a/python/pypto/language/op/tensor_ops.py +++ b/python/pypto/language/op/tensor_ops.py @@ -26,6 +26,8 @@ "slice", "fillpad", "full", + "ci", + "arange", "matmul", "matmul_acc", "mul", @@ -264,6 +266,33 @@ def full(shape: Sequence[IntLike], dtype: DataType, value: int | float) -> Tenso return Tensor(expr=call_expr) +def ci( + start: int | Scalar, + shape: Sequence[IntLike], + dtype: DataType = DataType.INT32, + descending: bool = False, +) -> Tensor: + """Generate a contiguous integer sequence into a tensor. + + Equivalent to ``numpy.arange`` / ``torch.arange``. Lowers to ``tile.ci`` → ``pto.tci``. + + Args: + start: Starting integer (plain int or Scalar). Must match ``dtype``. + shape: Destination tensor shape (leading dims must be 1, innermost dim != 1). + dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}. Defaults to INT32. 
+ descending: If True, generate a descending sequence. + + Returns: + Tensor wrapping the ci operation. + """ + start_expr = start.unwrap() if isinstance(start, Scalar) else start + call_expr = _ir_ops.ci(start_expr, _normalize_intlike(shape), dtype=dtype, descending=descending) + return Tensor(expr=call_expr) + + +arange = ci + + def matmul( lhs: Tensor, rhs: Tensor, diff --git a/python/pypto/language/op/tile_ops.py b/python/pypto/language/op/tile_ops.py index 02ea27760..1e708b9bf 100644 --- a/python/pypto/language/op/tile_ops.py +++ b/python/pypto/language/op/tile_ops.py @@ -33,6 +33,8 @@ "concat", "move", "full", + "ci", + "arange", "fillpad", "fillpad_inplace", "get_block_idx", @@ -414,6 +416,33 @@ def full(shape: list[int], dtype: DataType, value: int | float) -> Tile: return Tile(expr=call_expr) +def ci( + start: int | Scalar, + shape: Sequence[int], + dtype: DataType = DataType.INT32, + descending: bool = False, +) -> Tile: + """Generate a contiguous integer sequence into a tile. + + Equivalent to ``numpy.arange``-style index generation. Maps to ``pto.tci``. + + Args: + start: Starting integer (plain int or a Scalar). Must match ``dtype``. + shape: Shape of the destination tile (static, leading dims must be 1, innermost dim != 1). + dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}. Defaults to INT32. + descending: If True, generate a descending sequence. + + Returns: + Tile wrapping the ci operation. + """ + start_expr = start.unwrap() if isinstance(start, Scalar) else start + call_expr = _ir_ops.ci(start_expr, list(shape), dtype=dtype, descending=descending) + return Tile(expr=call_expr) + + +arange = ci + + def fillpad(tile: Tile, pad_value: PadValue | int | float = PadValue.zero) -> Tile: """Fill remaining tile elements with specified padding value. 
diff --git a/src/backend/common/pto_ops_common.cpp b/src/backend/common/pto_ops_common.cpp index 37c87a5a3..70d7ba38e 100644 --- a/src/backend/common/pto_ops_common.cpp +++ b/src/backend/common/pto_ops_common.cpp @@ -456,13 +456,25 @@ static std::string MakeAssignCodegenPTO(const std::string& pto_op_name, const Ca static std::string MakeCiCodegenPTO(const std::string& pto_op_name, const CallPtr& op, codegen::CodegenBase& codegen_base) { auto& codegen = dynamic_cast(codegen_base); - CHECK(op->args_.size() == 1) << "Operation:[" << pto_op_name << "] requires 1 argument, but got " - << op->args_.size(); + CHECK(op->args_.size() == 2) << "Operation:[" << pto_op_name + << "] requires 2 arguments (start, shape), but got " << op->args_.size(); bool descending = op->GetKwarg("descending"); std::string src = codegen.GetExprAsCode(op->args_[0]); + std::string src_type = codegen.GetExprTypeAnnotation(op->args_[0]); std::string config_attr = descending ? "{descending = true}" : "{descending = false}"; std::string dst = codegen.GetCurrentResultTarget(); - codegen.Emit(pto_op_name + " ins(" + src + " " + config_attr + ") outs(" + dst + ")"); + std::string dst_type = codegen.GetCurrentResultTileBufTypeString(); + std::ostringstream oss; + oss << pto_op_name << " ins(" << src << " " << config_attr; + if (!src_type.empty()) { + oss << " : " << src_type; + } + oss << ") outs(" << dst; + if (!dst_type.empty()) { + oss << " : " << dst_type; + } + oss << ")"; + codegen.Emit(oss.str()); return ""; } diff --git a/src/codegen/pto/pto_codegen.cpp b/src/codegen/pto/pto_codegen.cpp index 4843435fb..4c5531167 100644 --- a/src/codegen/pto/pto_codegen.cpp +++ b/src/codegen/pto/pto_codegen.cpp @@ -722,6 +722,13 @@ std::string PTOCodegen::GetOrEmitConstant(int64_t value, DataType dt) { if (it != fs_.emitted_numeric_constants.end()) return it->second; std::string mlir_type = GetTypeString(dt); + // MLIR's arith.constant requires signless integer return types (upstream + // ArithOps.cpp 
ConstantOp::verify). For unsigned dtypes, emit the constant + // at the signless type and bridge to the unsigned type via + // builtin.unrealized_conversion_cast; some consumer ops (e.g. pto.tci) in + // turn require the operand type to match the destination dtype exactly. + bool is_unsigned = dt.IsUnsignedInt() && !mlir_type.empty() && mlir_type[0] == 'u'; + std::string signless_type = is_unsigned ? mlir_type.substr(1) : mlir_type; std::string ssa_suffix = "_" + mlir_type; std::string ssa_id; @@ -741,8 +748,17 @@ std::string PTOCodegen::GetOrEmitConstant(int64_t value, DataType dt) { } else { name = NewTemp(); } - fs_.constants_section << fs_.constants_indent << name << " = arith.constant " << value << " : " << mlir_type - << "\n"; + + if (is_unsigned) { + std::string signless_name = NewTemp(); + fs_.constants_section << fs_.constants_indent << signless_name << " = arith.constant " << value << " : " + << signless_type << "\n"; + fs_.constants_section << fs_.constants_indent << name << " = builtin.unrealized_conversion_cast " + << signless_name << " : " << signless_type << " to " << mlir_type << "\n"; + } else { + fs_.constants_section << fs_.constants_indent << name << " = arith.constant " << value << " : " + << mlir_type << "\n"; + } fs_.emitted_numeric_constants[key] = name; return name; } @@ -1160,6 +1176,9 @@ std::string PTOCodegen::GetExprTypeAnnotation(const ir::ExprPtr& expr) { return GetTypeString(const_float->dtype()); } if (auto const_int = As(expr)) { + // The SSA value produced by GetOrEmitConstant is cast back to the dtype's + // MLIR type (via unrealized_conversion_cast for unsigned), so the use-site + // annotation matches the declared dtype directly. 
return GetTypeString(const_int->dtype()); } return ""; diff --git a/src/ir/op/tensor_ops/memory.cpp b/src/ir/op/tensor_ops/memory.cpp index d86c3aa88..838bb16cf 100644 --- a/src/ir/op/tensor_ops/memory.cpp +++ b/src/ir/op/tensor_ops/memory.cpp @@ -412,6 +412,90 @@ REGISTER_OP("tensor.full") return DeduceTensorFullType(args, kwargs); }); +TypePtr DeduceTensorCiType(const std::vector& args, + const std::vector>& kwargs) { + // tensor.ci signature: (start, shape) with attrs {dtype, descending} + CHECK(args.size() == 2) << "tensor.ci requires exactly 2 arguments (start, shape), but got " << args.size(); + + bool found_dtype = false; + DataType dtype; + for (const auto& [key, value] : kwargs) { + if (key == "dtype") { + dtype = AnyCast(value, "kwarg key: dtype"); + found_dtype = true; + break; + } + } + CHECK(found_dtype) << "tensor.ci requires 'dtype' kwarg"; + CHECK(dtype == DataType::INT16 || dtype == DataType::INT32 || dtype == DataType::UINT16 || + dtype == DataType::UINT32) + << "tensor.ci dtype must be one of {INT16, INT32, UINT16, UINT32}, but got " << dtype.ToString(); + + // First arg: start scalar; dtype must match destination dtype. + auto start_scalar_type = As(args[0]->GetType()); + CHECK(start_scalar_type) << "tensor.ci requires first argument 'start' to be a scalar, but got " + << args[0]->GetType()->TypeName(); + CHECK(start_scalar_type->dtype_ == dtype) + << "tensor.ci 'start' dtype (" << start_scalar_type->dtype_.ToString() + << ") must match destination dtype (" << dtype.ToString() << ")"; + + // Second arg: shape TupleType. 
+ auto shape_tuple_type = As(args[1]->GetType()); + CHECK(shape_tuple_type) << "tensor.ci requires shape to be TupleType, but got " + << args[1]->GetType()->TypeName(); + + for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) { + auto scalar_type = As(shape_tuple_type->types_[i]); + CHECK(scalar_type) << "tensor.ci shape element " << i << " must be ScalarType, but got " + << shape_tuple_type->types_[i]->TypeName(); + CHECK(scalar_type->dtype_.IsInt()) + << "tensor.ci shape element " << i << " must have integer dtype, but got " + << scalar_type->dtype_.ToString(); + } + + std::vector shape; + shape.reserve(shape_tuple_type->types_.size()); + if (auto make_tuple = As(args[1])) { + shape = make_tuple->elements_; + } else { + for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) { + shape.emplace_back(std::make_shared(args[1], static_cast(i), args[1]->span_)); + } + } + CHECK(!shape.empty()) << "tensor.ci requires non-empty shape"; + + // ISA constraint: innermost dim Cols != 1. + if (auto last_const = As(shape.back())) { + CHECK(last_const->value_ != 1) << "tensor.ci requires the innermost dimension (Cols) to be != 1, got " + << last_const->value_; + } + + // ISA constraint: pto.tci only populates the first row. Reject multi-row compile-time + // shapes so tensor.ci metadata stays consistent with the tile.ci lowering. + for (size_t i = 0; i + 1 < shape.size(); ++i) { + if (auto const_dim = As(shape[i])) { + CHECK(const_dim->value_ == 1) + << "tensor.ci only populates the first row because pto.tci ignores valid rows; " + << "leading dimensions must be 1, but got " << const_dim->value_ << " at index " << i; + } + } + + (void)kwargs; // descending is optional bool kwarg, no validation needed beyond type. 
+ return std::make_shared(shape, dtype); +} + +REGISTER_OP("tensor.ci") + .set_op_category("TensorOp") + .set_description("Generate a contiguous integer sequence into a tensor (lowers to tile.ci)") + .add_argument("start", "Starting integer scalar (must match dst dtype)") + .add_argument("shape", "Destination shape (TupleType of ScalarType integer)") + .set_attr("dtype") + .set_attr("descending") + .f_deduce_type([](const std::vector& args, + const std::vector>& kwargs) { + return DeduceTensorCiType(args, kwargs); + }); + TypePtr DeduceTensorDimType(const std::vector& args, const std::vector>& kwargs) { // tensor.dim: Extract a shape dimension from a tensor as a scalar diff --git a/src/ir/op/tile_ops/memory.cpp b/src/ir/op/tile_ops/memory.cpp index 563d02dab..20a2beed5 100644 --- a/src/ir/op/tile_ops/memory.cpp +++ b/src/ir/op/tile_ops/memory.cpp @@ -388,6 +388,73 @@ TypePtr DeduceTileFullType(const std::vector& args, return std::make_shared(tile_shape, dtype, std::nullopt, tile_view); } +TypePtr DeduceTileCiType(const std::vector& args, + const std::vector>& kwargs, + const std::string& op_name) { + // tile.ci signature: (start, shape) with attrs {dtype, descending} + CHECK(args.size() == 2) << "The operator " << op_name + << " requires exactly 2 arguments (start, shape), but got " << args.size(); + + // Extract dtype and validate it is one of the supported integer types. + DataType dtype = GetKwarg(kwargs, "dtype"); + CHECK(dtype == DataType::INT16 || dtype == DataType::INT32 || dtype == DataType::UINT16 || + dtype == DataType::UINT32) + << "The operator " << op_name << " requires dtype to be one of {INT16, INT32, UINT16, UINT32}, but got " + << dtype.ToString(); + + // First argument is the scalar start value; its dtype must match the destination dtype. 
+ auto start_scalar_type = As(args[0]->GetType()); + CHECK(start_scalar_type) << "The operator " << op_name + << " requires first argument 'start' to be a scalar, but got " + << args[0]->GetType()->TypeName(); + CHECK(start_scalar_type->dtype_ == dtype) + << "The operator " << op_name << " requires 'start' dtype (" << start_scalar_type->dtype_.ToString() + << ") to match destination dtype (" << dtype.ToString() << ")"; + + // Second argument must be a MakeTuple of static ConstInt elements. + auto make_tuple = As(args[1]); + CHECK(make_tuple) + << "The operator " << op_name + << " requires second argument 'shape' to be a MakeTuple of compile-time constants, but got " + << args[1]->TypeName(); + + std::vector tile_shape; + tile_shape.reserve(make_tuple->elements_.size()); + for (size_t i = 0; i < make_tuple->elements_.size(); ++i) { + auto const_int = As(make_tuple->elements_[i]); + CHECK(const_int) << "The operator " << op_name << " shape element " << i + << " must be a compile-time constant (ConstInt), but got " + << make_tuple->elements_[i]->TypeName(); + CHECK(const_int->value_ > 0) << "The operator " << op_name << " shape element " << i + << " must be positive, got " << const_int->value_; + tile_shape.push_back(make_tuple->elements_[i]); + } + CHECK(!tile_shape.empty()) << "The operator " << op_name << " requires non-empty shape"; + + // ISA constraint: destination Cols != 1 (column vectors not supported by pto.tci). + auto last_dim = As(tile_shape.back()); + CHECK(last_dim && last_dim->value_ != 1) + << "The operator " << op_name << " requires the innermost dimension (Cols) to be != 1, got " + << (last_dim ? last_dim->value_ : -1); + + // ISA constraint: pto.tci only populates the first row and ignores valid rows, so every + // leading dimension must be 1. Reject multi-row shapes here to keep type metadata truthful. 
+ for (size_t i = 0; i + 1 < tile_shape.size(); ++i) { + auto leading_dim = As(tile_shape[i]); + CHECK(leading_dim && leading_dim->value_ == 1) + << "The operator " << op_name << " only populates the first row because pto.tci ignores valid rows; " + << "leading dimensions must be 1, but got " << (leading_dim ? leading_dim->value_ : -1) + << " at index " << i; + } + + // descending kwarg is optional and defaults to false. + (void)GetKwarg(kwargs, "descending", false); + + TileView tile_view; + tile_view.valid_shape = tile_shape; + return std::make_shared(tile_shape, dtype, std::nullopt, tile_view); +} + TypePtr DeduceTileReadType(const std::vector& args, const std::vector>& kwargs, const std::string& op_name) { @@ -683,5 +750,18 @@ REGISTER_OP("tile.full") return DeduceTileFullType(args, kwargs, "tile.full"); }); +REGISTER_OP("tile.ci") + .set_op_category("TileOp") + .set_description("Generate a contiguous integer sequence into a destination tile (pto.tci)") + .add_argument("start", "Starting integer scalar (must match dst dtype)") + .add_argument("shape", "Destination shape (TupleType of ConstInt)") + .set_attr("dtype") + .set_attr("descending") + .set_output_memory(MemorySpace::Vec) + .f_deduce_type([](const std::vector& args, + const std::vector>& kwargs) { + return DeduceTileCiType(args, kwargs, "tile.ci"); + }); + } // namespace ir } // namespace pypto diff --git a/src/ir/transforms/op_conversion_registry.cpp b/src/ir/transforms/op_conversion_registry.cpp index 6a2e2cba5..68d850738 100644 --- a/src/ir/transforms/op_conversion_registry.cpp +++ b/src/ir/transforms/op_conversion_registry.cpp @@ -163,6 +163,7 @@ void OpConversionRegistry::RegisterBroadcastAndTransformOps() { RegisterSimple("tensor.set_validshape", "tile.set_validshape"); RegisterSimple("tensor.full", "tile.full"); + RegisterSimple("tensor.ci", "tile.ci"); } // ============================================================================ diff --git a/tests/st/harness/core/harness.py 
b/tests/st/harness/core/harness.py
index 350cf454d..07a5b0105 100644
--- a/tests/st/harness/core/harness.py
+++ b/tests/st/harness/core/harness.py
@@ -89,6 +89,8 @@ class DataType(Enum):
     FP16 = "fp16"
     INT32 = "int32"
     UINT32 = "uint32"
+    INT16 = "int16"
+    UINT16 = "uint16"
     INT64 = "int64"
     BOOL = "bool"
 
@@ -101,6 +103,8 @@ def torch_dtype(self) -> torch.dtype:
             DataType.FP16: torch.float16,
             DataType.INT32: torch.int32,
             DataType.UINT32: torch.int32,  # PyTorch has no uint32; use int32 (same bits)
+            DataType.INT16: torch.int16,
+            DataType.UINT16: torch.int16,  # PyTorch has limited uint16 support; use int16 (same bits)
             DataType.INT64: torch.int64,
             DataType.BOOL: torch.bool,
         }
diff --git a/tests/st/runtime/test_ci.py b/tests/st/runtime/test_ci.py
new file mode 100644
index 000000000..4ee8a6f9f
--- /dev/null
+++ b/tests/st/runtime/test_ci.py
@@ -0,0 +1,470 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+
+"""Test tile.ci / tensor.ci (arange) contiguous integer sequence generation.
+
+Covers:
+1. tile.ci ascending INT32 sequence (start=0).
+2. tile.ci ascending INT32 sequence with non-zero start.
+3. tile.ci descending INT32 sequence.
+4. tensor.ci ascending (lowers to tile.ci via conversion pass).
+5. tensor.ci descending.
+6. pl.tile.arange alias (ascending and descending).
+7. pl.tensor.arange alias (ascending and descending).
+8. tile.ci ascending UINT32 sequence with non-zero start.
+9. tile.ci ascending UINT16 sequence.
+"""
+
+from typing import Any
+
+import pypto.language as pl
+import pytest
+import torch
+from harness.core.harness import DataType, PTOTestCase, TensorSpec
+from pypto.backend import BackendType
+from pypto.ir.pass_manager import OptimizationStrategy
+
+ROWS = 1
+COLS = 32
+N = COLS
+
+
+# --- Programs ---
+
+
+@pl.program
+class CiAscendStart0Program:
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(0, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class CiAscendStart10Program:
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(10, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class CiDescendingProgram:
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(
+            N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True
+        )
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class CiTensorAscendProgram:
+    """tensor.ci — Opaque main + pl.at(CORE_GROUP) + pl.assemble writes result into Out."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.ci(0, [ROWS, COLS], dtype=pl.INT32)
+            output = pl.assemble(output, seq, [0, 0])
+        return output
+
+
+@pl.program
+class CiTensorDescendingProgram:
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.ci(N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True)
+            output = pl.assemble(output, seq, [0, 0])
+        return output
+
+
+@pl.program
+class TileArangeAliasProgram:
+    """pl.tile.arange should be the alias of pl.tile.ci."""
+
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.arange(0, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class TileArangeDescendingProgram:
+    """pl.tile.arange descending — alias of pl.tile.ci with descending=True."""
+
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.arange(
+            N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True
+        )
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class TensorArangeAscendingProgram:
+    """pl.tensor.arange ascending — alias of pl.tensor.ci."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.arange(0, [ROWS, COLS], dtype=pl.INT32)
+            output = pl.assemble(output, seq, [0, 0])
+        return output
+
+
+@pl.program
+class TensorArangeAliasProgram:
+    """pl.tensor.arange descending — alias of pl.tensor.ci with descending=True."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.arange(N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True)
+            output = pl.assemble(output, seq, [0, 0])
+        return output
+
+
+@pl.program
+class CiUint32AscendProgram:
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.UINT32] = pl.tile.ci(5, [ROWS, COLS], dtype=pl.UINT32)
+        out: pl.Tensor[[ROWS, COLS], pl.UINT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT32]:
+        output = self.kernel(output)
+        return output
+
+
+@pl.program
+class CiUint16AscendProgram:
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT16]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT16]:
+        seq: pl.Tile[[ROWS, COLS], pl.UINT16] = pl.tile.ci(0, [ROWS, COLS], dtype=pl.UINT16)
+        out: pl.Tensor[[ROWS, COLS], pl.UINT16] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT16]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT16]:
+        output = self.kernel(output)
+        return output
+
+
+# --- Test Cases ---
+
+
+class _CiBaseTestCase(PTOTestCase):
+    """Common setup for the ci/arange cases: Default strategy, Ascend910B backend."""
+
+    __test__ = False
+
+    def get_strategy(self) -> OptimizationStrategy:
+        return OptimizationStrategy.Default
+
+    def get_backend_type(self) -> BackendType:
+        return BackendType.Ascend910B
+
+
+class CiAscendStart0TestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_ascend_start0"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiAscendStart0Program
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiAscendStart10TestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_ascend_start10"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiAscendStart10Program
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(10, 10 + N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiDescendingTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_descending"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiDescendingProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N - 1, -1, -1, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiTensorAscendTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_tensor_ascend"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiTensorAscendProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiTensorDescendingTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_tensor_descending"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiTensorDescendingProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N - 1, -1, -1, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TileArangeAliasTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "tile_arange_alias"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return TileArangeAliasProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TileArangeDescendingTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "tile_arange_descending"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return TileArangeDescendingProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N - 1, -1, -1, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TensorArangeAscendingTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "tensor_arange_ascending"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return TensorArangeAscendingProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TensorArangeAliasTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "tensor_arange_alias"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return TensorArangeAliasProgram
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N - 1, -1, -1, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiUint32AscendTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_uint32_ascend"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.UINT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiUint32AscendProgram
+
+    def compute_expected(self, tensors, params=None):
+        # DataType.UINT32 maps to torch.int32 in the harness (same bits), so build int32 directly.
+        tensors["output"][:] = torch.arange(5, 5 + N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiUint16AscendTestCase(_CiBaseTestCase):
+    def get_name(self) -> str:
+        return "ci_uint16_ascend"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.UINT16, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiUint16AscendProgram
+
+    def compute_expected(self, tensors, params=None):
+        # DataType.UINT16 maps to torch.int16 in the harness (same bits), so build int16 directly.
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int16).reshape(ROWS, COLS)
+
+
+# --- Tests ---
+
+
+class TestCi:
+    """Verify tile.ci / tensor.ci produce correct integer sequences on device."""
+
+    def test_ci_ascend_start0(self, test_runner):
+        result = test_runner.run(CiAscendStart0TestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_ascend_start10(self, test_runner):
+        result = test_runner.run(CiAscendStart10TestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_descending(self, test_runner):
+        result = test_runner.run(CiDescendingTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_tensor_ascend(self, test_runner):
+        result = test_runner.run(CiTensorAscendTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_tensor_descending(self, test_runner):
+        result = test_runner.run(CiTensorDescendingTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_tile_arange_alias(self, test_runner):
+        result = test_runner.run(TileArangeAliasTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_tile_arange_descending(self, test_runner):
+        result = test_runner.run(TileArangeDescendingTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_tensor_arange_alias(self, test_runner):
+        result = test_runner.run(TensorArangeAliasTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_tensor_arange_ascending(self, test_runner):
+        result = test_runner.run(TensorArangeAscendingTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_uint32_ascend(self, test_runner):
+        result = test_runner.run(CiUint32AscendTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+    def test_ci_uint16_ascend(self, test_runner):
+        result = test_runner.run(CiUint16AscendTestCase())
+        assert result.passed, f"Test failed: {result.error}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/operators/test_tensor_ops.py b/tests/ut/ir/operators/test_tensor_ops.py
index 9db52d020..c61e9b787 100644
--- 
a/tests/ut/ir/operators/test_tensor_ops.py
+++ b/tests/ut/ir/operators/test_tensor_ops.py
@@ -2212,5 +2212,50 @@ def test_tensor_gather_rejects_mixed_index_and_mask():
         ir.op.tensor.gather(inp, dim=-1, index=idx, mask_pattern=1)
 
 
+class TestTensorCiOp:
+    """Construction and validation checks for tensor.ci (contiguous integer sequence)."""
+
+    def test_tensor_ci_ascending(self):
+        """A default call yields a rank-2 INT32 TensorType and prints as tensor.ci."""
+        ci_call = tensor.ci(0, [1, 32], dtype=DataType.INT32)
+        result_type = ci_call.type
+        assert isinstance(result_type, ir.TensorType)
+        assert result_type.dtype == DataType.INT32
+        assert len(result_type.shape) == 2
+        assert "tensor.ci" in str(ci_call)
+
+    def test_tensor_ci_descending_kwarg_printed(self):
+        """descending=True must show up in the printed call."""
+        printed = str(tensor.ci(10, [1, 16], dtype=DataType.INT32, descending=True))
+        assert "descending=True" in printed
+
+    def test_tensor_ci_rejects_float_dtype(self):
+        """FP32 raises ValueError whose message names the four integer dtypes in order."""
+        supported = r"INT16.*INT32.*UINT16.*UINT32"
+        with pytest.raises(ValueError, match=supported):
+            tensor.ci(0, [1, 32], dtype=DataType.FP32)
+
+    @pytest.mark.parametrize("dtype", [DataType.INT16, DataType.UINT16, DataType.UINT32])
+    def test_tensor_ci_accepts_non_int32_dtypes(self, dtype):
+        """Each listed non-INT32 dtype is accepted and carried into the result type."""
+        result_type = tensor.ci(0, [1, 16], dtype=dtype).type
+        assert isinstance(result_type, ir.TensorType)
+        assert result_type.dtype == dtype
+
+    def test_tensor_ci_rejects_cols_equal_one(self):
+        """A shape whose innermost dimension is 1 raises ValueError."""
+        with pytest.raises(ValueError, match="innermost dimension"):
+            tensor.ci(0, [32, 1], dtype=DataType.INT32)
+
+    def test_tensor_ci_rejects_multi_row_shape(self):
+        """pto.tci only populates the first row, so leading dims must be 1."""
+        with pytest.raises(ValueError, match=r"leading dimensions must be 1"):
+            tensor.ci(0, [4, 32], dtype=DataType.INT32)
+
+    def test_tensor_arange_alias_is_ci(self):
+        """pl.tensor.arange is the very same object as pl.tensor.ci."""
+        assert pl.tensor.arange is pl.tensor.ci
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/operators/test_tile_ops.py b/tests/ut/ir/operators/test_tile_ops.py
index 5a207ffce..30387f642 100644
--- a/tests/ut/ir/operators/test_tile_ops.py
+++ b/tests/ut/ir/operators/test_tile_ops.py
@@ -2513,5 +2513,50 @@ def 
test_tile_shape_mismatch_message(self, op_callable, lhs_dims, rhs_dims, matc
             op_callable(tile_a, tile_b)
 
 
+class TestTileCiOp:
+    """Tests for tile.ci (contiguous integer sequence generation, pto.tci)."""
+
+    def test_tile_ci_ascending(self):
+        """tile.ci returns a rank-2 INT32 TileType and prints the default descending=False."""
+        call = tile.ci(0, [1, 32], dtype=DataType.INT32)
+        t = call.type
+        assert isinstance(t, ir.TileType)
+        assert t.dtype == DataType.INT32
+        assert len(t.shape) == 2
+        assert "tile.ci" in str(call)
+        assert "descending=False" in str(call)
+
+    def test_tile_ci_descending_kwarg_printed(self):
+        """descending=True should appear in the printed IR."""
+        call = tile.ci(10, [1, 16], dtype=DataType.INT32, descending=True)
+        assert "descending=True" in str(call)
+
+    def test_tile_ci_rejects_float_dtype(self):  # message must name the four integer dtypes in order
+        with pytest.raises(ValueError, match=r"INT16.*INT32.*UINT16.*UINT32"):
+            tile.ci(0, [1, 32], dtype=DataType.FP32)
+
+    def test_tile_ci_accepts_uint_dtype(self):  # check the built type, not merely that a call came back
+        call = tile.ci(0, [1, 16], dtype=DataType.UINT32)
+        assert isinstance(call.type, ir.TileType) and call.type.dtype == DataType.UINT32
+
+    def test_tile_ci_rejects_cols_equal_one(self):  # innermost dimension of 1 is refused
+        with pytest.raises(ValueError, match="innermost dimension"):
+            tile.ci(0, [32, 1], dtype=DataType.INT32)
+
+    def test_tile_ci_rejects_multi_row_shape(self):
+        """pto.tci only populates the first row, so leading dims must be 1."""
+        with pytest.raises(ValueError, match=r"leading dimensions must be 1"):
+            tile.ci(0, [4, 32], dtype=DataType.INT32)
+
+    def test_tile_ci_rejects_start_dtype_mismatch(self):  # INT16 start scalar vs. INT32 result dtype
+        span = ir.Span.unknown()
+        start = ir.Var("s", ir.ScalarType(DataType.INT16), span)
+        with pytest.raises(ValueError, match=r"start.*dtype"):
+            tile.ci(start, [1, 32], dtype=DataType.INT32)
+
+    def test_tile_arange_alias_is_ci(self):  # identity check: the alias is the same object
+        assert pl.tile.arange is pl.tile.ci
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py b/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
index 
b2e17a7de..f1b9eaae9 100644
--- a/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
+++ b/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
@@ -1763,6 +1763,33 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         assert "tensor.full" not in ir_str
 
 
+class TestTensorCiConversion:
+    """convert_tensor_to_tile_ops coverage for tensor.ci."""
+
+    def test_tensor_ci_conversion(self):
+        """The pass rewrites tensor.ci into tile.ci and keeps descending=True in the printed IR."""
+
+        @pl.program
+        class Before:
+            @pl.function(type=pl.FunctionType.InCore)
+            def main_incore_0(self, x: pl.Tensor[[1, 32], pl.INT32]) -> pl.Tensor[[1, 32], pl.INT32]:
+                idx: pl.Tensor[[1, 32], pl.INT32] = pl.tensor.ci(0, [1, 32], dtype=pl.INT32, descending=True)
+                y: pl.Tensor[[1, 32], pl.INT32] = pl.add(idx, x)
+                return y
+
+            @pl.function
+            def main(self, x: pl.Tensor[[1, 32], pl.INT32]) -> pl.Tensor[[1, 32], pl.INT32]:
+                y: pl.Tensor[[1, 32], pl.INT32] = self.main_incore_0(x)
+                return y
+
+        converted = passes.convert_tensor_to_tile_ops()(Before)
+        printed_ir = str(converted)
+        # tile.ci must appear, no tensor.ci may remain, and the descending kwarg must still be printed.
+        assert "tile.ci" in printed_ir
+        assert "tensor.ci" not in printed_ir
+        assert "descending=True" in printed_ir
+
+
 class TestAssembleParentStride:
     """Tests for physical stride propagation when assemble is in orchestration."""
 