Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/en/dev/ir/05-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ UINT32 + INT32 → INT32 (signed precedence)
**Location**: `src/ir/op/tensor_ops/`
**Python API**: `from pypto.ir.op import tensor`

**Operations:** `tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`)
**Operations:** `tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`), `tensor.ci` / `tensor.arange` (contiguous integer sequence generation; dtype ∈ {INT16, INT32}; lowers to `tile.ci`)

**Example:**

Expand Down Expand Up @@ -258,6 +258,7 @@ with ib.function("tensor_example") as f:
| - | `tile.reshape` | Reshape tile to new dimensions (element count must match) |
| - | `tile.transpose` | Swap two axes of a tile |
| - | `tile.set_validshape` | Update valid-shape metadata without data movement |
| - | `tile.ci` | Generate contiguous integer sequence (start + k / start - k); dtype ∈ {INT16, INT32}; innermost dim != 1 |
| **Reduction** | `tile.sum` | Reduction along axis (axis, keepdim) |

**Data Flow:** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)`
Expand Down
3 changes: 2 additions & 1 deletion docs/zh-cn/dev/ir/05-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ UINT32 + INT32 → INT32 (signed precedence)
**位置**:`src/ir/op/tensor_ops/`
**Python API**:`from pypto.ir.op import tensor`

**操作:** `tensor.add/sub/mul/div`(逐元素,支持完整 N 维广播),`tensor.set_validshape`(内部 API,更新 valid_shape 元数据,不搬移数据 — 仅供编译器生成代码使用),`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`(排序;分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口,由 `ConvertTensorToTileOps` 转换为 tile 操作),`tensor.gather`(按维索引;MVP 仅支持 2D 输入 + `dim=-1`,由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环),`tensor.gather_mask`(掩码模式选择;对应 `tile.gather_mask`,支持可选同位宽 `output_dtype`)
**操作:** `tensor.add/sub/mul/div`(逐元素,支持完整 N 维广播),`tensor.set_validshape`(内部 API,更新 valid_shape 元数据,不搬移数据 — 仅供编译器生成代码使用),`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`(排序;分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口,由 `ConvertTensorToTileOps` 转换为 tile 操作),`tensor.gather`(按维索引;MVP 仅支持 2D 输入 + `dim=-1`,由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环),`tensor.gather_mask`(掩码模式选择;对应 `tile.gather_mask`,支持可选同位宽 `output_dtype`),`tensor.ci` / `tensor.arange`(生成连续整数序列,dtype ∈ {INT16, INT32},下层降到 `tile.ci`)

**示例:**

Expand Down Expand Up @@ -255,6 +255,7 @@ with ib.function("tensor_example") as f:
| - | `tile.reshape` | 重塑 tile 维度(元素总数须一致) |
| - | `tile.transpose` | 交换 tile 的两个轴 |
| - | `tile.set_validshape` | 更新 valid_shape 元数据,不搬移数据 |
| - | `tile.ci` | 生成连续整数序列(升序 start+k 或降序 start-k);dtype ∈ {INT16, INT32};最内维 != 1 |
| **规约** | `tile.sum` | 沿轴规约(axis, keepdim) |

**数据流:** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)`
Expand Down
39 changes: 39 additions & 0 deletions python/pypto/ir/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,45 @@ def full(
return _ir_core.create_op_call("tensor.full", [shape_tuple, value_expr], kwargs, actual_span)


def ci(
    start: int | Expr,
    shape: Sequence[int | Expr] | _ir_core.MakeTuple,
    dtype: DataType = DataType.INT32,
    descending: bool = False,
    span: Span | None = None,
) -> Call:
    """Build a ``tensor.ci`` call that fills a tensor with a contiguous integer sequence.

    Note:
        The op eventually lowers to ``pto.tci``, which only populates the first
        row of the destination. Leading dimensions must be 1 — prefer shapes of
        the form ``[1, N]``.

    Args:
        start: Starting integer (plain int or scalar Expr). Must match ``dtype``.
        shape: Destination shape (leading dims must be 1, innermost dim != 1).
        dtype: Destination dtype. One of {INT16, INT32}.
        descending: If True, generate ``start - k`` instead of ``start + k``.
        span: Optional source span for debugging (auto-captured if not provided).

    Returns:
        Call expression that returns a TensorType.
    """
    actual_span = _get_span_or_capture(span)
    if not isinstance(start, Expr):
        # Plain Python int: wrap it as a constant carrying the destination dtype.
        start_expr: Expr = ConstInt(start, dtype, actual_span)
    elif isinstance(start, ConstInt) and start.dtype != dtype:
        # Integer constant with a mismatched dtype: rebuild it with the destination dtype.
        start_expr = ConstInt(start.value, dtype, actual_span)
    else:
        # Any other scalar Expr is forwarded unchanged.
        start_expr = start
    shape_tuple = _to_make_tuple(shape, actual_span)
    attrs: dict[str, Any] = {"dtype": dtype, "descending": descending}
    return _ir_core.create_op_call("tensor.ci", [start_expr, shape_tuple], attrs, actual_span)


arange = ci


def read(
tensor: Expr, indices: Expr | list[int | Expr] | _ir_core.MakeTuple, span: Span | None = None
) -> Call:
Expand Down
44 changes: 44 additions & 0 deletions python/pypto/ir/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,50 @@ def full(
return _ir_core.create_op_call("tile.full", [shape_tuple, value_expr], kwargs, actual_span)


def ci(
    start: int | Expr,
    shape: Sequence[int | Expr] | _ir_core.MakeTuple,
    dtype: DataType = DataType.INT32,
    descending: bool = False,
    span: Span | None = None,
) -> Call:
    """Build a ``tile.ci`` call (pto.tci) generating a contiguous integer sequence.

    For a column index ``k`` in the first row of the destination tile:
    - Ascending: ``dst[0, k] = start + k``
    - Descending: ``dst[0, k] = start - k``

    Note:
        ``pto.tci`` uses the destination's valid-column count as the sequence
        length and does NOT populate additional rows. Leading dimensions must
        be 1 — prefer shapes of the form ``[1, N]``.

    Args:
        start: Starting integer (plain int or a scalar Expr). Its dtype must match ``dtype``.
        shape: Destination tile shape (static, leading dims must be 1, innermost dim != 1).
        dtype: Destination dtype. Must be one of {INT16, INT32}.
        descending: If True, generate a descending sequence.
        span: Optional source span for debugging (auto-captured if not provided).

    Returns:
        Call expression that returns a TileType with the generated sequence.
    """
    actual_span = _get_span_or_capture(span)
    # Wrap a plain int into a constant of the destination dtype; Exprs pass through.
    start_expr = start if isinstance(start, Expr) else ConstInt(start, dtype, actual_span)
    if isinstance(start_expr, ConstInt) and start_expr.dtype != dtype:
        # Re-wrap a mismatched integer constant so its dtype matches the destination.
        start_expr = ConstInt(start_expr.value, dtype, actual_span)
    shape_tuple = _to_make_tuple(shape, actual_span)
    attrs: dict[str, Any] = {"dtype": dtype, "descending": descending}
    return _ir_core.create_op_call("tile.ci", [start_expr, shape_tuple], attrs, actual_span)


arange = ci


def fillpad(tile: Expr, pad_value: PadValue = PadValue.zero, span: Span | None = None) -> Call:
"""Fill remaining tile elements with specified padding value.

Expand Down
29 changes: 29 additions & 0 deletions python/pypto/language/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
"slice",
"fillpad",
"full",
"ci",
"arange",
"matmul",
"matmul_acc",
"mul",
Expand Down Expand Up @@ -233,6 +235,33 @@ def full(shape: Sequence[IntLike], dtype: DataType, value: int | float) -> Tenso
return Tensor(expr=call_expr)


def ci(
    start: int | Scalar,
    shape: Sequence[IntLike],
    dtype: DataType = DataType.INT32,
    descending: bool = False,
) -> Tensor:
    """Generate a contiguous integer sequence into a tensor.

    Equivalent to ``numpy.arange`` / ``torch.arange``. Lowers to ``tile.ci`` → ``pto.tci``.

    Args:
        start: Starting integer (plain int or Scalar). Must match ``dtype``.
        shape: Destination tensor shape (innermost dim != 1).
        dtype: Destination dtype. One of {INT16, INT32}. Defaults to INT32.
        descending: If True, generate a descending sequence.

    Returns:
        Tensor wrapping the ci operation.
    """
    # Unwrap DSL Scalar values down to the underlying IR expression.
    raw_start = start.unwrap() if isinstance(start, Scalar) else start
    return Tensor(
        expr=_ir_ops.ci(raw_start, _normalize_intlike(shape), dtype=dtype, descending=descending)
    )


arange = ci


def matmul(
lhs: Tensor,
rhs: Tensor,
Expand Down
29 changes: 29 additions & 0 deletions python/pypto/language/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
"concat",
"move",
"full",
"ci",
"arange",
"fillpad",
"fillpad_inplace",
"get_block_idx",
Expand Down Expand Up @@ -413,6 +415,33 @@ def full(shape: list[int], dtype: DataType, value: int | float) -> Tile:
return Tile(expr=call_expr)


def ci(
    start: int | Scalar,
    shape: Sequence[int],
    dtype: DataType = DataType.INT32,
    descending: bool = False,
) -> Tile:
    """Generate a contiguous integer sequence into a tile.

    Equivalent to ``numpy.arange``-style index generation. Maps to ``pto.tci``.

    Args:
        start: Starting integer (plain int or a Scalar). Must match ``dtype``.
        shape: Shape of the destination tile (static, innermost dim != 1).
        dtype: Destination dtype. One of {INT16, INT32}. Defaults to INT32.
        descending: If True, generate a descending sequence.

    Returns:
        Tile wrapping the ci operation.
    """
    # Unwrap DSL Scalar values down to the underlying IR expression.
    raw_start = start.unwrap() if isinstance(start, Scalar) else start
    return Tile(expr=_ir_ops.ci(raw_start, list(shape), dtype=dtype, descending=descending))


arange = ci


def fillpad(tile: Tile, pad_value: PadValue = PadValue.zero) -> Tile:
"""Fill remaining tile elements with specified padding value.

Expand Down
18 changes: 15 additions & 3 deletions src/backend/common/pto_ops_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,13 +456,25 @@ static std::string MakeAssignCodegenPTO(const std::string& pto_op_name, const Ca
// Codegen for the ci op: emits a pto.tci instruction of the form
//   "<pto_op_name> ins(<start> {descending = <bool>} : <type>) outs(<dst> : <type>)".
// Type annotations are appended only when the codegen can produce them.
static std::string MakeCiCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
                                    codegen::CodegenBase& codegen_base) {
  auto& codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
  // The IR op carries (start, shape); shape is consumed by type deduction,
  // so codegen only reads the start operand here.
  CHECK(op->args_.size() == 2) << "Operation:[" << pto_op_name
                               << "] requires 2 arguments (start, shape), but got " << op->args_.size();
  bool descending = op->GetKwarg<bool>("descending");
  std::string src = codegen.GetExprAsCode(op->args_[0]);
  std::string src_type = codegen.GetExprTypeAnnotation(op->args_[0]);
  std::string config_attr = descending ? "{descending = true}" : "{descending = false}";
  std::string dst = codegen.GetCurrentResultTarget();
  std::string dst_type = codegen.GetCurrentResultTileBufTypeString();
  std::ostringstream oss;
  oss << pto_op_name << " ins(" << src << " " << config_attr;
  if (!src_type.empty()) {
    oss << " : " << src_type;
  }
  oss << ") outs(" << dst;
  if (!dst_type.empty()) {
    oss << " : " << dst_type;
  }
  oss << ")";
  codegen.Emit(oss.str());
  return "";
}

Expand Down
83 changes: 83 additions & 0 deletions src/ir/op/tensor_ops/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,89 @@ REGISTER_OP("tensor.full")
return DeduceTensorFullType(args, kwargs);
});

// Type deduction for tensor.ci: validates (start, shape) plus the {dtype, descending}
// attributes and returns the destination TensorType.
TypePtr DeduceTensorCiType(const std::vector<ExprPtr>& args,
                           const std::vector<std::pair<std::string, std::any>>& kwargs) {
  // tensor.ci signature: (start, shape) with attrs {dtype, descending}
  CHECK(args.size() == 2) << "tensor.ci requires exactly 2 arguments (start, shape), but got " << args.size();

  // Required 'dtype' attribute, restricted to the dtypes pto.tci supports.
  bool found_dtype = false;
  DataType dtype;
  for (const auto& [key, value] : kwargs) {
    if (key == "dtype") {
      dtype = AnyCast<DataType>(value, "kwarg key: dtype");
      found_dtype = true;
      break;
    }
  }
  CHECK(found_dtype) << "tensor.ci requires 'dtype' kwarg";
  CHECK(dtype == DataType::INT16 || dtype == DataType::INT32)
      << "tensor.ci dtype must be one of {INT16, INT32}, but got " << dtype.ToString();

  // First arg: start scalar; dtype must match destination dtype.
  auto start_scalar_type = As<ScalarType>(args[0]->GetType());
  CHECK(start_scalar_type) << "tensor.ci requires first argument 'start' to be a scalar, but got "
                           << args[0]->GetType()->TypeName();
  CHECK(start_scalar_type->dtype_ == dtype)
      << "tensor.ci 'start' dtype (" << start_scalar_type->dtype_.ToString()
      << ") must match destination dtype (" << dtype.ToString() << ")";

  // Second arg: shape TupleType whose elements are all integer scalars.
  auto shape_tuple_type = As<TupleType>(args[1]->GetType());
  CHECK(shape_tuple_type) << "tensor.ci requires shape to be TupleType, but got "
                          << args[1]->GetType()->TypeName();

  for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) {
    auto scalar_type = As<ScalarType>(shape_tuple_type->types_[i]);
    CHECK(scalar_type) << "tensor.ci shape element " << i << " must be ScalarType, but got "
                       << shape_tuple_type->types_[i]->TypeName();
    CHECK(scalar_type->dtype_.IsInt())
        << "tensor.ci shape element " << i << " must have integer dtype, but got "
        << scalar_type->dtype_.ToString();
  }

  // Materialize shape element exprs: reuse MakeTuple elements directly, otherwise
  // index the tuple expr with TupleGetItemExpr per dimension.
  std::vector<ExprPtr> shape;
  shape.reserve(shape_tuple_type->types_.size());
  if (auto make_tuple = As<MakeTuple>(args[1])) {
    shape = make_tuple->elements_;
  } else {
    for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) {
      shape.emplace_back(std::make_shared<TupleGetItemExpr>(args[1], static_cast<int>(i), args[1]->span_));
    }
  }
  CHECK(!shape.empty()) << "tensor.ci requires non-empty shape";

  // ISA constraint: innermost dim Cols != 1.
  if (auto last_const = As<ConstInt>(shape.back())) {
    CHECK(last_const->value_ != 1) << "tensor.ci requires the innermost dimension (Cols) to be != 1, got "
                                   << last_const->value_;
  }

  // ISA constraint: pto.tci only populates the first row. Reject multi-row compile-time
  // shapes so tensor.ci metadata stays consistent with the tile.ci lowering.
  for (size_t i = 0; i + 1 < shape.size(); ++i) {
    if (auto const_dim = As<ConstInt>(shape[i])) {
      CHECK(const_dim->value_ == 1)
          << "tensor.ci only populates the first row because pto.tci ignores valid rows; "
          << "leading dimensions must be 1, but got " << const_dim->value_ << " at index " << i;
    }
  }

  // 'descending' is an optional bool kwarg; no validation needed beyond its type.
  return std::make_shared<TensorType>(shape, dtype);
}

// Register tensor.ci: tensor-level contiguous-integer-sequence generator.
// Converted to tile.ci (which maps to pto.tci) by ConvertTensorToTileOps;
// type deduction and all argument/attr validation live in DeduceTensorCiType.
REGISTER_OP("tensor.ci")
.set_op_category("TensorOp")
.set_description("Generate a contiguous integer sequence into a tensor (lowers to tile.ci)")
.add_argument("start", "Starting integer scalar (must match dst dtype)")
.add_argument("shape", "Destination shape (TupleType of ScalarType integer)")
.set_attr<DataType>("dtype")  // required; one of {INT16, INT32}
.set_attr<bool>("descending")  // optional; ascending when false
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
return DeduceTensorCiType(args, kwargs);
});

TypePtr DeduceTensorDimType(const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
// tensor.dim: Extract a shape dimension from a tensor as a scalar
Expand Down
Loading
Loading