diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0cc1f5ef5..a64081ee2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -86,8 +86,8 @@ jobs:
     env:
       ASCEND_HOME_PATH: /usr/local/Ascend/cann-8.5.0
       PTOAS_ROOT: ${{ github.workspace }}/ptoas-bin
-      PTOAS_VERSION: v0.26
-      PTOAS_SHA256: 8e8239f92b169d88fd117f7d4841739c129ecbdd8d3a01f96087df576ecd7814
+      PTOAS_VERSION: v0.29
+      PTOAS_SHA256: 9487c3f3366636f097076c57528781e2018856a2d2ec24f3e7b99c543286843c
       CMAKE_BUILD_PARALLEL_LEVEL: 16
       CMAKE_C_COMPILER_LAUNCHER: ccache
       CMAKE_CXX_COMPILER_LAUNCHER: ccache
@@ -169,8 +169,8 @@ jobs:
     runs-on: ubuntu-latest
     env:
       PTOAS_ROOT: ${{ github.workspace }}/ptoas-bin
-      PTOAS_VERSION: v0.26
-      PTOAS_SHA256: 5abf99f9729997e171bee08bdb25662a7a24b62921c5ecfc05790a56a6e1b076
+      PTOAS_VERSION: v0.29
+      PTOAS_SHA256: 6f33b8e89d995470fc0c81dd3ae025a69c63c8a432f341bdaef0eabc3831645b
     container:
       image: ghcr.io/hw-native-sys/pypto/github-ci:latest
     steps:
diff --git a/docs/en/dev/ir/05-operators.md b/docs/en/dev/ir/05-operators.md
index 7f0339563..23d54ed56 100644
--- a/docs/en/dev/ir/05-operators.md
+++ b/docs/en/dev/ir/05-operators.md
@@ -219,7 +219,7 @@ UINT32 + INT32 → INT32 (signed precedence)
 **Location**: `src/ir/op/tensor_ops/`
 **Python API**: `from pypto.ir.op import tensor`
 
-**Operations:** `tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`)
+**Operations:** `tensor.add/sub/mul/div` (element-wise with full N-D broadcasting), `tensor.set_validshape` (internal, update valid-shape metadata without data movement — compiler-generated only), `tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2` (sorting; tensor-level counterparts of `tile.sort32` / `tile.mrgsort` — converted to tile ops by `ConvertTensorToTileOps`), `tensor.gather` (per-dim indexing; MVP supports rank-2 inputs with `dim=-1` and lowers to a per-row `tile.gather` loop via `ConvertTensorToTileOps`), `tensor.gather_mask` (mask-pattern gather; tensor-level counterpart of `tile.gather_mask`, with optional same-bit-width `output_dtype`), `tensor.ci` / `tensor.arange` (contiguous integer sequence generation; lowers to `tile.ci`)
 
 **Example:**
 
@@ -258,6 +258,7 @@ with ib.function("tensor_example") as f:
 | - | `tile.reshape` | Reshape tile to new dimensions (element count must match) |
 | - | `tile.transpose` | Swap two axes of a tile |
 | - | `tile.set_validshape` | Update valid-shape metadata without data movement |
+| - | `tile.ci` | Generate contiguous integer sequence (ascending start + k / descending start - k); dtype ∈ {INT16, INT32, UINT16, UINT32}; leading dims must be 1, innermost dim != 1 |
 | **Reduction** | `tile.sum` | Reduction along axis (axis, keepdim) |
 
 **Data Flow:** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)`
diff --git a/docs/zh-cn/dev/ir/05-operators.md b/docs/zh-cn/dev/ir/05-operators.md
index 9ca8e9529..0df51021a 100644
--- a/docs/zh-cn/dev/ir/05-operators.md
+++ b/docs/zh-cn/dev/ir/05-operators.md
@@ -216,7 +216,7 @@ UINT32 + INT32 → INT32 (signed precedence)
 **位置**：`src/ir/op/tensor_ops/`
 **Python API**：`from pypto.ir.op import tensor`
 
-**操作：** `tensor.add/sub/mul/div`（逐元素，支持完整 N 维广播），`tensor.set_validshape`（内部 API，更新 valid_shape 元数据，不搬移数据 — 仅供编译器生成代码使用），`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`（排序；分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口，由 `ConvertTensorToTileOps` 转换为 tile 操作），`tensor.gather`（按维索引；MVP 仅支持 2D 输入 + `dim=-1`，由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环），`tensor.gather_mask`（掩码模式选择；对应 `tile.gather_mask`，支持可选同位宽 `output_dtype`）
+**操作：** `tensor.add/sub/mul/div`（逐元素，支持完整 N 维广播），`tensor.set_validshape`（内部 API，更新 valid_shape 元数据，不搬移数据 — 仅供编译器生成代码使用），`tensor.sort32` / `tensor.mrgsort_format1` / `tensor.mrgsort_format2`（排序；分别对应 `tile.sort32` / `tile.mrgsort` 的 tensor 层接口，由 `ConvertTensorToTileOps` 转换为 tile 操作），`tensor.gather`（按维索引；MVP 仅支持 2D 输入 + `dim=-1`，由 `ConvertTensorToTileOps` 按行展开为 `tile.gather` 循环），`tensor.gather_mask`（掩码模式选择；对应 `tile.gather_mask`，支持可选同位宽 `output_dtype`），`tensor.ci` / `tensor.arange`（生成连续整数序列，下层降到 `tile.ci`）
 
 **示例：**
 
@@ -255,6 +255,7 @@ with ib.function("tensor_example") as f:
 | - | `tile.reshape` | 重塑 tile 维度（元素总数须一致） |
 | - | `tile.transpose` | 交换 tile 的两个轴 |
 | - | `tile.set_validshape` | 更新 valid_shape 元数据，不搬移数据 |
+| - | `tile.ci` | 生成连续整数序列（升序 start+k 或降序 start-k）；dtype ∈ {INT16, INT32, UINT16, UINT32}；前导维必须为 1，最内维 != 1 |
 | **规约** | `tile.sum` | 沿轴规约（axis, keepdim） |
 
 **数据流：** `TensorType (DDR) → tile.load → TileType (Unified Buffer) → tile.{ops} → TileType → tile.store → TensorType (DDR)`
diff --git a/python/pypto/ir/op/tensor_ops.py b/python/pypto/ir/op/tensor_ops.py
index b18c7be77..3c943a94e 100644
--- a/python/pypto/ir/op/tensor_ops.py
+++ b/python/pypto/ir/op/tensor_ops.py
@@ -84,6 +84,45 @@ def full(
     return _ir_core.create_op_call("tensor.full", [shape_tuple, value_expr], kwargs, actual_span)
 
 
+def ci(
+    start: int | Expr,
+    shape: Sequence[int | Expr] | _ir_core.MakeTuple,
+    dtype: DataType = DataType.INT32,
+    descending: bool = False,
+    span: Span | None = None,
+) -> Call:
+    """Generate a contiguous integer sequence into a tensor (lowers to tile.ci).
+
+    Note:
+        Lowers to ``pto.tci`` which only populates the first row. Leading
+        dimensions must be 1 — prefer shapes of the form ``[1, N]``.
+
+    Args:
+        start: Starting integer. Ints and ConstInt are coerced to ``dtype``; other Exprs must match it.
+        shape: Destination shape (leading dims must be 1, innermost dim != 1).
+        dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}.
+        descending: If True, generate a descending sequence.
+        span: Optional source span for debugging (auto-captured if not provided).
+
+    Returns:
+        Call expression that returns a TensorType.
+    """
+    actual_span = _get_span_or_capture(span)
+    if isinstance(start, Expr):
+        if isinstance(start, ConstInt) and start.dtype != dtype:
+            start_expr = ConstInt(start.value, dtype, actual_span)
+        else:
+            start_expr = start
+    else:
+        start_expr = ConstInt(start, dtype, actual_span)
+    shape_tuple = _to_make_tuple(shape, actual_span)
+    kwargs: dict[str, Any] = {"dtype": dtype, "descending": descending}
+    return _ir_core.create_op_call("tensor.ci", [start_expr, shape_tuple], kwargs, actual_span)
+
+
+arange = ci
+
+
 def read(
     tensor: Expr, indices: Expr | list[int | Expr] | _ir_core.MakeTuple, span: Span | None = None
 ) -> Call:
diff --git a/python/pypto/ir/op/tile_ops.py b/python/pypto/ir/op/tile_ops.py
index 2b6af86ff..1314067bb 100644
--- a/python/pypto/ir/op/tile_ops.py
+++ b/python/pypto/ir/op/tile_ops.py
@@ -480,6 +480,50 @@ def full(
     return _ir_core.create_op_call("tile.full", [shape_tuple, value_expr], kwargs, actual_span)
 
 
+def ci(
+    start: int | Expr,
+    shape: Sequence[int | Expr] | _ir_core.MakeTuple,
+    dtype: DataType = DataType.INT32,
+    descending: bool = False,
+    span: Span | None = None,
+) -> Call:
+    """Generate a contiguous integer sequence into a tile (pto.tci).
+
+    For a column index ``k`` in the first row of the destination tile:
+    - Ascending: ``dst[0, k] = start + k``
+    - Descending: ``dst[0, k] = start - k``
+
+    Note:
+        ``pto.tci`` uses the destination's valid-column count as the sequence
+        length and does NOT populate additional rows. Leading dimensions must
+        be 1 — prefer shapes of the form ``[1, N]``.
+
+    Args:
+        start: Starting integer. Ints and ConstInt are coerced to ``dtype``; other Exprs must match it.
+        shape: Destination tile shape (static, leading dims must be 1, innermost dim != 1).
+        dtype: Destination dtype. Must be one of {INT16, INT32, UINT16, UINT32}.
+        descending: If True, generate a descending sequence.
+        span: Optional source span for debugging (auto-captured if not provided).
+
+    Returns:
+        Call expression that returns a TileType with the generated sequence.
+    """
+    actual_span = _get_span_or_capture(span)
+    if isinstance(start, Expr):
+        if isinstance(start, ConstInt) and start.dtype != dtype:
+            start_expr = ConstInt(start.value, dtype, actual_span)
+        else:
+            start_expr = start
+    else:
+        start_expr = ConstInt(start, dtype, actual_span)
+    shape_tuple = _to_make_tuple(shape, actual_span)
+    kwargs: dict[str, Any] = {"dtype": dtype, "descending": descending}
+    return _ir_core.create_op_call("tile.ci", [start_expr, shape_tuple], kwargs, actual_span)
+
+
+arange = ci
+
+
 def fillpad(tile: Expr, pad_value: PadValue | int | float = PadValue.zero, span: Span | None = None) -> Call:
     """Fill remaining tile elements with specified padding value.
 
diff --git a/python/pypto/language/op/tensor_ops.py b/python/pypto/language/op/tensor_ops.py
index a12a59e0a..6087a7e4b 100644
--- a/python/pypto/language/op/tensor_ops.py
+++ b/python/pypto/language/op/tensor_ops.py
@@ -26,6 +26,8 @@
     "slice",
     "fillpad",
     "full",
+    "ci",
+    "arange",
     "matmul",
     "matmul_acc",
     "mul",
@@ -264,6 +266,33 @@ def full(shape: Sequence[IntLike], dtype: DataType, value: int | float) -> Tenso
     return Tensor(expr=call_expr)
 
 
+def ci(
+    start: int | Scalar,
+    shape: Sequence[IntLike],
+    dtype: DataType = DataType.INT32,
+    descending: bool = False,
+) -> Tensor:
+    """Generate a contiguous integer sequence into a tensor.
+
+    Unit-step analogue of ``numpy.arange`` / ``torch.arange``. Lowers to ``tile.ci`` → ``pto.tci``.
+
+    Args:
+        start: Starting integer (plain int or Scalar). Must match ``dtype``.
+        shape: Destination tensor shape (leading dims must be 1, innermost dim != 1).
+        dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}. Defaults to INT32.
+        descending: If True, generate a descending sequence.
+
+    Returns:
+        Tensor wrapping the ci operation.
+    """
+    start_expr = start.unwrap() if isinstance(start, Scalar) else start
+    call_expr = _ir_ops.ci(start_expr, _normalize_intlike(shape), dtype=dtype, descending=descending)
+    return Tensor(expr=call_expr)
+
+
+arange = ci
+
+
 def matmul(
     lhs: Tensor,
     rhs: Tensor,
diff --git a/python/pypto/language/op/tile_ops.py b/python/pypto/language/op/tile_ops.py
index 02ea27760..1e708b9bf 100644
--- a/python/pypto/language/op/tile_ops.py
+++ b/python/pypto/language/op/tile_ops.py
@@ -33,6 +33,8 @@
     "concat",
     "move",
     "full",
+    "ci",
+    "arange",
     "fillpad",
     "fillpad_inplace",
     "get_block_idx",
@@ -414,6 +416,33 @@ def full(shape: list[int], dtype: DataType, value: int | float) -> Tile:
     return Tile(expr=call_expr)
 
 
+def ci(
+    start: int | Scalar,
+    shape: Sequence[int],
+    dtype: DataType = DataType.INT32,
+    descending: bool = False,
+) -> Tile:
+    """Generate a contiguous integer sequence into a tile.
+
+    Equivalent to ``numpy.arange``-style index generation. Maps to ``pto.tci``.
+
+    Args:
+        start: Starting integer (plain int or a Scalar). Must match ``dtype``.
+        shape: Shape of the destination tile (static, leading dims must be 1, innermost dim != 1).
+        dtype: Destination dtype. One of {INT16, INT32, UINT16, UINT32}. Defaults to INT32.
+        descending: If True, generate a descending sequence.
+
+    Returns:
+        Tile wrapping the ci operation.
+    """
+    start_expr = start.unwrap() if isinstance(start, Scalar) else start
+    call_expr = _ir_ops.ci(start_expr, list(shape), dtype=dtype, descending=descending)
+    return Tile(expr=call_expr)
+
+
+arange = ci
+
+
 def fillpad(tile: Tile, pad_value: PadValue | int | float = PadValue.zero) -> Tile:
     """Fill remaining tile elements with specified padding value.
 
diff --git a/src/backend/common/pto_ops_common.cpp b/src/backend/common/pto_ops_common.cpp
index 37c87a5a3..70d7ba38e 100644
--- a/src/backend/common/pto_ops_common.cpp
+++ b/src/backend/common/pto_ops_common.cpp
@@ -456,13 +456,25 @@ static std::string MakeAssignCodegenPTO(const std::string& pto_op_name, const Ca
 static std::string MakeCiCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
                                     codegen::CodegenBase& codegen_base) {
   auto& codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
-  CHECK(op->args_.size() == 1) << "Operation:[" << pto_op_name << "] requires 1 argument, but got "
-                               << op->args_.size();
+  CHECK(op->args_.size() == 2) << "Operation:[" << pto_op_name
+                               << "] requires 2 arguments (start, shape), but got " << op->args_.size();
   bool descending = op->GetKwarg<bool>("descending");
   std::string src = codegen.GetExprAsCode(op->args_[0]);
+  std::string src_type = codegen.GetExprTypeAnnotation(op->args_[0]);
   std::string config_attr = descending ? "{descending = true}" : "{descending = false}";
   std::string dst = codegen.GetCurrentResultTarget();
-  codegen.Emit(pto_op_name + " ins(" + src + " " + config_attr + ") outs(" + dst + ")");
+  std::string dst_type = codegen.GetCurrentResultTileBufTypeString();
+  std::ostringstream oss;
+  oss << pto_op_name << " ins(" << src << " " << config_attr;
+  if (!src_type.empty()) {
+    oss << " : " << src_type;
+  }
+  oss << ") outs(" << dst;
+  if (!dst_type.empty()) {
+    oss << " : " << dst_type;
+  }
+  oss << ")";
+  codegen.Emit(oss.str());
   return "";
 }
 
diff --git a/src/codegen/pto/pto_codegen.cpp b/src/codegen/pto/pto_codegen.cpp
index 4843435fb..4c5531167 100644
--- a/src/codegen/pto/pto_codegen.cpp
+++ b/src/codegen/pto/pto_codegen.cpp
@@ -722,6 +722,13 @@ std::string PTOCodegen::GetOrEmitConstant(int64_t value, DataType dt) {
   if (it != fs_.emitted_numeric_constants.end()) return it->second;
 
   std::string mlir_type = GetTypeString(dt);
+  // MLIR's arith.constant requires signless integer return types (upstream
+  // ArithOps.cpp ConstantOp::verify). For unsigned dtypes, emit the constant
+  // at the signless type and bridge to the unsigned type via
+  // builtin.unrealized_conversion_cast; some consumer ops (e.g. pto.tci) in
+  // turn require the operand type to match the destination dtype exactly.
+  bool is_unsigned = dt.IsUnsignedInt() && !mlir_type.empty() && mlir_type[0] == 'u';
+  std::string signless_type = is_unsigned ? mlir_type.substr(1) : mlir_type;
   std::string ssa_suffix = "_" + mlir_type;
 
   std::string ssa_id;
@@ -741,8 +748,17 @@ std::string PTOCodegen::GetOrEmitConstant(int64_t value, DataType dt) {
   } else {
     name = NewTemp();
   }
-  fs_.constants_section << fs_.constants_indent << name << " = arith.constant " << value << " : " << mlir_type
-                        << "\n";
+
+  if (is_unsigned) {
+    std::string signless_name = NewTemp();
+    fs_.constants_section << fs_.constants_indent << signless_name << " = arith.constant " << value << " : "
+                          << signless_type << "\n";
+    fs_.constants_section << fs_.constants_indent << name << " = builtin.unrealized_conversion_cast "
+                          << signless_name << " : " << signless_type << " to " << mlir_type << "\n";
+  } else {
+    fs_.constants_section << fs_.constants_indent << name << " = arith.constant " << value << " : "
+                          << mlir_type << "\n";
+  }
   fs_.emitted_numeric_constants[key] = name;
   return name;
 }
@@ -1160,6 +1176,9 @@ std::string PTOCodegen::GetExprTypeAnnotation(const ir::ExprPtr& expr) {
     return GetTypeString(const_float->dtype());
   }
   if (auto const_int = As<ir::ConstInt>(expr)) {
+    // The SSA value produced by GetOrEmitConstant is cast back to the dtype's
+    // MLIR type (via unrealized_conversion_cast for unsigned), so the use-site
+    // annotation matches the declared dtype directly.
     return GetTypeString(const_int->dtype());
   }
   return "";
diff --git a/src/ir/op/tensor_ops/memory.cpp b/src/ir/op/tensor_ops/memory.cpp
index d86c3aa88..838bb16cf 100644
--- a/src/ir/op/tensor_ops/memory.cpp
+++ b/src/ir/op/tensor_ops/memory.cpp
@@ -412,6 +412,90 @@ REGISTER_OP("tensor.full")
       return DeduceTensorFullType(args, kwargs);
     });
 
+TypePtr DeduceTensorCiType(const std::vector<ExprPtr>& args,
+                           const std::vector<std::pair<std::string, std::any>>& kwargs) {
+  // tensor.ci signature: (start, shape) with attrs {dtype, descending}
+  CHECK(args.size() == 2) << "tensor.ci requires exactly 2 arguments (start, shape), but got " << args.size();
+
+  bool found_dtype = false;
+  DataType dtype;
+  for (const auto& [key, value] : kwargs) {
+    if (key == "dtype") {
+      dtype = AnyCast<DataType>(value, "kwarg key: dtype");
+      found_dtype = true;
+      break;
+    }
+  }
+  CHECK(found_dtype) << "tensor.ci requires 'dtype' kwarg";
+  CHECK(dtype == DataType::INT16 || dtype == DataType::INT32 || dtype == DataType::UINT16 ||
+        dtype == DataType::UINT32)
+      << "tensor.ci dtype must be one of {INT16, INT32, UINT16, UINT32}, but got " << dtype.ToString();
+
+  // First arg: start scalar; dtype must match destination dtype.
+  auto start_scalar_type = As<ScalarType>(args[0]->GetType());
+  CHECK(start_scalar_type) << "tensor.ci requires first argument 'start' to be a scalar, but got "
+                           << args[0]->GetType()->TypeName();
+  CHECK(start_scalar_type->dtype_ == dtype)
+      << "tensor.ci 'start' dtype (" << start_scalar_type->dtype_.ToString()
+      << ") must match destination dtype (" << dtype.ToString() << ")";
+
+  // Second arg: shape TupleType.
+  auto shape_tuple_type = As<TupleType>(args[1]->GetType());
+  CHECK(shape_tuple_type) << "tensor.ci requires shape to be TupleType, but got "
+                          << args[1]->GetType()->TypeName();
+
+  for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) {
+    auto scalar_type = As<ScalarType>(shape_tuple_type->types_[i]);
+    CHECK(scalar_type) << "tensor.ci shape element " << i << " must be ScalarType, but got "
+                       << shape_tuple_type->types_[i]->TypeName();
+    CHECK(scalar_type->dtype_.IsInt())
+        << "tensor.ci shape element " << i << " must have integer dtype, but got "
+        << scalar_type->dtype_.ToString();
+  }
+
+  std::vector<ExprPtr> shape;
+  shape.reserve(shape_tuple_type->types_.size());
+  if (auto make_tuple = As<MakeTuple>(args[1])) {
+    shape = make_tuple->elements_;
+  } else {
+    for (size_t i = 0; i < shape_tuple_type->types_.size(); ++i) {
+      shape.emplace_back(std::make_shared<TupleGetItemExpr>(args[1], static_cast<int>(i), args[1]->span_));
+    }
+  }
+  CHECK(!shape.empty()) << "tensor.ci requires non-empty shape";
+
+  // ISA constraint: innermost dim Cols != 1.
+  if (auto last_const = As<ConstInt>(shape.back())) {
+    CHECK(last_const->value_ != 1) << "tensor.ci requires the innermost dimension (Cols) to be != 1, got "
+                                   << last_const->value_;
+  }
+
+  // ISA constraint: pto.tci only populates the first row. Reject multi-row compile-time
+  // shapes so tensor.ci metadata stays consistent with the tile.ci lowering.
+  for (size_t i = 0; i + 1 < shape.size(); ++i) {
+    if (auto const_dim = As<ConstInt>(shape[i])) {
+      CHECK(const_dim->value_ == 1)
+          << "tensor.ci only populates the first row because pto.tci ignores valid rows; "
+          << "leading dimensions must be 1, but got " << const_dim->value_ << " at index " << i;
+    }
+  }
+
+  (void)kwargs;  // descending is optional bool kwarg, no validation needed beyond type.
+  return std::make_shared<TensorType>(shape, dtype);
+}
+
+REGISTER_OP("tensor.ci")
+    .set_op_category("TensorOp")
+    .set_description("Generate a contiguous integer sequence into a tensor (lowers to tile.ci)")
+    .add_argument("start", "Starting integer scalar (must match dst dtype)")
+    .add_argument("shape", "Destination shape (TupleType of ScalarType integer)")
+    .set_attr<DataType>("dtype")
+    .set_attr<bool>("descending")
+    .f_deduce_type([](const std::vector<ExprPtr>& args,
+                      const std::vector<std::pair<std::string, std::any>>& kwargs) {
+      return DeduceTensorCiType(args, kwargs);
+    });
+
 TypePtr DeduceTensorDimType(const std::vector<ExprPtr>& args,
                             const std::vector<std::pair<std::string, std::any>>& kwargs) {
   // tensor.dim: Extract a shape dimension from a tensor as a scalar
diff --git a/src/ir/op/tile_ops/memory.cpp b/src/ir/op/tile_ops/memory.cpp
index 563d02dab..20a2beed5 100644
--- a/src/ir/op/tile_ops/memory.cpp
+++ b/src/ir/op/tile_ops/memory.cpp
@@ -388,6 +388,73 @@ TypePtr DeduceTileFullType(const std::vector<ExprPtr>& args,
   return std::make_shared<TileType>(tile_shape, dtype, std::nullopt, tile_view);
 }
 
+TypePtr DeduceTileCiType(const std::vector<ExprPtr>& args,
+                         const std::vector<std::pair<std::string, std::any>>& kwargs,
+                         const std::string& op_name) {
+  // tile.ci signature: (start, shape) with attrs {dtype, descending}
+  CHECK(args.size() == 2) << "The operator " << op_name
+                          << " requires exactly 2 arguments (start, shape), but got " << args.size();
+
+  // Extract dtype and validate it is one of the supported integer types.
+  DataType dtype = GetKwarg<DataType>(kwargs, "dtype");
+  CHECK(dtype == DataType::INT16 || dtype == DataType::INT32 || dtype == DataType::UINT16 ||
+        dtype == DataType::UINT32)
+      << "The operator " << op_name << " requires dtype to be one of {INT16, INT32, UINT16, UINT32}, but got "
+      << dtype.ToString();
+
+  // First argument is the scalar start value; its dtype must match the destination dtype.
+  auto start_scalar_type = As<ScalarType>(args[0]->GetType());
+  CHECK(start_scalar_type) << "The operator " << op_name
+                           << " requires first argument 'start' to be a scalar, but got "
+                           << args[0]->GetType()->TypeName();
+  CHECK(start_scalar_type->dtype_ == dtype)
+      << "The operator " << op_name << " requires 'start' dtype (" << start_scalar_type->dtype_.ToString()
+      << ") to match destination dtype (" << dtype.ToString() << ")";
+
+  // Second argument must be a MakeTuple of static ConstInt elements.
+  auto make_tuple = As<MakeTuple>(args[1]);
+  CHECK(make_tuple)
+      << "The operator " << op_name
+      << " requires second argument 'shape' to be a MakeTuple of compile-time constants, but got "
+      << args[1]->TypeName();
+
+  std::vector<ExprPtr> tile_shape;
+  tile_shape.reserve(make_tuple->elements_.size());
+  for (size_t i = 0; i < make_tuple->elements_.size(); ++i) {
+    auto const_int = As<ConstInt>(make_tuple->elements_[i]);
+    CHECK(const_int) << "The operator " << op_name << " shape element " << i
+                     << " must be a compile-time constant (ConstInt), but got "
+                     << make_tuple->elements_[i]->TypeName();
+    CHECK(const_int->value_ > 0) << "The operator " << op_name << " shape element " << i
+                                 << " must be positive, got " << const_int->value_;
+    tile_shape.push_back(make_tuple->elements_[i]);
+  }
+  CHECK(!tile_shape.empty()) << "The operator " << op_name << " requires non-empty shape";
+
+  // ISA constraint: destination Cols != 1 (column vectors not supported by pto.tci).
+  auto last_dim = As<ConstInt>(tile_shape.back());
+  CHECK(last_dim && last_dim->value_ != 1)
+      << "The operator " << op_name << " requires the innermost dimension (Cols) to be != 1, got "
+      << (last_dim ? last_dim->value_ : -1);
+
+  // ISA constraint: pto.tci only populates the first row and ignores valid rows, so every
+  // leading dimension must be 1. Reject multi-row shapes here to keep type metadata truthful.
+  for (size_t i = 0; i + 1 < tile_shape.size(); ++i) {
+    auto leading_dim = As<ConstInt>(tile_shape[i]);
+    CHECK(leading_dim && leading_dim->value_ == 1)
+        << "The operator " << op_name << " only populates the first row because pto.tci ignores valid rows; "
+        << "leading dimensions must be 1, but got " << (leading_dim ? leading_dim->value_ : -1)
+        << " at index " << i;
+  }
+
+  // descending kwarg is optional and defaults to false.
+  (void)GetKwarg<bool>(kwargs, "descending", false);
+
+  TileView tile_view;
+  tile_view.valid_shape = tile_shape;
+  return std::make_shared<TileType>(tile_shape, dtype, std::nullopt, tile_view);
+}
+
 TypePtr DeduceTileReadType(const std::vector<ExprPtr>& args,
                            const std::vector<std::pair<std::string, std::any>>& kwargs,
                            const std::string& op_name) {
@@ -683,5 +750,18 @@ REGISTER_OP("tile.full")
       return DeduceTileFullType(args, kwargs, "tile.full");
     });
 
+REGISTER_OP("tile.ci")
+    .set_op_category("TileOp")
+    .set_description("Generate a contiguous integer sequence into a destination tile (pto.tci)")
+    .add_argument("start", "Starting integer scalar (must match dst dtype)")
+    .add_argument("shape", "Destination shape (TupleType of ConstInt)")
+    .set_attr<DataType>("dtype")
+    .set_attr<bool>("descending")
+    .set_output_memory(MemorySpace::Vec)
+    .f_deduce_type([](const std::vector<ExprPtr>& args,
+                      const std::vector<std::pair<std::string, std::any>>& kwargs) {
+      return DeduceTileCiType(args, kwargs, "tile.ci");
+    });
+
 }  // namespace ir
 }  // namespace pypto
diff --git a/src/ir/transforms/op_conversion_registry.cpp b/src/ir/transforms/op_conversion_registry.cpp
index 6a2e2cba5..68d850738 100644
--- a/src/ir/transforms/op_conversion_registry.cpp
+++ b/src/ir/transforms/op_conversion_registry.cpp
@@ -163,6 +163,7 @@ void OpConversionRegistry::RegisterBroadcastAndTransformOps() {
   RegisterSimple("tensor.set_validshape", "tile.set_validshape");
 
   RegisterSimple("tensor.full", "tile.full");
+  RegisterSimple("tensor.ci", "tile.ci");
 }
 
 // ============================================================================
diff --git a/tests/st/harness/core/harness.py b/tests/st/harness/core/harness.py
index 350cf454d..07a5b0105 100644
--- a/tests/st/harness/core/harness.py
+++ b/tests/st/harness/core/harness.py
@@ -89,6 +89,8 @@ class DataType(Enum):
     FP16 = "fp16"
     INT32 = "int32"
     UINT32 = "uint32"
+    INT16 = "int16"
+    UINT16 = "uint16"
     INT64 = "int64"
     BOOL = "bool"
 
@@ -101,6 +103,8 @@ def torch_dtype(self) -> torch.dtype:
             DataType.FP16: torch.float16,
             DataType.INT32: torch.int32,
             DataType.UINT32: torch.int32,  # PyTorch has no uint32; use int32 (same bits)
+            DataType.INT16: torch.int16,
+            DataType.UINT16: torch.int16,  # PyTorch has limited uint16 support; use int16 (same bits)
             DataType.INT64: torch.int64,
             DataType.BOOL: torch.bool,
         }
diff --git a/tests/st/runtime/test_ci.py b/tests/st/runtime/test_ci.py
new file mode 100644
index 000000000..4ee8a6f9f
--- /dev/null
+++ b/tests/st/runtime/test_ci.py
@@ -0,0 +1,464 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+
+"""Test tile.ci / tensor.ci (arange) contiguous integer sequence generation.
+
+Covers:
+1. Ascending INT32 sequence (start=0).
+2. Ascending INT32 sequence with non-zero start.
+3. Descending INT32 sequence (tile.ci).
+4. tensor.ci ascending and descending (lowers to tile.ci via conversion pass).
+5. pl.tile.arange alias (ascending and descending).
+6. pl.tensor.arange alias (ascending and descending).
+7. Ascending UINT32 (start=5) and UINT16 (start=0) sequences (tile.ci).
+"""
+
+from typing import Any
+
+import pypto.language as pl
+import pytest
+import torch
+from harness.core.harness import DataType, PTOTestCase, TensorSpec
+from pypto.backend import BackendType
+from pypto.ir.pass_manager import OptimizationStrategy
+
+ROWS = 1  # single row: the ci unit tests reject shapes whose leading dimensions are not 1
+COLS = 32  # innermost dimension of every tile / tensor used in this file
+N = COLS  # number of generated sequence elements (ROWS is 1, so N == COLS)
+
+
+# --- Programs ---
+
+
+@pl.program
+class CiAscendStart0Program:  # tile.ci INT32 sequence with start=0 (descending left at default)
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(0, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class CiAscendStart10Program:  # tile.ci INT32 sequence with a non-zero start (10)
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(10, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class CiDescendingProgram:  # tile.ci INT32 sequence with descending=True, start = N - 1
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.ci(
+            N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True
+        )
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class CiTensorAscendProgram:
+    """tensor.ci ascending (start=0) — Opaque main + pl.at(CORE_GROUP); pl.assemble writes into Out."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.ci(0, [ROWS, COLS], dtype=pl.INT32)  # descending left at its default
+            output = pl.assemble(output, seq, [0, 0])  # place seq into output at offset [0, 0]
+        return output
+
+
+@pl.program
+class CiTensorDescendingProgram:  # tensor.ci INT32 sequence with descending=True, start = N - 1
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.ci(N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True)
+            output = pl.assemble(output, seq, [0, 0])  # place seq into output at offset [0, 0]
+        return output
+
+
+@pl.program
+class TileArangeAliasProgram:
+    """pl.tile.arange ascending (start=0) — exercises the alias of pl.tile.ci."""
+
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.arange(0, [ROWS, COLS], dtype=pl.INT32)
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class TileArangeDescendingProgram:
+    """pl.tile.arange with descending=True and start = N - 1 — exercises the alias of pl.tile.ci."""
+
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.INT32] = pl.tile.arange(
+            N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True
+        )
+        out: pl.Tensor[[ROWS, COLS], pl.INT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class TensorArangeAscendingProgram:
+    """pl.tensor.arange ascending (start=0) — exercises the alias of pl.tensor.ci."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.arange(0, [ROWS, COLS], dtype=pl.INT32)  # descending left at its default
+            output = pl.assemble(output, seq, [0, 0])  # place seq into output at offset [0, 0]
+        return output
+
+
+@pl.program
+class TensorArangeAliasProgram:
+    """pl.tensor.arange with descending=True and start = N - 1 — exercises the alias of pl.tensor.ci."""
+
+    @pl.function(type=pl.FunctionType.Opaque)
+    def main(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.INT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.INT32]:
+        with pl.at(level=pl.Level.CORE_GROUP):
+            seq = pl.tensor.arange(N - 1, [ROWS, COLS], dtype=pl.INT32, descending=True)
+            output = pl.assemble(output, seq, [0, 0])  # place seq into output at offset [0, 0]
+        return output
+
+
+@pl.program
+class CiUint32AscendProgram:  # tile.ci UINT32 sequence with start=5 (descending left at default)
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT32]:
+        seq: pl.Tile[[ROWS, COLS], pl.UINT32] = pl.tile.ci(5, [ROWS, COLS], dtype=pl.UINT32)
+        out: pl.Tensor[[ROWS, COLS], pl.UINT32] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT32]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT32]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+@pl.program
+class CiUint16AscendProgram:  # tile.ci UINT16 sequence with start=0 (descending left at default)
+    @pl.function(type=pl.FunctionType.InCore)
+    def kernel(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT16]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT16]:
+        seq: pl.Tile[[ROWS, COLS], pl.UINT16] = pl.tile.ci(0, [ROWS, COLS], dtype=pl.UINT16)
+        out: pl.Tensor[[ROWS, COLS], pl.UINT16] = pl.store(seq, offsets=[0, 0], output_tensor=output)
+        return out  # tensor returned by pl.store for the `output` argument
+
+    @pl.function(type=pl.FunctionType.Orchestration)
+    def orchestrator(
+        self,
+        output: pl.Out[pl.Tensor[[ROWS, COLS], pl.UINT16]],
+    ) -> pl.Tensor[[ROWS, COLS], pl.UINT16]:
+        output = self.kernel(output)  # delegate to the InCore kernel
+        return output
+
+
+# --- Test Cases ---
+
+
+class _CiBaseTestCase(PTOTestCase):  # shared backend / strategy settings for every ci test case
+    __test__ = False  # pytest collection opt-out marker for this helper base
+
+    def get_backend_type(self) -> BackendType:
+        return BackendType.Ascend910B
+
+    def get_strategy(self) -> OptimizationStrategy:
+        return OptimizationStrategy.Default
+
+
+class CiAscendStart0TestCase(_CiBaseTestCase):  # expects 0, 1, ..., N - 1 as INT32
+    def get_name(self) -> str:
+        return "ci_ascend_start0"
+
+    def get_program(self) -> Any:
+        return CiAscendStart0Program
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiAscendStart10TestCase(_CiBaseTestCase):  # expects 10, 11, ..., 10 + N - 1 as INT32
+    def get_name(self) -> str:
+        return "ci_ascend_start10"
+
+    def get_program(self) -> Any:
+        return CiAscendStart10Program
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = (torch.arange(N, dtype=torch.int32) + 10).reshape(ROWS, COLS)
+
+
+class CiDescendingTestCase(_CiBaseTestCase):  # expects N - 1, N - 2, ..., 0 as INT32
+    def get_name(self) -> str:
+        return "ci_descending"
+
+    def get_program(self) -> Any:
+        return CiDescendingProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).flip(0).reshape(ROWS, COLS)
+
+
+class CiTensorAscendTestCase(_CiBaseTestCase):  # tensor.ci path; expects 0, 1, ..., N - 1 as INT32
+    def get_name(self) -> str:
+        return "ci_tensor_ascend"
+
+    def get_program(self) -> Any:
+        return CiTensorAscendProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiTensorDescendingTestCase(_CiBaseTestCase):  # tensor.ci path; expects N - 1, ..., 0 as INT32
+    def get_name(self) -> str:
+        return "ci_tensor_descending"
+
+    def get_program(self) -> Any:
+        return CiTensorDescendingProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).flip(0).reshape(ROWS, COLS)
+
+
+class TileArangeAliasTestCase(_CiBaseTestCase):  # pl.tile.arange path; expects 0, 1, ..., N - 1
+    def get_name(self) -> str:
+        return "tile_arange_alias"
+
+    def get_program(self) -> Any:
+        return TileArangeAliasProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TileArangeDescendingTestCase(_CiBaseTestCase):  # pl.tile.arange path; expects N - 1, ..., 0
+    def get_name(self) -> str:
+        return "tile_arange_descending"
+
+    def get_program(self) -> Any:
+        return TileArangeDescendingProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).flip(0).reshape(ROWS, COLS)
+
+
+class TensorArangeAscendingTestCase(_CiBaseTestCase):  # pl.tensor.arange path; expects 0, ..., N - 1
+    def get_name(self) -> str:
+        return "tensor_arange_ascending"
+
+    def get_program(self) -> Any:
+        return TensorArangeAscendingProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class TensorArangeAliasTestCase(_CiBaseTestCase):  # pl.tensor.arange descending; expects N - 1, ..., 0
+    def get_name(self) -> str:
+        return "tensor_arange_alias"
+
+    def get_program(self) -> Any:
+        return TensorArangeAliasProgram
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.INT32, is_output=True)]
+
+    def compute_expected(self, tensors, params=None):
+        tensors["output"][:] = torch.arange(N, dtype=torch.int32).flip(0).reshape(ROWS, COLS)
+
+
+class CiUint32AscendTestCase(_CiBaseTestCase):  # expects 5, 6, ..., 5 + N - 1 for a UINT32 output
+    def get_name(self) -> str:
+        return "ci_uint32_ascend"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.UINT32, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiUint32AscendProgram
+
+    def compute_expected(self, tensors, params=None):  # harness holds UINT32 as torch.int32 (same bits)
+        tensors["output"][:] = torch.arange(5, 5 + N, dtype=torch.int32).reshape(ROWS, COLS)
+
+
+class CiUint16AscendTestCase(_CiBaseTestCase):  # expects 0, 1, ..., N - 1 for a UINT16 output
+    def get_name(self) -> str:
+        return "ci_uint16_ascend"
+
+    def define_tensors(self) -> list[TensorSpec]:
+        return [TensorSpec("output", [ROWS, COLS], DataType.UINT16, is_output=True)]
+
+    def get_program(self) -> Any:
+        return CiUint16AscendProgram
+
+    def compute_expected(self, tensors, params=None):  # harness holds UINT16 as torch.int16 (same bits)
+        tensors["output"][:] = torch.arange(0, N, dtype=torch.int16).reshape(ROWS, COLS)
+
+
+# --- Tests ---
+
+
+class TestCi:
+    """On-device checks that tile.ci / tensor.ci and their arange aliases emit the expected sequences."""
+
+    def test_ci_ascend_start0(self, test_runner):
+        outcome = test_runner.run(CiAscendStart0TestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_ascend_start10(self, test_runner):
+        outcome = test_runner.run(CiAscendStart10TestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_descending(self, test_runner):
+        outcome = test_runner.run(CiDescendingTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_tensor_ascend(self, test_runner):
+        outcome = test_runner.run(CiTensorAscendTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_tensor_descending(self, test_runner):
+        outcome = test_runner.run(CiTensorDescendingTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_tile_arange_alias(self, test_runner):
+        outcome = test_runner.run(TileArangeAliasTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_tile_arange_descending(self, test_runner):
+        outcome = test_runner.run(TileArangeDescendingTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_tensor_arange_alias(self, test_runner):
+        outcome = test_runner.run(TensorArangeAliasTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_tensor_arange_ascending(self, test_runner):
+        outcome = test_runner.run(TensorArangeAscendingTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_uint32_ascend(self, test_runner):
+        outcome = test_runner.run(CiUint32AscendTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+    def test_ci_uint16_ascend(self, test_runner):
+        outcome = test_runner.run(CiUint16AscendTestCase())
+        assert outcome.passed, f"Test failed: {outcome.error}"
+
+
+if __name__ == "__main__":  # running the file directly invokes pytest on this file, verbosely
+    pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/operators/test_tensor_ops.py b/tests/ut/ir/operators/test_tensor_ops.py
index 9db52d020..c61e9b787 100644
--- a/tests/ut/ir/operators/test_tensor_ops.py
+++ b/tests/ut/ir/operators/test_tensor_ops.py
@@ -2212,5 +2212,44 @@ def test_tensor_gather_rejects_mixed_index_and_mask():
         ir.op.tensor.gather(inp, dim=-1, index=idx, mask_pattern=1)
 
 
+class TestTensorCiOp:
+    """Type-deduction and argument-validation checks for the tensor.ci op builder."""
+
+    def test_tensor_ci_ascending(self):
+        op = tensor.ci(0, [1, 32], dtype=DataType.INT32)
+        result_type = op.type
+        assert "tensor.ci" in str(op)
+        assert isinstance(result_type, ir.TensorType)
+        assert len(result_type.shape) == 2
+        assert result_type.dtype == DataType.INT32
+
+    def test_tensor_ci_descending_kwarg_printed(self):
+        printed = str(tensor.ci(10, [1, 16], dtype=DataType.INT32, descending=True))
+        assert "descending=True" in printed
+
+    def test_tensor_ci_rejects_float_dtype(self):
+        with pytest.raises(ValueError, match=r"INT16.*INT32.*UINT16.*UINT32"):
+            tensor.ci(0, [1, 32], dtype=DataType.FP32)
+
+    @pytest.mark.parametrize("dtype", [DataType.INT16, DataType.UINT16, DataType.UINT32])
+    def test_tensor_ci_accepts_non_int32_dtypes(self, dtype):
+        op = tensor.ci(0, [1, 16], dtype=dtype)
+        result_type = op.type
+        assert isinstance(result_type, ir.TensorType)
+        assert result_type.dtype == dtype
+
+    def test_tensor_ci_rejects_cols_equal_one(self):
+        with pytest.raises(ValueError, match="innermost dimension"):
+            tensor.ci(0, [32, 1], dtype=DataType.INT32)
+
+    def test_tensor_ci_rejects_multi_row_shape(self):
+        """A shape whose leading dimension is not 1 must be rejected (pto.tci fills only the first row)."""
+        with pytest.raises(ValueError, match=r"leading dimensions must be 1"):
+            tensor.ci(0, [4, 32], dtype=DataType.INT32)
+
+    def test_tensor_arange_alias_is_ci(self):
+        assert pl.tensor.arange is pl.tensor.ci
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/operators/test_tile_ops.py b/tests/ut/ir/operators/test_tile_ops.py
index 5a207ffce..30387f642 100644
--- a/tests/ut/ir/operators/test_tile_ops.py
+++ b/tests/ut/ir/operators/test_tile_ops.py
@@ -2513,5 +2513,50 @@ def test_tile_shape_mismatch_message(self, op_callable, lhs_dims, rhs_dims, matc
             op_callable(tile_a, tile_b)
 
 
+class TestTileCiOp:
+    """Tests for tile.ci (contiguous integer sequence generation, pto.tci)."""
+
+    def test_tile_ci_ascending(self):
+        """tile.ci returns a TileType with requested shape / dtype."""
+        call = tile.ci(0, [1, 32], dtype=DataType.INT32)
+        t = call.type
+        assert isinstance(t, ir.TileType)
+        assert t.dtype == DataType.INT32
+        assert len(t.shape) == 2
+        assert "tile.ci" in str(call)
+        assert "descending=False" in str(call)
+
+    def test_tile_ci_descending_kwarg_printed(self):
+        """descending=True should appear in the printed IR."""
+        call = tile.ci(10, [1, 16], dtype=DataType.INT32, descending=True)
+        assert "descending=True" in str(call)
+
+    def test_tile_ci_rejects_float_dtype(self):
+        with pytest.raises(ValueError, match=r"INT16.*INT32.*UINT16.*UINT32"):
+            tile.ci(0, [1, 32], dtype=DataType.FP32)
+
+    def test_tile_ci_accepts_uint_dtype(self):  # the deduced tile type must carry the requested dtype
+        call = tile.ci(0, [1, 16], dtype=DataType.UINT32)
+        assert isinstance(call.type, ir.TileType) and call.type.dtype == DataType.UINT32
+
+    def test_tile_ci_rejects_cols_equal_one(self):
+        with pytest.raises(ValueError, match="innermost dimension"):
+            tile.ci(0, [32, 1], dtype=DataType.INT32)
+
+    def test_tile_ci_rejects_multi_row_shape(self):
+        """pto.tci only populates the first row, so leading dims must be 1."""
+        with pytest.raises(ValueError, match=r"leading dimensions must be 1"):
+            tile.ci(0, [4, 32], dtype=DataType.INT32)
+
+    def test_tile_ci_rejects_start_dtype_mismatch(self):  # INT16 start scalar vs requested INT32 dtype
+        span = ir.Span.unknown()
+        start = ir.Var("s", ir.ScalarType(DataType.INT16), span)
+        with pytest.raises(ValueError, match=r"start.*dtype"):
+            tile.ci(start, [1, 32], dtype=DataType.INT32)
+
+    def test_tile_arange_alias_is_ci(self):
+        assert pl.tile.arange is pl.tile.ci
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py b/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
index b2e17a7de..f1b9eaae9 100644
--- a/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
+++ b/tests/ut/ir/transforms/test_convert_tensor_to_tile_ops.py
@@ -1763,6 +1763,30 @@ def main(self, x: pl.Tensor[[64], pl.FP32]) -> pl.Tensor[[64], pl.FP32]:
         assert "tensor.full" not in ir_str
 
 
+class TestTensorCiConversion:  # convert_tensor_to_tile_ops coverage for tensor.ci
+    def test_tensor_ci_conversion(self):
+        """tensor.ci is rewritten to tile.ci and the descending=True kwarg survives in the printed IR."""
+
+        @pl.program
+        class Before:
+            @pl.function(type=pl.FunctionType.InCore)
+            def main_incore_0(self, x: pl.Tensor[[1, 32], pl.INT32]) -> pl.Tensor[[1, 32], pl.INT32]:
+                idx: pl.Tensor[[1, 32], pl.INT32] = pl.tensor.ci(0, [1, 32], dtype=pl.INT32, descending=True)
+                y: pl.Tensor[[1, 32], pl.INT32] = pl.add(idx, x)
+                return y
+
+            @pl.function
+            def main(self, x: pl.Tensor[[1, 32], pl.INT32]) -> pl.Tensor[[1, 32], pl.INT32]:
+                y: pl.Tensor[[1, 32], pl.INT32] = self.main_incore_0(x)
+                return y
+
+        After = passes.convert_tensor_to_tile_ops()(Before)  # run the pass under test on the program
+        ir_str = str(After)  # assertions below inspect the printed form of the result
+        assert "tile.ci" in ir_str
+        assert "tensor.ci" not in ir_str
+        assert "descending=True" in ir_str
+
+
+
 class TestAssembleParentStride:
     """Tests for physical stride propagation when assemble is in orchestration."""