Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ set(PYPTO_SOURCES
src/ir/op/tile_ops/transform.cpp
src/ir/op/tile_ops/unary.cpp
src/ir/op/tile_ops/cross_core.cpp
src/ir/op/tile_ops/utility.cpp
src/ir/op/sync_ops/sync.cpp
src/ir/op/sync_ops/cross_core.cpp
src/ir/op/tensor_ops/broadcast.cpp
Expand All @@ -114,6 +115,7 @@ set(PYPTO_SOURCES
src/ir/op/tensor_ops/reduction.cpp
src/ir/op/tensor_ops/transform.cpp
src/ir/op/tensor_ops/unary.cpp
src/ir/op/tensor_ops/utility.cpp
src/ir/op/testing.cpp
src/ir/op/type_inference.cpp

Expand Down
16 changes: 16 additions & 0 deletions python/pypto/ir/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,3 +940,19 @@ def scatter_update(
op_args: list[Expr] = [input, index, src]
kwargs: dict[str, Any] = {"dim": dim_val}
return _ir_core.create_op_call("tensor.scatter_update", op_args, kwargs, actual_span)


def runtime_print(tensor: Expr, span: Span | None = None) -> Call:
    """Build a ``tensor.runtime_print`` call for debugging a tensor at runtime.

    The op is lowered to a pto.tprint instruction in the compiled output.

    Args:
        tensor: Tensor expression to print (TensorType)
        span: Source span to attach to the call; when omitted it is resolved
            through ``_get_span_or_capture``

    Returns:
        Call expression whose type is the pass-through TensorType of ``tensor``
    """
    op_args: list[Expr] = [tensor]
    kwargs: dict[str, Any] = {}  # the op takes no attributes
    resolved_span = _get_span_or_capture(span)
    return _ir_core.create_op_call("tensor.runtime_print", op_args, kwargs, resolved_span)
16 changes: 16 additions & 0 deletions python/pypto/ir/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1950,3 +1950,19 @@ def tpop_from_aiv(
op = _ir_core.get_op("tile.tpop_from_aiv")
return _ir_core.Call(op, [], {"split": split}, resolved_type, actual_span)
return _ir_core.create_op_call("tile.tpop_from_aiv", [], {"split": split}, actual_span)


def runtime_print(tile: Expr, span: Span | None = None) -> Call:
    """Build a ``tile.runtime_print`` call for debugging a tile at runtime.

    The op is lowered to a pto.tprint instruction in the compiled output.

    Args:
        tile: Tile expression to print (TileType)
        span: Source span to attach to the call; when omitted it is resolved
            through ``_get_span_or_capture``

    Returns:
        Call expression whose type is the pass-through TileType of ``tile``
    """
    # The op has a single positional operand and no keyword attributes.
    return _ir_core.create_op_call(
        "tile.runtime_print",
        [tile],
        {},
        _get_span_or_capture(span),
    )
2 changes: 2 additions & 0 deletions python/pypto/language/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
row_min,
row_sum,
rsqrt,
runtime_print,
slice,
sqrt,
sub,
Expand Down Expand Up @@ -264,6 +265,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
"recip",
"read",
"write",
"runtime_print",
# Promoted tile-only
"create_tile",
"fillpad",
Expand Down
13 changes: 13 additions & 0 deletions python/pypto/language/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"reshape",
"transpose",
"scatter_update",
"runtime_print",
]

from pypto.ir.op import tensor_ops as _ir_ops
Expand Down Expand Up @@ -779,3 +780,15 @@ def scatter_update(
"""
call_expr = _ir_ops.scatter_update(input.unwrap(), dim, index.unwrap(), src.unwrap())
return Tensor(expr=call_expr)


def runtime_print(tensor: Tensor) -> None:
    """Emit a runtime debug print of a tensor's contents.

    Lowers to a pto.tprint instruction in the compiled output.
    Statement-only: the IR call built by the underlying helper is not
    returned, so this function always yields ``None``.

    Args:
        tensor: Tensor whose contents should be printed
    """
    tensor_expr = tensor.unwrap()
    _ir_ops.runtime_print(tensor_expr)
13 changes: 13 additions & 0 deletions python/pypto/language/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
"tpush_to_aic",
"tpop_from_aic",
"tpop_from_aiv",
"runtime_print",
]

from pypto.ir.op import tile_ops as _ir_ops
Expand Down Expand Up @@ -1547,3 +1548,15 @@ def sels(lhs: Tile, rhs: Tile, select_mode: int | float | Expr | Scalar) -> Tile
select_mode_expr = select_mode.unwrap() if isinstance(select_mode, Scalar) else select_mode
call_expr = _ir_ops.sels(lhs.unwrap(), rhs.unwrap(), select_mode_expr)
return Tile(expr=call_expr)


def runtime_print(tile: Tile) -> None:
    """Emit a runtime debug print of a tile's contents.

    Lowers to a pto.tprint instruction in the compiled output.
    Statement-only: the IR call built by the underlying helper is not
    returned, so this function always yields ``None``.

    Args:
        tile: Tile whose contents should be printed
    """
    tile_expr = tile.unwrap()
    _ir_ops.runtime_print(tile_expr)
17 changes: 17 additions & 0 deletions python/pypto/language/op/unified_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"create_tile",
"read",
"write",
"runtime_print",
]

from pypto.ir.utils import resolve_cast_mode
Expand Down Expand Up @@ -552,3 +553,19 @@ def write(dst: Tensor | Tile, offset: IntLike | Sequence[IntLike], value: Scalar
if isinstance(dst, Tile):
return _tile.write(dst, offset, value)
raise TypeError(f"write: expected Tensor or Tile, got {type(dst).__name__}")


def runtime_print(src: Tensor | Tile) -> None:
    """Emit a runtime debug print for either a tensor or a tile.

    Dispatches on the wrapper type of ``src`` to the tensor-level or
    tile-level implementation. Lowers to a pto.tprint instruction in the
    compiled output. Statement-only: no value is returned.

    Args:
        src: Tensor or tile to print

    Raises:
        TypeError: If ``src`` is neither a Tensor nor a Tile
    """
    # Reject unsupported inputs up front.
    if not isinstance(src, (Tensor, Tile)):
        raise TypeError(f"runtime_print: expected Tensor or Tile, got {type(src).__name__}")

    # Tensor takes precedence over Tile, mirroring the order of the checks above.
    handler = _tensor.runtime_print if isinstance(src, Tensor) else _tile.runtime_print
    return handler(src)
6 changes: 4 additions & 2 deletions src/backend/common/pto_ops_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,9 +1234,11 @@ void RegisterPTOOps(Backend& backend, const std::unordered_set<std::string>& exc
reg("tile.mrgsort", [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
return MakeMrgSortCodegenPTO("pto.tmrgsort", op, codegen);
});
reg("tile.print", [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
auto make_tprint = [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
return MakePrintCodegenPTO("pto.tprint", op, codegen);
});
};
reg("tile.runtime_print", make_tprint);
reg("tensor.runtime_print", make_tprint);

// In-place accumulation ops (matmul_acc, gemv_acc): ptoas expects the
// accumulator in ins() to be the same SSA value as outs(). InitMemRef
Expand Down
54 changes: 54 additions & 0 deletions src/ir/op/tensor_ops/utility.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/

/**
* @file utility.cpp
* @brief Utility tensor operations (print)
*
* This file implements utility/debugging operations for tensor-level programming.
*/

#include <any>
#include <string>
#include <utility>
#include <vector>

#include "pypto/core/logging.h"
#include "pypto/ir/kind_traits.h"
#include "pypto/ir/op_registry.h"
#include "pypto/ir/type.h"

namespace pypto {
namespace ir {

/**
 * @brief Deduce the result type of a tensor print operator.
 *
 * Validates that exactly one argument was supplied and that it is a
 * TensorType. Printing is a side-effect operation, so the input tensor type
 * is returned unchanged.
 *
 * @param args Positional arguments of the call; must contain exactly one tensor
 * @param kwargs Keyword arguments of the call (not inspected)
 * @param op_name Operator name used in diagnostic messages
 * @return The TensorType of the single argument
 */
TypePtr DeduceTensorPrintType(const std::vector<ExprPtr>& args,
                              const std::vector<std::pair<std::string, std::any>>& kwargs,
                              const std::string& op_name) {
  // Arity: the op takes the tensor to print and nothing else.
  CHECK(args.size() == 1) << "The operator " << op_name << " requires 1 argument (tensor), but got "
                          << args.size();

  // Fetch the argument type once and reuse it for the cast and the diagnostic.
  const auto& arg_type = args[0]->GetType();
  auto tensor_type = As<TensorType>(arg_type);
  CHECK(tensor_type) << "The operator " << op_name << " requires argument to be a TensorType, but got "
                     << arg_type->TypeName();

  return tensor_type;
}

// Registers the tensor-level debug print operator. It takes a single tensor
// argument and delegates type deduction to DeduceTensorPrintType, which
// returns the input's TensorType unchanged.
REGISTER_OP("tensor.runtime_print")
.set_op_category("TensorOp")
.set_description("Print tensor contents for debugging (generates pto.tprint)")
.add_argument("tensor", "Input tensor to print (TensorType)")
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
// The op name is forwarded so diagnostics mention this operator.
return DeduceTensorPrintType(args, kwargs, "tensor.runtime_print");
});

} // namespace ir
} // namespace pypto
55 changes: 55 additions & 0 deletions src/ir/op/tile_ops/utility.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/

/**
* @file utility.cpp
* @brief Utility tile operations (print)
*
* This file implements utility/debugging operations for tile-level programming.
*/

#include <any>
#include <string>
#include <utility>
#include <vector>

#include "pypto/core/logging.h"
#include "pypto/ir/kind_traits.h"
#include "pypto/ir/op_registry.h"
#include "pypto/ir/type.h"

namespace pypto {
namespace ir {

/**
 * @brief Deduce the result type of a tile print operator.
 *
 * Validates that exactly one argument was supplied and that it is a
 * TileType. Printing is a side-effect operation, so the input tile type is
 * returned unchanged.
 *
 * @param args Positional arguments of the call; must contain exactly one tile
 * @param kwargs Keyword arguments of the call (not inspected)
 * @param op_name Operator name used in diagnostic messages
 * @return The TileType of the single argument
 */
TypePtr DeduceTilePrintType(const std::vector<ExprPtr>& args,
                            const std::vector<std::pair<std::string, std::any>>& kwargs,
                            const std::string& op_name) {
  // Arity: the op takes the tile to print and nothing else.
  CHECK(args.size() == 1) << "The operator " << op_name << " requires 1 argument (tile), but got "
                          << args.size();

  // Fetch the argument type once and reuse it for the cast and the diagnostic.
  const auto& arg_type = args[0]->GetType();
  auto tile_type = As<TileType>(arg_type);
  CHECK(tile_type) << "The operator " << op_name << " requires argument to be a TileType, but got "
                   << arg_type->TypeName();

  return tile_type;
}

// Registers the tile-level debug print operator. It takes a single tile
// argument, declares no memory spec, and delegates type deduction to
// DeduceTilePrintType, which returns the input's TileType unchanged.
REGISTER_OP("tile.runtime_print")
.set_op_category("TileOp")
.set_description("Print tile contents for debugging (generates pto.tprint)")
.add_argument("tile", "Input tile to print (TileType)")
.no_memory_spec()
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
// The op name is forwarded so diagnostics mention this operator.
return DeduceTilePrintType(args, kwargs, "tile.runtime_print");
});

} // namespace ir
} // namespace pypto
36 changes: 36 additions & 0 deletions src/ir/transforms/op_conversion_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,42 @@ OpConversionRegistry::OpConversionRegistry() {
// Memory creation ops
RegisterSimple("tensor.full", "tile.full");

// Utility ops — runtime_print needs a custom converter because the
// argument may still be a TensorType (e.g. printing a function parameter
// before any explicit tile.load). In that case we insert a tile.load
// prologue to materialise the tile, matching the tensor.fillpad pattern.
//
// Conversion outcomes:
//   - input is already a TileType -> tile.runtime_print(input), no prologue
//   - input is a TensorType       -> prologue `runtime_print_src = tile.load(...)`
//                                    followed by tile.runtime_print(runtime_print_src)
//   - any other type              -> CHECK failure
RegisterCustom(
"tensor.runtime_print",
[](const std::vector<ExprPtr>& args, const std::vector<std::pair<std::string, std::any>>& kwargs,
const Span& span) -> ConversionResult {
CHECK(args.size() == 1) << "tensor.runtime_print conversion expects 1 arg (input)";
auto& op_reg = OpRegistry::GetInstance();
const auto& input = args[0];

// Already a tile — pass through.
if (As<TileType>(input->GetType())) {
return ConversionResult{op_reg.Create("tile.runtime_print", {input}, span)};
}

auto tensor_type = As<TensorType>(input->GetType());
CHECK(tensor_type) << "tensor.runtime_print conversion: input must be TensorType or TileType, got "
<< input->GetType()->TypeName();

// Build the tile.load operands from the tensor's own shape.
// NOTE(review): presumably this loads the whole tensor from offset zero — confirm
// against MakeZeroOffsetsTuple / MakeShapesTuple.
auto offsets = MakeZeroOffsetsTuple(tensor_type->shape_.size(), span);
auto shapes = MakeShapesTuple(tensor_type->shape_, span);

// The load targets MemorySpace::Vec without transposition.
std::vector<std::pair<std::string, std::any>> load_kwargs = {{"target_memory", MemorySpace::Vec},
{"transpose", false}};
// `shapes` is passed for both the third and fourth tile.load operands.
// NOTE(review): confirm the meaning of each operand against the tile.load op definition.
auto load_call = op_reg.Create("tile.load", {input, offsets, shapes, shapes}, load_kwargs, span);
// NOTE(review): the temporary always uses the fixed name "runtime_print_src";
// confirm that repeated conversions in one function do not need unique names.
auto load_var = std::make_shared<Var>("runtime_print_src", load_call->GetType(), span);

// The prologue statement binds the loaded tile before the print call is emitted.
std::vector<StmtPtr> prologue;
prologue.push_back(std::make_shared<AssignStmt>(load_var, load_call, span));

auto print_call = op_reg.Create("tile.runtime_print", {load_var}, span);
return ConversionResult{std::move(prologue), print_call};
});

// ────────────────────────────────────────────────────────────────────────
// Broadcast-aware elementwise binary ops
//
Expand Down
Loading
Loading