From eeb1aa69ad121ec9433e43e8b824baa5786b4f9b Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Thu, 17 Jul 2025 00:01:05 +0400 Subject: [PATCH 01/27] initial implementation for Tolk source maps --- tolk/CMakeLists.txt | 1 + tolk/abscode.cpp | 4 ++ tolk/analyzer.cpp | 5 +- tolk/asmops.cpp | 13 ++++++ tolk/codegen.cpp | 17 +++++++ tolk/compiler-state.h | 1 + tolk/pipe-ast-to-legacy.cpp | 49 +++++++++++++++++++- tolk/pipe-process-debug-info.cpp | 79 ++++++++++++++++++++++++++++++++ tolk/pipeline.h | 1 + tolk/src-file.h | 1 + tolk/tolk-main.cpp | 16 ++++++- tolk/tolk-wasm.cpp | 7 ++- tolk/tolk.cpp | 4 +- tolk/tolk.h | 18 +++++++- 14 files changed, 207 insertions(+), 9 deletions(-) create mode 100644 tolk/pipe-process-debug-info.cpp diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 033456e7fa..72358e550f 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -29,6 +29,7 @@ set(TOLK_SOURCE pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp + pipe-process-debug-info.cpp type-system.cpp smart-casts-cfg.cpp generics-helpers.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 462aad7b2d..2b5e73d8d7 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -254,6 +254,10 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx show_var_list(os, left, vars); os << " := " << str_const << std::endl; break; + case _DebugInfo: + os << pfx << dis << "DEBUGINFO "; + os << debug_info->idx << std::endl; + break; case _Import: os << pfx << dis << "IMPORT "; show_var_list(os, left, vars); diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 8e9fe914e7..19cd038eda 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -303,7 +303,7 @@ bool Op::std_compute_used_vars(bool disabled) { bool Op::compute_used_vars(const CodeBlob& code, bool edit) { tolk_assert(next); const VarDescrList& next_var_info = next->var_info; - if (cl == _Nop) { + if (cl == _Nop || cl == _DebugInfo) { 
return set_var_info_except(next_var_info, left); } switch (cl) { @@ -528,6 +528,7 @@ bool prune_unreachable(std::unique_ptr& ops) { case Op::_UnTuple: case Op::_Import: case Op::_Let: + case Op::_DebugInfo: reach = true; break; case Op::_Return: @@ -693,6 +694,7 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { switch (cl) { case _Nop: case _Import: + case _DebugInfo: break; case _Return: values.set_unreachable(); @@ -887,6 +889,7 @@ bool Op::mark_noreturn() { case _SetGlob: case _GlobVar: case _CallInd: + case _DebugInfo: return set_noreturn(next->mark_noreturn()); case _Return: return set_noreturn(); diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index 3dc39239b4..d61c3c6826 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -330,6 +330,19 @@ void AsmOpList::show_var_ext(std::ostream& os, std::pair } } +std::optional> AsmOpList::get_var(std::pair idx_pair) const { + if (!var_names_ || (unsigned)idx_pair.first >= var_names_->size()) { + return std::nullopt; + } + auto var = var_names_->at(idx_pair.first); + if ((unsigned)idx_pair.second < constants_.size() && constants_[idx_pair.second].not_null()) { + const auto value = constants_[idx_pair.second]; + auto value_str = value->to_dec_string(); + return std::tie(var, value_str); + } + return std::tie(var, ""); +} + void AsmOpList::out(std::ostream& os, int mode) const { std::size_t n = list_.size(); for (std::size_t i = 0; i < n; i++) { diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index f5ef89de3b..23ce50d378 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -276,6 +276,19 @@ void Stack::rearrange_top(SrcLocation loc, var_idx_t top, bool last) { } bool Op::generate_code_step(Stack& stack) { + // we need to handle it here to correctly handle case `IFJMP { DROP }` + if (cl == _DebugInfo) { + std::ostringstream ops; + ops << debug_info->idx << " DEBUGMARK"; // pseudo instruction + stack.o.insert(stack.o.list_.size() - 1, this->loc, ops.str()); + + for (auto i : stack.s) { + if (const auto var = 
stack.o.get_var(i)) { + debug_info->vars.push_back(*var); + } + } + } + stack.opt_show(); // detect `throw 123` (actually _IntConst 123 + _Call __throw) @@ -882,6 +895,10 @@ bool Op::generate_code_step(Stack& stack) { stack.o << AsmOp::Custom(loc, "TRY"); return true; } + case _DebugInfo: { + // already handled above + return true; + } default: std::cerr << "fatal: unknown operation \n"; throw ParseError(loc, "unknown operation in generate_code()"); diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 6685e092a7..5daf03c29d 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -53,6 +53,7 @@ struct CompilerSettings { int optimization_level = 2; bool stack_layout_comments = true; bool tolk_src_as_line_comments = true; + bool with_debug_info = false; std::string output_filename; std::string boc_output_filename; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index f22b0ab445..c14faa435a 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -27,6 +27,7 @@ #include "generics-helpers.h" #include "send-message-api.h" #include "gen-entrypoints.h" +#include /* * This pipe is the last one operating AST: it transforms AST to IR. 
@@ -121,6 +122,42 @@ static int calc_offset_on_stack(StructPtr struct_ref, int field_idx) { return stack_offset; } +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) { + if (!G.settings.with_debug_info) { + return; + } + + if (kind == ast_block_statement) { + return; + } + + if (code.prev_ops_kind == Op::_DebugInfo) { + // std::cerr << "skip repeated debug info" << std::endl; + return; + } + + auto& op = code.emplace_back(loc, Op::_DebugInfo); + const auto info = std::make_shared(); + info->idx = code.debug_infos.size(); + + if (const auto src_file = loc.get_src_file()) { + const auto& pos = src_file->convert_offset(loc.get_char_offset()); + + info->loc_file = src_file->realpath; + info->loc_line = pos.line_no; + info->loc_pos = pos.char_no; + info->loc_len = pos.line_str.length(); + } + + info->func_name = code.name; + code.debug_infos.push_back(info); + + op.debug_info = info; +} + +void insert_debug_info(AnyV v, CodeBlob& code) { + insert_debug_info_inner(v->loc, v->kind, code); +} // Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable // exists, but on its change, something non-trivial should happen. 
@@ -603,6 +640,8 @@ std::vector pre_compile_is_type(CodeBlob& code, TypePtr expr_type, Ty static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc, std::vector&& args_vars, FunctionPtr fun_ref, const char* debug_desc, bool arg_order_already_equals_asm = false) { + insert_debug_info_inner(loc, ast_function_call, code); + std::vector rvect = code.create_tmp_var(ret_type, loc, debug_desc); Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref); if (!fun_ref->is_marked_as_pure()) { @@ -2018,6 +2057,11 @@ static std::vector process_artificial_aux_vertex(V pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + if (v->kind != ast_binary_operator && v->kind != ast_unary_operator && v->kind != ast_reference && + v->kind != ast_is_type_operator && v->kind != ast_function_call) { + insert_debug_info(v, code); + } + switch (v->kind) { case ast_reference: return process_reference(v->as(), code, target_type, lval_ctx); @@ -2286,7 +2330,7 @@ static void process_return_statement(V v, CodeBlob& code) static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { std::vector mutated_vars; if (code.fun_ref->has_mutate_params()) { - for (const LocalVarData& p_sym: code.fun_ref->parameters) { + for (const LocalVarData& p_sym : code.fun_ref->parameters) { if (p_sym.is_mutate_parameter()) { mutated_vars.insert(mutated_vars.end(), p_sym.ir_idx.begin(), p_sym.ir_idx.end()); } @@ -2295,8 +2339,9 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code code.emplace_back(loc_end, Op::_Return, std::move(mutated_vars)); } - void process_any_statement(AnyV v, CodeBlob& code) { + insert_debug_info(v, code); + switch (v->kind) { case ast_block_statement: return process_block_statement(v->as(), code); diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp new file mode 100644 index 0000000000..f1707eacc8 --- /dev/null +++ 
b/tolk/pipe-process-debug-info.cpp @@ -0,0 +1,79 @@ +#include "tolk.h" +#include "pipeline.h" +#include "compiler-state.h" +#include "type-system.h" +#include "td/utils/JsonBuilder.h" +#include + +namespace tolk { + +void pipeline_process_debug_info(std::ostream& debug_out) { + for (const auto& func : G.all_functions) { + if (func->name == "main") { + std::cout << func->name << std::endl; + + const auto debug_infos = std::get(func->body)->code->debug_infos; + std::cout << debug_infos[0]->func_name << std::endl; + + td::JsonBuilder _jb; + auto objb = _jb.enter_object(); + + { + td::JsonBuilder jsonb; + auto arrb = jsonb.enter_array(); + for (auto glob_var : G.all_global_vars) { + auto vb = arrb.enter_value(); + auto ob = vb.enter_object(); + + ob("name", glob_var->name); + ob("type", glob_var->declared_type->as_human_readable()); + } + arrb.leave(); + + objb("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); + } + + { + td::JsonBuilder jsonb; + auto arrb = jsonb.enter_array(); + for (auto di_ptr : debug_infos) { + const auto di = *di_ptr; + auto vb = arrb.enter_value(); + auto ob = vb.enter_object(); + ob("file", di.loc_file); + ob("line", (td::int64)di.loc_line); + ob("pos", (td::int64)di.loc_pos); + ob("length", (td::int64)di.loc_len); + + td::JsonBuilder varb; + auto vararrb = varb.enter_array(); + for (auto varstr : di.vars) { + const auto [var, value] = varstr; + auto varb = vararrb.enter_value(); + auto varbo = varb.enter_object(); + varbo("name", var.name == "" ? "'" + std::to_string(var.ir_idx) : var.name); + varbo("type", var.v_type == nullptr ? 
"" : var.v_type->as_human_readable()); + if (value.length() > 0) { + varbo("value", value); + } + } + vararrb.leave(); + + td::JsonRaw vararrs(varb.string_builder().as_cslice()); + + ob("vars", vararrs); + ob("func", di.func_name); + } + arrb.leave(); + + objb("locations", td::JsonRaw(jsonb.string_builder().as_cslice())); + } + + objb.leave(); + + debug_out << _jb.string_builder().as_cslice().str(); + } + } +} + +} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 641beb06b1..f52d569925 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -51,6 +51,7 @@ void pipeline_convert_ast_to_legacy_Expr_Op(); void pipeline_find_unused_symbols(); void pipeline_generate_fif_output_to_std_cout(); +void pipeline_process_debug_info(std::ostream& debug_out); // these pipes also can be called per-function individually // they are called for instantiated generics functions, when `f` is deeply cloned as `f` diff --git a/tolk/src-file.h b/tolk/src-file.h index 9b39a0accc..a42baffede 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -76,6 +76,7 @@ class SrcLocation { bool is_defined() const { return file_id != -1; } bool is_stdlib() const { return file_id == 0; } + int get_char_offset() const { return char_offset; }; const SrcFile* get_src_file() const; // similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()` diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index 17a2b4c845..13d97e197a 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -51,6 +51,7 @@ void usage(const char* progname) { "-x\tEnables experimental options, comma-separated\n" "-S\tDon't include stack layout comments into Fift output\n" "-L\tDon't include original lines from Tolk src into Fift output\n" + "-d\tInclude debug information\n" "-e\tIncreases verbosity level (extra output into stderr)\n" "-v\tOutput version of Tolk and exit\n"; std::exit(2); @@ -211,7 +212,7 @@ class StdCoutRedirectToFile { int main(int argc, char* const argv[]) { 
int i; - while ((i = getopt(argc, argv, "o:b:O:x:SLevh")) != -1) { + while ((i = getopt(argc, argv, "o:b:O:x:SLedvh")) != -1) { switch (i) { case 'o': G.settings.output_filename = optarg; @@ -234,6 +235,9 @@ int main(int argc, char* const argv[]) { case 'e': G.settings.verbosity++; break; + case 'd': + G.settings.with_debug_info = true; + break; case 'v': std::cout << "Tolk compiler v" << TOLK_VERSION << std::endl; std::cout << "Build commit: " << GitMetadata::CommitSHA1() << std::endl; @@ -280,6 +284,14 @@ int main(int argc, char* const argv[]) { G.settings.read_callback = fs_read_callback; - int exit_code = tolk_proceed(argv[optind]); + const std::string source_map_filename = + G.settings.output_filename.empty() ? "./debug.source_map.json" : G.settings.output_filename + ".source_map.json"; + std::ofstream debug_out(source_map_filename); + if (!debug_out.is_open()) { + std::cerr << "failed to create output file " << source_map_filename << " for source map" << std::endl; + return 2; + } + + int exit_code = tolk_proceed(argv[optind], debug_out); return exit_code; } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index b5149481bb..f6edffb2b2 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -41,6 +41,7 @@ static td::Result compile_internal(char *config_json) { TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); TRY_RESULT(src_line_comments, td::get_json_object_bool_field(config, "withSrcLineComments", true, false)); + TRY_RESULT(with_debug_info, td::get_json_object_bool_field(config, "withDebugInfo", true, false)); TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); TRY_RESULT(experimental_options, td::get_json_object_string_field(config, "experimentalOptions", true)); @@ -48,14 +49,15 @@ static td::Result compile_internal(char *config_json) { 
G.settings.optimization_level = std::max(0, opt_level); G.settings.stack_layout_comments = stack_comments; G.settings.tolk_src_as_line_comments = src_line_comments; + G.settings.with_debug_info = with_debug_info; if (!experimental_options.empty()) { G.settings.parse_experimental_options_cmd_arg(experimental_options.c_str()); } - std::ostringstream outs, errs; + std::ostringstream outs, errs, debug_out; std::cout.rdbuf(outs.rdbuf()); std::cerr.rdbuf(errs.rdbuf()); - int exit_code = tolk_proceed(entrypoint_filename); + int exit_code = tolk_proceed(entrypoint_filename, debug_out); if (exit_code != 0) { return td::Status::Error(errs.str()); } @@ -68,6 +70,7 @@ static td::Result compile_internal(char *config_json) { obj("fiftCode", fift_res.fiftCode); obj("codeBoc64", fift_res.codeBoc64); obj("codeHashHex", fift_res.codeHashHex); + obj("debugInfo", td::JsonRaw(debug_out.str())); obj("stderr", errs.str().c_str()); obj.leave(); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 69b03f20ad..c61068f390 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -29,6 +29,7 @@ #include "lexer.h" #include "ast.h" #include "type-system.h" +#include namespace tolk { @@ -45,7 +46,7 @@ void on_assertion_failed(const char *description, const char *file_name, int lin throw Fatal(std::move(message)); } -int tolk_proceed(const std::string &entrypoint_filename) { +int tolk_proceed(const std::string &entrypoint_filename, std::ostream& debug_out) { type_system_init(); define_builtins(); lexer_init(); @@ -74,6 +75,7 @@ int tolk_proceed(const std::string &entrypoint_filename) { pipeline_find_unused_symbols(); pipeline_generate_fif_output_to_std_cout(); + pipeline_process_debug_info(debug_out); return 0; } catch (Fatal& fatal) { diff --git a/tolk/tolk.h b/tolk/tolk.h index 9bef3c4dfb..458e0a2cd8 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -264,6 +264,16 @@ class ListIterator { struct Stack; +struct DebugInfo { + size_t idx{}; + std::string loc_file; + long loc_line{}; + long loc_pos{}; + long 
loc_len{}; + std::vector> vars; + std::string func_name; +}; + struct Op { enum OpKind { _Nop, @@ -284,6 +294,7 @@ struct Op { _Again, _TryCatch, _SliceConst, + _DebugInfo, }; OpKind cl; enum { _Disabled = 1, _NoReturn = 2, _Impure = 4, _ArgOrderAlreadyEqualsAsm = 8 }; @@ -298,6 +309,7 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; + std::shared_ptr debug_info; Op(SrcLocation loc, OpKind cl) : cl(cl), flags(0), loc(loc) { } Op(SrcLocation loc, OpKind cl, const std::vector& left) @@ -587,6 +599,7 @@ struct AsmOpList { std::vector constants_; bool retalt_{false}; bool retalt_inserted_{false}; + std::optional> get_var(std::pair idx_pair) const; void out(std::ostream& os, int mode = 0) const; AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { } @@ -1069,7 +1082,9 @@ struct CodeBlob { std::vector* inline_rvect_out = nullptr; bool inlining_before_immediate_return = false; std::unique_ptr ops; + Op::OpKind prev_ops_kind; std::unique_ptr* cur_ops; + std::vector> debug_infos; #ifdef TOLK_DEBUG std::vector _vector_of_ops; // to see it in debugger instead of nested pointers #endif @@ -1084,6 +1099,7 @@ struct CodeBlob { if (forced_loc.is_defined()) { res.loc = forced_loc; } + prev_ops_kind = res.cl; cur_ops = &(res.next); #ifdef TOLK_DEBUG _vector_of_ops.push_back(&res); @@ -1144,7 +1160,7 @@ void patch_builtins_after_stdlib_loaded(); * */ -int tolk_proceed(const std::string &entrypoint_filename); +int tolk_proceed(const std::string &entrypoint_filename, std::ostream& debug_out); } // namespace tolk From e8f055cc993ac4f11d56a30b7d65b04dddc71e79 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Thu, 17 Jul 2025 00:18:41 +0400 Subject: [PATCH 02/27] add krigga Asm.fif path --- crypto/fift/lib/Asm.fif | 74 +++++++++++++++++++++++++----- crypto/fift/utils.cpp | 11 ++++- crypto/fift/utils.h | 3 +- crypto/funcfiftlib/funcfiftlib.cpp | 2 +- 
tolk/tolk-wasm.cpp | 3 +- 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index 9413513972..e0c9f78006 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -4,6 +4,22 @@ namespace Asm Asm definitions "0.4.6" constant asm-fif-version +32 constant @marksize + +variable @alldebugmarks +variable @debugmarks +variable @currentdebugmarks + +// markid offset +{ over @debugmarks @ @marksize udict@ { ref@ } { dictnew } cond // markid offset markiddict + udict! { abort"could not set debug mark" } ifnot // markid markiddict + udict! { abort"could not set debug mark" } ifnot @debugmarks ! } : @adddebugmark +{ over bbits @adddebugmark } : DEBUGMARK + +32 constant @zcount +{ u@?+ { swap abort"first bits are not zeroes" } if } : @cut-zeroes + variable @atend variable @was-split false @was-split ! @@ -15,8 +31,24 @@ false @was-split ! { false @was-split ! `normal @endblk } : }> { }> b> } : }>c { }>c s -{ @atend @ 2 { true @was-split ! @atend ! rot b> ref, swap @endblk } does @atend ! udict! { abort"could not add subbed debug marks" } ifnot true } + dictforeach drop // dictmapb cutdebugmarks + ref, true } dictmap -rot } ifnot // cutdebugmarks isnottoplevel cutcodes + -rot // cutcodes cutdebugmarks isnottoplevel + u@ // tldebugmarksb cutcodehash + @alldebugmarks @ 256 b>udict! { abort"could not add to all debug marks" } ifnot @alldebugmarks ! } : @handledebugmarks +{ @atend @ @debugmarks @ 3 { true @was-split ! @debugmarks @ @currentdebugmarks ! @debugmarks ! @atend ! rot dup @handledebugmarks b> ref, swap @endblk } does @atend ! dictnew @debugmarks ! = -rot <= and } : 2x<= @@ -299,11 +331,26 @@ x{8A} @Defop(ref) PUSHREFCONT bbitrefs rot bbitrefs pair+ swap 32 + swap @havebitrefs nip } cond } : @two-cont-fit? +// initoffset fromhash +{ @alldebugmarks @ 256 udict@ { // initoffset tldebugmarks + 1 i@+ swap drop ref@? 
{ // initoffset debugmarks + @marksize { // initoffset markid offsetsdicts + ref@ 10 { // initoffset markid offset emptys + drop 2 pick + // initoffset markid addedoffset + over swap @adddebugmark true } + dictforeach // initoffset markid true + swap drop } + dictforeach } + { null } cond } + { null } cond // initoffset true + 2drop +} : @readddebugmarks +{ over bbits over b> hashu @readddebugmarks } : @pushcontreadd { 2dup @cont-fits? not { b> PUSHREFCONT } { swap over bbitrefs 2dup 120 0 2x<= - { drop swap x{9} s, swap 3 >> 4 u, swap b+ } - { rot x{8F_} s, swap 2 u, swap 3 >> 7 u, swap b+ } cond + { drop swap x{9} s, swap 3 >> 4 u, swap @pushcontreadd b+ } + { rot x{8F_} s, swap 2 u, swap 3 >> 7 u, swap @pushcontreadd b+ } cond } cond } dup : PUSHCONT : CONT { }> PUSHCONT } : }>CONT @@ -1073,7 +1120,7 @@ x{EDFB} @Defop SAMEALTSAVE // // inline support { dup sbits - { @addop } + { dup shash 256 B>u@ -rot dup sbits -rot @addop dup bbits rot - rot @readddebugmarks } { dup srefs // { ref@ CALLREF } @@ -1535,9 +1582,10 @@ variable asm-mode 1 asm-mode ! variable @oldcurrent variable @oldctx Fift-wordlist dup @oldcurrent ! @oldctx ! { current@ @oldcurrent ! context@ @oldctx ! Asm definitions - @proccnt @ @proclist @ @procdict @ @procinfo @ @gvarcnt @ @parent-state @ current@ @oldcurrent @ @oldctx @ - 9 tuple @parent-state ! + @proccnt @ @proclist @ @procdict @ @procinfo @ @gvarcnt @ @parent-state @ current@ @oldcurrent @ @oldctx @ @alldebugmarks @ @debugmarks @ + 11 tuple @parent-state ! hole current! + @alldebugmarks null! @debugmarks null! 0 =: main @proclist null! @proccnt 0! @gvarcnt 0! { bl word @newproc } : NEWPROC { bl word dup (def?) ' drop ' @newproc cond } : DECLPROC @@ -1568,7 +1616,6 @@ Fift-wordlist dup @oldcurrent ! @oldctx ! { -1000 @def-proc } : PROCINLINE { @procdict @ @procdictkeylen idict@ abort"procedure already defined" } : @fail-ifdef -{ u@?+ { swap abort"first bits are not zeroes" } if } : @cut-zeroes { over @fail-ifdef 2 { rot @normal? 
rot b> } : }END> -{ }END> b> } : }END>c -{ }END>c s +{ }END> b> swap drop } : }END>c +{ }END>c s + +{ }END> b> swap } : }END>cd // This is the way how FunC assigns method_id for reserved functions. // Note, that Tolk entrypoints have other names (`onInternalMessage`, etc.), diff --git a/crypto/fift/utils.cpp b/crypto/fift/utils.cpp index 6057b2dc07..44f42af9a9 100644 --- a/crypto/fift/utils.cpp +++ b/crypto/fift/utils.cpp @@ -225,10 +225,15 @@ td::Result> compile_asm(td::Slice asm_code) { return vm::std_boc_deserialize(std::move(boc.data)); } -td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir) { +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir, bool enable_debug_info) { std::string main_fif; - main_fif.reserve(program_code.size() + 100); + main_fif.reserve(program_code.size() + 200); main_fif.append(program_code.data(), program_code.size()); + if (enable_debug_info) { + main_fif.append(R"( boc>B B>base64 $>B "debugmarks" B>file)"); + } else { // todo: fix + main_fif.append(R"( "" $>B "debugmarks" B>file)"); + } main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file @@ -238,11 +243,13 @@ td::Result compile_asm_program(std::string&& program_code TRY_RESULT(boc, res.read_file("boc")); TRY_RESULT(hex, res.read_file("hex")); + TRY_RESULT(debug_info, res.read_file("debugmarks")); return CompiledProgramOutput{ std::move(program_code), std::move(boc.data), std::move(hex.data), + std::move(debug_info.data), }; } diff --git a/crypto/fift/utils.h b/crypto/fift/utils.h index fab92c5420..d66364bba0 100644 --- a/crypto/fift/utils.h +++ b/crypto/fift/utils.h @@ -33,6 +33,7 @@ struct CompiledProgramOutput { std::string fiftCode; std::string codeBoc64; std::string codeHashHex; + std::string debugMarksBoc64; }; td::Result create_mem_source_lookup(std::string main, std::string fift_dir = "", @@ 
-42,5 +43,5 @@ td::Result create_mem_source_lookup(std::string main, std::s td::Result mem_run_fift(std::string source, std::vector args = {}, std::string fift_dir = ""); td::Result mem_run_fift(SourceLookup source_lookup, std::vector args); td::Result> compile_asm(td::Slice asm_code); -td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir); +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir, bool enable_debug_info); } // namespace fift diff --git a/crypto/funcfiftlib/funcfiftlib.cpp b/crypto/funcfiftlib/funcfiftlib.cpp index 403c075dd2..148c65234e 100644 --- a/crypto/funcfiftlib/funcfiftlib.cpp +++ b/crypto/funcfiftlib/funcfiftlib.cpp @@ -62,7 +62,7 @@ td::Result compile_internal(char *config_json) { return td::Status::Error("FunC compilation error: " + errs.str()); } - TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/", false)); td::JsonBuilder result_json; auto obj = result_json.enter_object(); diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index f6edffb2b2..0f67602b4a 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -62,7 +62,7 @@ static td::Result compile_internal(char *config_json) { return td::Status::Error(errs.str()); } - TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/", with_debug_info)); td::JsonBuilder result_json; auto obj = result_json.enter_object(); @@ -71,6 +71,7 @@ static td::Result compile_internal(char *config_json) { obj("codeBoc64", fift_res.codeBoc64); obj("codeHashHex", fift_res.codeHashHex); obj("debugInfo", td::JsonRaw(debug_out.str())); + obj("debugMarksBoc", std::move(fift_res.debugMarksBoc64)); obj("stderr", errs.str().c_str()); obj.leave(); From f39a1a13dd67bff88806788bfdb80fbe1be7640a Mon Sep 17 00:00:00 2001 From: i582 
<51853996+i582@users.noreply.github.com> Date: Thu, 17 Jul 2025 02:07:18 +0400 Subject: [PATCH 03/27] fixes --- crypto/fift/lib/Asm.fif | 63 +++++++++--------- tolk/abscode.cpp | 2 +- tolk/codegen.cpp | 11 ++-- tolk/compiler-state.h | 3 + tolk/pipe-ast-to-legacy.cpp | 22 +++---- tolk/pipe-generate-fif-output.cpp | 8 ++- tolk/pipe-process-debug-info.cpp | 102 ++++++++++++++---------------- tolk/tolk-wasm.cpp | 8 ++- tolk/tolk.h | 3 +- 9 files changed, 116 insertions(+), 106 deletions(-) diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index e0c9f78006..f54e14947f 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -334,13 +334,13 @@ x{8A} @Defop(ref) PUSHREFCONT // initoffset fromhash { @alldebugmarks @ 256 udict@ { // initoffset tldebugmarks 1 i@+ swap drop ref@? { // initoffset debugmarks - @marksize { // initoffset markid offsetsdicts - ref@ 10 { // initoffset markid offset emptys - drop 2 pick + // initoffset markid addedoffset - over swap @adddebugmark true } - dictforeach // initoffset markid true - swap drop } - dictforeach } + @marksize { // initoffset markid offsetsdicts + ref@ 10 { // initoffset markid offset emptys + drop 2 pick + // initoffset markid addedoffset + over swap @adddebugmark true } + dictforeach // initoffset markid true + swap drop } + dictforeach } { null } cond } { null } cond // initoffset true 2drop @@ -964,24 +964,24 @@ recursive IFELSE-cont2 { { 1 { swap @normal? swap IFELSE-cont2 } does @doafter<{ } : @doifelse { 1 { swap @normal? IFELSE-cont2 } does @doafter<{ } : @doifnotelse { - { dup `else eq? + { dup `else eq? { drop @doifelse } { dup `else: eq? { drop IFJMP-cont } { @normal? IF-cont - } cond - } cond - } @doafter<{ + } cond + } cond + } @doafter<{ } : IF:<{ { - { dup `else eq? + { dup `else eq? { drop @doifnotelse } { dup `else: eq? { drop IFNOTJMP-cont } { @normal? 
IFNOT-cont - } cond - } cond - } @doafter<{ + } cond + } cond + } @doafter<{ } : IFNOT:<{ x{E304} @Defop CONDSEL @@ -1009,12 +1009,12 @@ x{EB} dup @Defop AGAINEND @Defop AGAIN: { }> PUSHCONT UNTIL } : }>UNTIL { { @normal? PUSHCONT UNTIL } @doafter<{ } : UNTIL:<{ { PUSHCONT { @normal? PUSHCONT WHILE } @doafter<{ } : @dowhile -{ - { dup `do eq? - { drop @dowhile } - { `do: eq? not abort"`}>DO<{` expected" PUSHCONT WHILEEND - } cond - } @doafter<{ +{ + { dup `do eq? + { drop @dowhile } + { `do: eq? not abort"`}>DO<{` expected" PUSHCONT WHILEEND + } cond + } @doafter<{ } : WHILE:<{ { }> PUSHCONT AGAIN } : }>AGAIN { { @normal? PUSHCONT AGAIN } @doafter<{ } : AGAIN:<{ @@ -1034,11 +1034,11 @@ x{E31B} dup @Defop AGAINENDBRK @Defop AGAINBRK: { { @normal? PUSHCONT UNTILBRK } @doafter<{ } : UNTILBRK:<{ { PUSHCONT { @normal? PUSHCONT WHILEBRK } @doafter<{ } : @dowhile { - { dup `do eq? - { drop @dowhile } + { dup `do eq? + { drop @dowhile } { `do: eq? not abort"`}>DO<{` expected" PUSHCONT WHILEENDBRK - } cond - } @doafter<{ + } cond + } @doafter<{ } : WHILEBRK:<{ { }> PUSHCONT AGAINBRK } : }>AGAINBRK { { @normal? PUSHCONT AGAINBRK } @doafter<{ } : AGAINBRK:<{ @@ -1064,8 +1064,8 @@ x{ED1F} @Defop BLESSVARARGS { c4 PUSHCTR } : PUSHROOT { c4 POPCTR } : POPROOT x{ED6} dup @Defop(c) SETCONTCTR @Defop(c) SETCONT -x{ED7} @Defop(c) SETRETCTR -x{ED8} @Defop(c) SETALTCTR +x{ED7} @Defop(c) SETRETCTR +x{ED8} @Defop(c) SETALTCTR x{ED9} dup @Defop(c) POPSAVE @Defop(c) POPCTRSAVE x{EDA} dup @Defop(c) SAVE @Defop(c) SAVECTR x{EDB} dup @Defop(c) SAVEALT @Defop(c) SAVEALTCTR @@ -1122,7 +1122,7 @@ x{EDFB} @Defop SAMEALTSAVE { dup sbits { dup shash 256 B>u@ -rot dup sbits -rot @addop dup bbits rot - rot @readddebugmarks } { - dup srefs // + dup srefs // { ref@ CALLREF } { drop } cond @@ -1159,9 +1159,9 @@ x{F2FF} @Defop TRY x{F3} @Defop(4u,4u) TRYARGS { `catch @endblk } : }>CATCH<{ { PUSHCONT { @normal? PUSHCONT TRY } @doafter<{ } : @trycatch -{ +{ { `catch eq? 
not abort"`}>CATCH<{` expected" @trycatch - } @doafter<{ + } @doafter<{ } : TRY:<{ // // dictionary manipulation @@ -1311,7 +1311,7 @@ x{F4A1} @Defop DICTUGETJMP x{F4A2} @Defop DICTIGETEXEC x{F4A3} @Defop DICTUGETEXEC { dup sbitrefs tuck 1 > swap 1 <> or abort"not a dictionary" swap 1 u@ over <> abort"not a dictionary" } : @chkdicts -{ dup null? tuck { idict! diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 2b5e73d8d7..f63570812f 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -256,7 +256,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx break; case _DebugInfo: os << pfx << dis << "DEBUGINFO "; - os << debug_info->idx << std::endl; + os << debug_idx << std::endl; break; case _Import: os << pfx << dis << "IMPORT "; diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 23ce50d378..a9c0d7ede5 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -279,12 +279,15 @@ bool Op::generate_code_step(Stack& stack) { // we need to handle it here to correctly handle case `IFJMP { DROP }` if (cl == _DebugInfo) { std::ostringstream ops; - ops << debug_info->idx << " DEBUGMARK"; // pseudo instruction + ops << debug_idx << " DEBUGMARK"; // pseudo instruction stack.o.insert(stack.o.list_.size() - 1, this->loc, ops.str()); - for (auto i : stack.s) { - if (const auto var = stack.o.get_var(i)) { - debug_info->vars.push_back(*var); + if (debug_idx < G.debug_infos.size()) { + auto& debug_info = G.debug_infos.at(debug_idx); + for (auto i : stack.s) { + if (const auto var = stack.o.get_var(i)) { + debug_info.vars.push_back(*var); + } } } } diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 5daf03c29d..e514f6de1e 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -22,6 +22,7 @@ #include #include #include +#include namespace tolk { @@ -105,6 +106,8 @@ struct CompilerState { std::vector all_structs; AllRegisteredSrcFiles all_src_files; + std::vector debug_infos; + bool is_verbosity(int gt_eq) const { return 
settings.verbosity >= gt_eq; } }; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index c14faa435a..9e943de880 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -137,22 +137,22 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) } auto& op = code.emplace_back(loc, Op::_DebugInfo); - const auto info = std::make_shared(); - info->idx = code.debug_infos.size(); + auto info = DebugInfo{}; - if (const auto src_file = loc.get_src_file()) { + op.debug_idx = G.debug_infos.size(); + info.idx = op.debug_idx; + + if (const auto src_file = loc.get_src_file(); src_file != nullptr) { const auto& pos = src_file->convert_offset(loc.get_char_offset()); - info->loc_file = src_file->realpath; - info->loc_line = pos.line_no; - info->loc_pos = pos.char_no; - info->loc_len = pos.line_str.length(); + info.loc_file = src_file->realpath; + info.loc_line = pos.line_no; + info.loc_pos = pos.char_no; + info.loc_len = pos.line_str.length(); } - info->func_name = code.name; - code.debug_infos.push_back(info); - - op.debug_info = info; + info.func_name = code.name; + G.debug_infos.push_back(info); } void insert_debug_info(AnyV v, CodeBlob& code) { diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 1dc513f054..166a2c0f29 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -190,7 +190,13 @@ void pipeline_generate_fif_output_to_std_cout() { generate_output_func(fun_ref); } - std::cout << "}END>c\n"; + std::cout << "}END>c"; + + if (G.settings.with_debug_info) { + std::cout << "d"; + } + std::cout << std::endl; + if (!G.settings.boc_output_filename.empty()) { std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; } diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index f1707eacc8..aa9c3ec3e3 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -8,72 +8,68 @@ 
namespace tolk { void pipeline_process_debug_info(std::ostream& debug_out) { - for (const auto& func : G.all_functions) { - if (func->name == "main") { - std::cout << func->name << std::endl; - - const auto debug_infos = std::get(func->body)->code->debug_infos; - std::cout << debug_infos[0]->func_name << std::endl; - - td::JsonBuilder _jb; - auto objb = _jb.enter_object(); + if (!G.settings.with_debug_info) { + return; + } - { - td::JsonBuilder jsonb; - auto arrb = jsonb.enter_array(); - for (auto glob_var : G.all_global_vars) { - auto vb = arrb.enter_value(); - auto ob = vb.enter_object(); + const auto debug_infos = G.debug_infos; - ob("name", glob_var->name); - ob("type", glob_var->declared_type->as_human_readable()); - } - arrb.leave(); + td::JsonBuilder _jb; + auto objb = _jb.enter_object(); - objb("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); - } + { + td::JsonBuilder jsonb; + auto arrb = jsonb.enter_array(); + for (auto glob_var : G.all_global_vars) { + auto vb = arrb.enter_value(); + auto ob = vb.enter_object(); - { - td::JsonBuilder jsonb; - auto arrb = jsonb.enter_array(); - for (auto di_ptr : debug_infos) { - const auto di = *di_ptr; - auto vb = arrb.enter_value(); - auto ob = vb.enter_object(); - ob("file", di.loc_file); - ob("line", (td::int64)di.loc_line); - ob("pos", (td::int64)di.loc_pos); - ob("length", (td::int64)di.loc_len); + ob("name", glob_var->name); + ob("type", glob_var->declared_type->as_human_readable()); + } + arrb.leave(); - td::JsonBuilder varb; - auto vararrb = varb.enter_array(); - for (auto varstr : di.vars) { - const auto [var, value] = varstr; - auto varb = vararrb.enter_value(); - auto varbo = varb.enter_object(); - varbo("name", var.name == "" ? "'" + std::to_string(var.ir_idx) : var.name); - varbo("type", var.v_type == nullptr ? 
"" : var.v_type->as_human_readable()); - if (value.length() > 0) { - varbo("value", value); - } - } - vararrb.leave(); + objb("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); + } - td::JsonRaw vararrs(varb.string_builder().as_cslice()); + { + td::JsonBuilder jsonb; + auto arrb = jsonb.enter_array(); + for (auto di : debug_infos) { + auto vb = arrb.enter_value(); + auto ob = vb.enter_object(); + ob("file", di.loc_file); + ob("line", (td::int64)di.loc_line); + ob("pos", (td::int64)di.loc_pos); + ob("length", (td::int64)di.loc_len); - ob("vars", vararrs); - ob("func", di.func_name); + td::JsonBuilder varb; + auto vararrb = varb.enter_array(); + for (auto varstr : di.vars) { + const auto [var, value] = varstr; + auto varb = vararrb.enter_value(); + auto varbo = varb.enter_object(); + varbo("name", var.name == "" ? "'" + std::to_string(var.ir_idx) : var.name); + varbo("type", var.v_type == nullptr ? "" : var.v_type->as_human_readable()); + if (value.length() > 0) { + varbo("value", value); } - arrb.leave(); - - objb("locations", td::JsonRaw(jsonb.string_builder().as_cslice())); } + vararrb.leave(); - objb.leave(); + td::JsonRaw vararrs(varb.string_builder().as_cslice()); - debug_out << _jb.string_builder().as_cslice().str(); + ob("vars", vararrs); + ob("func", di.func_name); } + arrb.leave(); + + objb("locations", td::JsonRaw(jsonb.string_builder().as_cslice())); } + + objb.leave(); + + debug_out << _jb.string_builder().as_cslice().str(); } } // namespace tolk diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 0f67602b4a..5271cb8784 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -70,8 +70,12 @@ static td::Result compile_internal(char *config_json) { obj("fiftCode", fift_res.fiftCode); obj("codeBoc64", fift_res.codeBoc64); obj("codeHashHex", fift_res.codeHashHex); - obj("debugInfo", td::JsonRaw(debug_out.str())); - obj("debugMarksBoc", std::move(fift_res.debugMarksBoc64)); + + if (const auto debug_info = debug_out.str(); 
!debug_info.empty()) { + obj("debugInfo", td::JsonRaw(debug_info)); + obj("debugMarksBoc", std::move(fift_res.debugMarksBoc64)); + } + obj("stderr", errs.str().c_str()); obj.leave(); diff --git a/tolk/tolk.h b/tolk/tolk.h index 458e0a2cd8..77d2f61b02 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -309,7 +309,7 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - std::shared_ptr debug_info; + size_t debug_idx; Op(SrcLocation loc, OpKind cl) : cl(cl), flags(0), loc(loc) { } Op(SrcLocation loc, OpKind cl, const std::vector& left) @@ -1084,7 +1084,6 @@ struct CodeBlob { std::unique_ptr ops; Op::OpKind prev_ops_kind; std::unique_ptr* cur_ops; - std::vector> debug_infos; #ifdef TOLK_DEBUG std::vector _vector_of_ops; // to see it in debugger instead of nested pointers #endif From 3ae51394d740d62b5bee9a4725ca33d62dfa056f Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Thu, 17 Jul 2025 02:19:31 +0400 Subject: [PATCH 04/27] fixes --- tolk/asmops.cpp | 10 ++++++---- tolk/codegen.cpp | 4 ++-- tolk/pipe-ast-to-legacy.cpp | 8 ++++---- tolk/pipe-process-debug-info.cpp | 8 ++++---- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index d61c3c6826..c0a4c64e52 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -331,12 +331,14 @@ void AsmOpList::show_var_ext(std::ostream& os, std::pair } std::optional> AsmOpList::get_var(std::pair idx_pair) const { - if (!var_names_ || (unsigned)idx_pair.first >= var_names_->size()) { + const var_idx_t var_idx = idx_pair.first; + const const_idx_t const_idx = idx_pair.second; + if (!var_names_ || (unsigned)var_idx >= var_names_->size()) { return std::nullopt; } - auto var = var_names_->at(idx_pair.first); - if ((unsigned)idx_pair.second < constants_.size() && constants_[idx_pair.second].not_null()) { - const auto value = constants_[idx_pair.second]; + auto var = var_names_->at(var_idx); + if ((unsigned)const_idx < 
constants_.size() && constants_[const_idx].not_null()) { + const auto value = constants_[const_idx]; auto value_str = value->to_dec_string(); return std::tie(var, value_str); } diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index a9c0d7ede5..211e2b868a 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -280,12 +280,12 @@ bool Op::generate_code_step(Stack& stack) { if (cl == _DebugInfo) { std::ostringstream ops; ops << debug_idx << " DEBUGMARK"; // pseudo instruction - stack.o.insert(stack.o.list_.size() - 1, this->loc, ops.str()); + stack.o.insert(stack.o.list_.size() - 1, loc, ops.str()); if (debug_idx < G.debug_infos.size()) { auto& debug_info = G.debug_infos.at(debug_idx); for (auto i : stack.s) { - if (const auto var = stack.o.get_var(i)) { + if (const auto var = stack.o.get_var(i); var.has_value()) { debug_info.vars.push_back(*var); } } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 9e943de880..4e3491171d 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -137,12 +137,12 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) } auto& op = code.emplace_back(loc, Op::_DebugInfo); - auto info = DebugInfo{}; - op.debug_idx = G.debug_infos.size(); + + auto info = DebugInfo{}; info.idx = op.debug_idx; - if (const auto src_file = loc.get_src_file(); src_file != nullptr) { + if (const SrcFile* src_file = loc.get_src_file(); src_file != nullptr) { const auto& pos = src_file->convert_offset(loc.get_char_offset()); info.loc_file = src_file->realpath; @@ -2330,7 +2330,7 @@ static void process_return_statement(V v, CodeBlob& code) static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { std::vector mutated_vars; if (code.fun_ref->has_mutate_params()) { - for (const LocalVarData& p_sym : code.fun_ref->parameters) { + for (const LocalVarData& p_sym: code.fun_ref->parameters) { if (p_sym.is_mutate_parameter()) { mutated_vars.insert(mutated_vars.end(), 
p_sym.ir_idx.begin(), p_sym.ir_idx.end()); } diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index aa9c3ec3e3..6670ada37e 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -45,13 +45,13 @@ void pipeline_process_debug_info(std::ostream& debug_out) { td::JsonBuilder varb; auto vararrb = varb.enter_array(); - for (auto varstr : di.vars) { - const auto [var, value] = varstr; + for (auto var_and_value : di.vars) { + const auto [var, value] = var_and_value; auto varb = vararrb.enter_value(); auto varbo = varb.enter_object(); - varbo("name", var.name == "" ? "'" + std::to_string(var.ir_idx) : var.name); + varbo("name", var.name.empty() ? "'" + std::to_string(var.ir_idx) : var.name); varbo("type", var.v_type == nullptr ? "" : var.v_type->as_human_readable()); - if (value.length() > 0) { + if (!value.empty()) { varbo("value", value); } } From 558a28e282d6432187e35e9c4b37f5dcc650affb Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Thu, 17 Jul 2025 13:36:26 +0400 Subject: [PATCH 05/27] revert Asm.fif changes --- crypto/fift/lib/Asm.fif | 84 +++++++----------------------- crypto/fift/utils.cpp | 9 +--- crypto/fift/utils.h | 3 +- crypto/funcfiftlib/funcfiftlib.cpp | 2 +- tolk/codegen.cpp | 6 ++- tolk/pipe-generate-fif-output.cpp | 7 +-- tolk/pipe-process-debug-info.cpp | 7 ++- tolk/tolk-wasm.cpp | 3 +- tolk/tolk.h | 2 +- 9 files changed, 34 insertions(+), 89 deletions(-) diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index f54e14947f..f3278d2f14 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -4,22 +4,6 @@ namespace Asm Asm definitions "0.4.6" constant asm-fif-version -32 constant @marksize - -variable @alldebugmarks -variable @debugmarks -variable @currentdebugmarks - -// markid offset -{ over @debugmarks @ @marksize udict@ { ref@ } { dictnew } cond // markid offset markiddict - udict! 
{ abort"could not set debug mark" } ifnot // markid markiddict - udict! { abort"could not set debug mark" } ifnot @debugmarks ! } : @adddebugmark -{ over bbits @adddebugmark } : DEBUGMARK - -32 constant @zcount -{ u@?+ { swap abort"first bits are not zeroes" } if } : @cut-zeroes - variable @atend variable @was-split false @was-split ! @@ -31,24 +15,8 @@ false @was-split ! { false @was-split ! `normal @endblk } : }> { }> b> } : }>c { }>c s -{ @currentdebugmarks @ depth 6 < dup - { drop 5 pick null? not // this is a bad way to detect top level proc defs. too bad! - } ifnot // codeb debugmarks isnottoplevel - rot b> udict! { abort"could not add subbed debug marks" } ifnot true } - dictforeach drop // dictmapb cutdebugmarks - ref, true } dictmap -rot } ifnot // cutdebugmarks isnottoplevel cutcodes - -rot // cutcodes cutdebugmarks isnottoplevel - u@ // tldebugmarksb cutcodehash - @alldebugmarks @ 256 b>udict! { abort"could not add to all debug marks" } ifnot @alldebugmarks ! } : @handledebugmarks -{ @atend @ @debugmarks @ 3 { true @was-split ! @debugmarks @ @currentdebugmarks ! @debugmarks ! @atend ! rot dup @handledebugmarks b> ref, swap @endblk } does @atend ! dictnew @debugmarks ! ref, swap @endblk } does @atend ! = -rot <= and } : 2x<= @@ -216,7 +184,7 @@ x{6E} @Defop ISNULL x{6F0} @Defop(4u) TUPLE x{6F00} @Defop NIL x{6F01} @Defop SINGLE -x{6F02} dup @Defop PAIR @Defop CONS +x{6F02} dup @Defop PAIR @Defop CONS x{6F03} @Defop TRIPLE x{6F1} @Defop(4u) INDEX x{6F10} dup @Defop FIRST @Defop CAR @@ -283,7 +251,7 @@ x{7F} @Defop TRUE { dup 16 fits { abort"integer too large" 8 + 2dup fits } until - > 2- 5 u, -rot i, + > 2- 5 u, -rot i, } cond } cond } cond @@ -292,6 +260,11 @@ x{7F} @Defop TRUE x{83FF} @Defop PUSHNAN { hashu @readddebugmarks } : @pushcontreadd { 2dup @cont-fits? 
not { b> PUSHREFCONT } { swap over bbitrefs 2dup 120 0 2x<= - { drop swap x{9} s, swap 3 >> 4 u, swap @pushcontreadd b+ } - { rot x{8F_} s, swap 2 u, swap 3 >> 7 u, swap @pushcontreadd b+ } cond + { drop swap x{9} s, swap 3 >> 4 u, swap b+ } + { rot x{8F_} s, swap 2 u, swap 3 >> 7 u, swap b+ } cond } cond } dup : PUSHCONT : CONT { }> PUSHCONT } : }>CONT @@ -1120,7 +1078,7 @@ x{EDFB} @Defop SAMEALTSAVE // // inline support { dup sbits - { dup shash 256 B>u@ -rot dup sbits -rot @addop dup bbits rot - rot @readddebugmarks } + { @addop } { dup srefs // { ref@ CALLREF } @@ -1562,6 +1520,7 @@ variable @gvarcnt variable @parent-state variable asm-mode 1 asm-mode ! 19 constant @procdictkeylen +32 constant @zcount { pair @proclist @ cons @proclist ! } : @proclistadd { @procinfo @ @procdictkeylen idict@ { 16 i@ } { 0 } cond } : @procinfo@ { idict! @@ -1581,10 +1540,9 @@ variable asm-mode 1 asm-mode ! variable @oldcurrent variable @oldctx Fift-wordlist dup @oldcurrent ! @oldctx ! { current@ @oldcurrent ! context@ @oldctx ! Asm definitions - @proccnt @ @proclist @ @procdict @ @procinfo @ @gvarcnt @ @parent-state @ current@ @oldcurrent @ @oldctx @ @alldebugmarks @ @debugmarks @ - 11 tuple @parent-state ! + @proccnt @ @proclist @ @procdict @ @procinfo @ @gvarcnt @ @parent-state @ current@ @oldcurrent @ @oldctx @ + 9 tuple @parent-state ! hole current! - @alldebugmarks null! @debugmarks null! 0 =: main @proclist null! @proccnt 0! @gvarcnt 0! { bl word @newproc } : NEWPROC { bl word dup (def?) ' drop ' @newproc cond } : DECLPROC @@ -1615,6 +1573,7 @@ Fift-wordlist dup @oldcurrent ! @oldctx ! { -1000 @def-proc } : PROCINLINE { @procdict @ @procdictkeylen idict@ abort"procedure already defined" } : @fail-ifdef +{ u@?+ { swap abort"first bits are not zeroes" } if } : @cut-zeroes { over @fail-ifdef 2 { rot @normal? 
rot b> } : }END> -{ }END> b> swap drop } : }END>c -{ }END>c s - -{ }END> b> swap } : }END>cd +{ }END> b> } : }END>c +{ }END>c s // This is the way how FunC assigns method_id for reserved functions. // Note, that Tolk entrypoints have other names (`onInternalMessage`, etc.), diff --git a/crypto/fift/utils.cpp b/crypto/fift/utils.cpp index 44f42af9a9..47c11bc3c4 100644 --- a/crypto/fift/utils.cpp +++ b/crypto/fift/utils.cpp @@ -225,15 +225,10 @@ td::Result> compile_asm(td::Slice asm_code) { return vm::std_boc_deserialize(std::move(boc.data)); } -td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir, bool enable_debug_info) { +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir) { std::string main_fif; main_fif.reserve(program_code.size() + 200); main_fif.append(program_code.data(), program_code.size()); - if (enable_debug_info) { - main_fif.append(R"( boc>B B>base64 $>B "debugmarks" B>file)"); - } else { // todo: fix - main_fif.append(R"( "" $>B "debugmarks" B>file)"); - } main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file @@ -243,13 +238,11 @@ td::Result compile_asm_program(std::string&& program_code TRY_RESULT(boc, res.read_file("boc")); TRY_RESULT(hex, res.read_file("hex")); - TRY_RESULT(debug_info, res.read_file("debugmarks")); return CompiledProgramOutput{ std::move(program_code), std::move(boc.data), std::move(hex.data), - std::move(debug_info.data), }; } diff --git a/crypto/fift/utils.h b/crypto/fift/utils.h index d66364bba0..fab92c5420 100644 --- a/crypto/fift/utils.h +++ b/crypto/fift/utils.h @@ -33,7 +33,6 @@ struct CompiledProgramOutput { std::string fiftCode; std::string codeBoc64; std::string codeHashHex; - std::string debugMarksBoc64; }; td::Result create_mem_source_lookup(std::string main, std::string fift_dir = "", @@ -43,5 +42,5 @@ td::Result 
create_mem_source_lookup(std::string main, std::s td::Result mem_run_fift(std::string source, std::vector args = {}, std::string fift_dir = ""); td::Result mem_run_fift(SourceLookup source_lookup, std::vector args); td::Result> compile_asm(td::Slice asm_code); -td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir, bool enable_debug_info); +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir); } // namespace fift diff --git a/crypto/funcfiftlib/funcfiftlib.cpp b/crypto/funcfiftlib/funcfiftlib.cpp index 148c65234e..403c075dd2 100644 --- a/crypto/funcfiftlib/funcfiftlib.cpp +++ b/crypto/funcfiftlib/funcfiftlib.cpp @@ -62,7 +62,7 @@ td::Result compile_internal(char *config_json) { return td::Status::Error("FunC compilation error: " + errs.str()); } - TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/", false)); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); td::JsonBuilder result_json; auto obj = result_json.enter_object(); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 211e2b868a..a24d6a2d8a 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -280,7 +280,11 @@ bool Op::generate_code_step(Stack& stack) { if (cl == _DebugInfo) { std::ostringstream ops; ops << debug_idx << " DEBUGMARK"; // pseudo instruction - stack.o.insert(stack.o.list_.size() - 1, loc, ops.str()); + + const auto list_size = stack.o.list_.size(); + if (list_size > 0) { + stack.o.insert(stack.o.list_.size() - 1, loc, ops.str()); + } if (debug_idx < G.debug_infos.size()) { auto& debug_info = G.debug_infos.at(debug_idx); diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 166a2c0f29..f9ca23f6b3 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -190,12 +190,7 @@ void pipeline_generate_fif_output_to_std_cout() { generate_output_func(fun_ref); } - std::cout << "}END>c"; - - if (G.settings.with_debug_info) 
{ - std::cout << "d"; - } - std::cout << std::endl; + std::cout << "}END>c\n"; if (!G.settings.boc_output_filename.empty()) { std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index 6670ada37e..56eea65f95 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -12,7 +12,6 @@ void pipeline_process_debug_info(std::ostream& debug_out) { return; } - const auto debug_infos = G.debug_infos; td::JsonBuilder _jb; auto objb = _jb.enter_object(); @@ -35,7 +34,7 @@ void pipeline_process_debug_info(std::ostream& debug_out) { { td::JsonBuilder jsonb; auto arrb = jsonb.enter_array(); - for (auto di : debug_infos) { + for (auto di : G.debug_infos) { auto vb = arrb.enter_value(); auto ob = vb.enter_object(); ob("file", di.loc_file); @@ -47,8 +46,8 @@ void pipeline_process_debug_info(std::ostream& debug_out) { auto vararrb = varb.enter_array(); for (auto var_and_value : di.vars) { const auto [var, value] = var_and_value; - auto varb = vararrb.enter_value(); - auto varbo = varb.enter_object(); + auto varb2 = vararrb.enter_value(); + auto varbo = varb2.enter_object(); varbo("name", var.name.empty() ? "'" + std::to_string(var.ir_idx) : var.name); varbo("type", var.v_type == nullptr ? 
"" : var.v_type->as_human_readable()); if (!value.empty()) { diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 5271cb8784..4576f89653 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -62,7 +62,7 @@ static td::Result compile_internal(char *config_json) { return td::Status::Error(errs.str()); } - TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/", with_debug_info)); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); td::JsonBuilder result_json; auto obj = result_json.enter_object(); @@ -73,7 +73,6 @@ static td::Result compile_internal(char *config_json) { if (const auto debug_info = debug_out.str(); !debug_info.empty()) { obj("debugInfo", td::JsonRaw(debug_info)); - obj("debugMarksBoc", std::move(fift_res.debugMarksBoc64)); } obj("stderr", errs.str().c_str()); diff --git a/tolk/tolk.h b/tolk/tolk.h index 77d2f61b02..2ee058f7c5 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -309,7 +309,7 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - size_t debug_idx; + size_t debug_idx{0}; Op(SrcLocation loc, OpKind cl) : cl(cl), flags(0), loc(loc) { } Op(SrcLocation loc, OpKind cl, const std::vector& left) From 09065044c7a5bd58f30c241ed42c46f98b1ea346 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Sun, 27 Jul 2025 01:40:04 +0400 Subject: [PATCH 06/27] improvements --- tolk/CMakeLists.txt | 1 + tolk/codegen.cpp | 2 +- tolk/debug-info.cpp | 45 ++++++++++++++++++++ tolk/pack-unpack-api.cpp | 6 +++ tolk/pack-unpack-serializers.cpp | 9 ++++ tolk/pack-unpack-serializers.h | 1 + tolk/pipe-ast-to-legacy.cpp | 72 ++++++++++++++------------------ tolk/pipe-process-debug-info.cpp | 2 + tolk/src-file.cpp | 3 ++ tolk/tolk.h | 8 +++- 10 files changed, 105 insertions(+), 44 deletions(-) create mode 100644 tolk/debug-info.cpp diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 72358e550f..6f4397d0bd 100644 --- a/tolk/CMakeLists.txt +++ 
b/tolk/CMakeLists.txt @@ -41,6 +41,7 @@ set(TOLK_SOURCE stack-transform.cpp optimize.cpp codegen.cpp + debug-info.cpp tolk.cpp ) diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index a24d6a2d8a..12b4213764 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -283,7 +283,7 @@ bool Op::generate_code_step(Stack& stack) { const auto list_size = stack.o.list_.size(); if (list_size > 0) { - stack.o.insert(stack.o.list_.size() - 1, loc, ops.str()); + stack.o.insert(stack.o.list_.size(), loc, ops.str()); } if (debug_idx < G.debug_infos.size()) { diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp new file mode 100644 index 0000000000..7d4e9d4419 --- /dev/null +++ b/tolk/debug-info.cpp @@ -0,0 +1,45 @@ +#include "tolk.h" +#include +#include + +namespace tolk { + +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) { + if (!G.settings.with_debug_info) { + return; + } + + if (kind == ast_artificial_aux_vertex || kind == ast_throw_statement) { + return; + } + + if (code.prev_ops_kind == Op::_DebugInfo) { + // std::cerr << "skip repeated debug info" << std::endl; + // return; + } + + auto& op = code.emplace_back(loc, Op::_DebugInfo); + op.debug_idx = G.debug_infos.size(); + + auto info = DebugInfo{}; + info.idx = op.debug_idx; + info.is_entry = kind == ast_function_declaration; + + if (const SrcFile* src_file = loc.get_src_file(); src_file != nullptr) { + const auto& pos = src_file->convert_offset(loc.get_char_offset()); + + info.loc_file = src_file->realpath; + info.loc_line = pos.line_no; + info.loc_pos = pos.char_no; + info.loc_len = pos.line_str.length(); + } + + info.func_name = code.name; + G.debug_infos.push_back(info); +} + +void insert_debug_info(AnyV v, CodeBlob& code) { + insert_debug_info_inner(v->loc, v->kind, code); +} + +} diff --git a/tolk/pack-unpack-api.cpp b/tolk/pack-unpack-api.cpp index abab637e97..d034a5bb5d 100644 --- a/tolk/pack-unpack-api.cpp +++ b/tolk/pack-unpack-api.cpp @@ -241,6 +241,8 @@ std::vector 
generate_pack_struct_to_cell(CodeBlob& code, SrcLocation FunctionPtr f_beginCell = lookup_function("beginCell"); FunctionPtr f_endCell = lookup_function("builder.endCell"); std::vector rvect_builder = code.create_var(TypeDataBuilder::create(), loc, "b"); + + insert_debug_info_inner(loc, ast_function_call, code); code.emplace_back(loc, Op::_Call, rvect_builder, std::vector{}, f_beginCell); tolk_assert(ir_options.size() == 1); // struct PackOptions @@ -280,6 +282,8 @@ std::vector generate_unpack_struct_from_slice(CodeBlob& code, SrcLoca } std::vector generate_unpack_struct_from_cell(CodeBlob& code, SrcLocation loc, TypePtr any_type, std::vector&& ir_cell, const std::vector& ir_options) { + insert_debug_info_inner(loc, ast_function_call, code); + FunctionPtr f_beginParse = lookup_function("cell.beginParse"); std::vector ir_slice = code.create_var(TypeDataSlice::create(), loc, "s"); code.emplace_back(loc, Op::_Call, ir_slice, std::move(ir_cell), f_beginParse); @@ -365,6 +369,8 @@ std::vector generate_lazy_struct_to_cell(CodeBlob& code, SrcLocation StructPtr original_struct = loaded_state->original_struct; StructPtr hidden_struct = loaded_state->hidden_struct; + insert_debug_info_inner(loc, ast_function_call, code); + std::vector rvect_builder = code.create_var(TypeDataBuilder::create(), loc, "b"); code.emplace_back(loc, Op::_Call, rvect_builder, std::vector{}, lookup_function("beginCell")); diff --git a/tolk/pack-unpack-serializers.cpp b/tolk/pack-unpack-serializers.cpp index 5f1ebcc8f5..560d656e21 100644 --- a/tolk/pack-unpack-serializers.cpp +++ b/tolk/pack-unpack-serializers.cpp @@ -725,8 +725,10 @@ struct S_Either final : ISerializer { } } tolk_assert(options.match_blocks.size() == 2); + insert_debug_info_inner(loc, ast_match_arm, code); std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_is_right = ctx->loadUint(1, "(eitherBit)"); + Op& if_op = code.emplace_back(loc, Op::_If, std::move(ir_is_right)); { 
code.push_set_cur(if_op.block0); @@ -738,6 +740,7 @@ struct S_Either final : ISerializer { { code.push_set_cur(if_op.block1); const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_left); + insert_debug_info_inner(m_block->v_body->loc, ast_match_arm, code); std::vector ith_result = pre_compile_expr(m_block->v_body, code); options.save_match_result_on_arm_end(code, loc, m_block, std::move(ith_result), ir_result); code.close_pop_cur(loc); @@ -848,9 +851,12 @@ struct S_MultipleConstructors final : ISerializer { std::vector ir_prefix_eq = code.create_tmp_var(TypeDataInt::create(), loc, "(prefix-eq)"); for (int i = 0; i < t_union->size(); ++i) { + const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_union->variants[i]); StructData::PackOpcode opcode = opcodes[opcodes_order_mapping[i]]; std::vector args = { ctx->ir_slice0, code.create_int(loc, opcode.pack_prefix, "(pack-prefix)"), code.create_int(loc, opcode.prefix_len, "(prefix-len)") }; + insert_debug_info_inner(m_block->arm_variant_node->loc, ast_match_arm, code); code.emplace_back(loc, Op::_Call, std::vector{ctx->ir_slice0, ir_prefix_eq[0]}, std::move(args), f_tryStripPrefix); + Op& if_op = code.emplace_back(loc, Op::_If, ir_prefix_eq); code.push_set_cur(if_op.block0); std::vector ith_result = pre_compile_expr(options.match_blocks[i].v_body, code); @@ -1004,6 +1010,8 @@ struct S_CustomStruct final : ISerializer { std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_prefix_eq = code.create_tmp_var(TypeDataInt::create(), loc, "(prefix-eq)"); + insert_debug_info_inner(loc, ast_match_arm, code); + StructData::PackOpcode opcode = struct_ref->opcode; if (opcode.exists()) { // it's `match` over a struct (makes sense for a struct with prefix and `else` branch) std::vector args = { ctx->ir_slice0, code.create_int(loc, opcode.pack_prefix, "(pack-prefix)"), code.create_int(loc, opcode.prefix_len, "(prefix-len)") }; @@ -1011,6 
+1019,7 @@ struct S_CustomStruct final : ISerializer { } else { code.emplace_back(loc, Op::_Let, ir_prefix_eq, std::vector{code.create_int(loc, -1, "(true)")}); } + Op& if_op = code.emplace_back(loc, Op::_If, ir_prefix_eq); { code.push_set_cur(if_op.block0); diff --git a/tolk/pack-unpack-serializers.h b/tolk/pack-unpack-serializers.h index ab11733aa5..f8e0e885d8 100644 --- a/tolk/pack-unpack-serializers.h +++ b/tolk/pack-unpack-serializers.h @@ -94,6 +94,7 @@ enum class PrefixReadMode { struct LazyMatchOptions { struct MatchBlock { TypePtr arm_variant; // left of `V => ...`; nullptr for `else => ...` + AnyV arm_variant_node; // left of `V => ...` as node for debug info; nullptr for `else => ...` AnyExprV v_body; // right of `V => ...` TypePtr block_expr_type; // for match expression, if `V => expr`, it's expr's inferred_type }; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 4e3491171d..06a2684e63 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -122,43 +122,6 @@ static int calc_offset_on_stack(StructPtr struct_ref, int field_idx) { return stack_offset; } -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) { - if (!G.settings.with_debug_info) { - return; - } - - if (kind == ast_block_statement) { - return; - } - - if (code.prev_ops_kind == Op::_DebugInfo) { - // std::cerr << "skip repeated debug info" << std::endl; - return; - } - - auto& op = code.emplace_back(loc, Op::_DebugInfo); - op.debug_idx = G.debug_infos.size(); - - auto info = DebugInfo{}; - info.idx = op.debug_idx; - - if (const SrcFile* src_file = loc.get_src_file(); src_file != nullptr) { - const auto& pos = src_file->convert_offset(loc.get_char_offset()); - - info.loc_file = src_file->realpath; - info.loc_line = pos.line_no; - info.loc_pos = pos.char_no; - info.loc_len = pos.line_str.length(); - } - - info.func_name = code.name; - G.debug_infos.push_back(info); -} - -void insert_debug_info(AnyV v, CodeBlob& code) { 
- insert_debug_info_inner(v->loc, v->kind, code); -} - // Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable // exists, but on its change, something non-trivial should happen. // Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` @@ -754,6 +717,8 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob } std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { + insert_debug_info_inner(loc, ast_function_call, code); + tolk_assert(vars_per_arg.size() == f_inlined->parameters.size()); for (int i = 0; i < f_inlined->get_num_params(); ++i) { const LocalVarData& param_i = f_inlined->get_param(i); @@ -1277,6 +1242,8 @@ static std::vector process_reference(V v, CodeBlob& co } static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, v->kind, code); + AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); @@ -1295,6 +1262,8 @@ static std::vector process_assignment(V v, CodeBlob& code } static std::vector process_set_assign(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, v->kind, code); + // for "a += b", emulate "a = a + b" // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance std::string_view calc_operator = v->operator_name; // "+" for operator += @@ -1328,11 +1297,17 @@ static std::vector process_binary_operator(V v, std::vector cond = pre_compile_expr(v->get_lhs(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(ternary)"); + + insert_debug_info_inner(v->loc, ast_binary_operator, code); + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); + + 
insert_debug_info_inner(v->get_lhs()->loc, ast_binary_operator, code); code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code, nullptr)); code.close_pop_cur(v->loc); code.push_set_cur(if_op.block1); + insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); @@ -1472,6 +1447,8 @@ static std::vector process_lazy_operator(V v, Code } static std::vector process_match_expression(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, ast_function_call, code); + TypePtr lhs_type = v->get_subject()->inferred_type->unwrap_alias(); int n_arms = v->get_arms_count(); @@ -1511,11 +1488,13 @@ static std::vector process_match_expression(V v if (is_match_by_type) { TypePtr cmp_type = v_ith_arm->pattern_type_node->resolved_type->unwrap_alias(); tolk_assert(!cmp_type->try_as()); // `match` over `int|slice` is a type checker error + insert_debug_info_inner(v_ith_arm->loc, ast_function_call, code); eq_ith_ir_idx = pre_compile_is_type(code, lhs_type, cmp_type, subj_ir_idx, v_ith_arm->loc, "(arm-cond-eq)"); } else { std::vector ith_ir_idx = pre_compile_expr(v_ith_arm->get_pattern_expr(), code); tolk_assert(subj_ir_idx.size() == 1 && ith_ir_idx.size() == 1); eq_ith_ir_idx = code.create_tmp_var(TypeDataBool::create(), v_ith_arm->loc, "(arm-cond-eq)"); + insert_debug_info_inner(v_ith_arm->loc, ast_function_call, code); code.emplace_back(v_ith_arm->loc, Op::_Call, eq_ith_ir_idx, std::vector{subj_ir_idx[0], ith_ir_idx[0]}, eq_sym); } Op& if_op = code.emplace_back(v_ith_arm->loc, Op::_If, std::move(eq_ith_ir_idx)); @@ -1537,6 +1516,7 @@ static std::vector process_match_expression(V v // we're inside the last ELSE auto v_last_arm = v->get_arm(n_arms - 1); if (v->is_statement()) { + 
insert_debug_info_inner(v_last_arm->loc, ast_function_call, code); pre_compile_expr(v_last_arm->get_body(), code); if (v == stmt_before_immediate_return) { code.emplace_back(v_last_arm->loc, Op::_Return); @@ -2018,6 +1998,8 @@ static std::vector process_artificial_aux_vertex(V v_match = v->get_wrapped_expr()->as(); pre_compile_expr(v_match->get_subject(), code, nullptr); + // insert_debug_info_inner(v_match->loc, ast_match_expression, code); + const LazyVariableLoadedState* lazy_variable = code.get_lazy_variable(data->var_ref); tolk_assert(lazy_variable); TypePtr t_union = data->field_ref ? data->field_ref->declared_type : data->var_ref->declared_type; @@ -2027,12 +2009,14 @@ static std::vector process_artificial_aux_vertex(Vget_arms_count(); ++i) { auto v_arm = v_match->get_arm(i); TypePtr arm_variant = nullptr; + AnyV arm_variant_node = nullptr; if (v_arm->pattern_kind == MatchArmKind::exact_type) { arm_variant = v_arm->pattern_type_node->resolved_type->unwrap_alias(); + arm_variant_node = v_arm->pattern_type_node; } else { tolk_assert(v_arm->pattern_kind == MatchArmKind::else_branch); // `else` allowed in a lazy match } - match_blocks.emplace_back(LazyMatchOptions::MatchBlock{arm_variant, v_arm->get_body(), v_arm->get_body()->inferred_type}); + match_blocks.emplace_back(LazyMatchOptions::MatchBlock{arm_variant, arm_variant_node, v_arm->get_body(), v_arm->get_body()->inferred_type}); } LazyMatchOptions options = { @@ -2059,7 +2043,7 @@ static std::vector process_artificial_aux_vertex(V pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { if (v->kind != ast_binary_operator && v->kind != ast_unary_operator && v->kind != ast_reference && v->kind != ast_is_type_operator && v->kind != ast_function_call) { - insert_debug_info(v, code); + // insert_debug_info(v, code); } switch (v->kind) { @@ -2315,6 +2299,8 @@ static void process_return_statement(V v, CodeBlob& code) return_vars.insert(return_vars.begin(), mutated_vars.begin(), 
mutated_vars.end()); } + insert_debug_info_inner(v->loc, ast_return_statement, code); + // if fun_ref is called and inlined into a parent, assign a result instead of generating a return statement if (code.inline_rvect_out) { code.emplace_back(v->loc, Op::_Let, *code.inline_rvect_out, std::move(return_vars)); @@ -2340,7 +2326,7 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code } void process_any_statement(AnyV v, CodeBlob& code) { - insert_debug_info(v, code); + // insert_debug_info(v, code); switch (v->kind) { case ast_block_statement: @@ -2369,7 +2355,8 @@ void process_any_statement(AnyV v, CodeBlob& code) { } static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyCode* code_body) { - auto v_body = fun_ref->ast_root->as()->get_body()->as(); + auto v_fun_decl = fun_ref->ast_root->as(); + auto v_body = v_fun_decl->get_body()->as(); CodeBlob* blob = new CodeBlob(fun_ref); std::vector rvect_import; @@ -2389,6 +2376,8 @@ static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyC blob->in_var_cnt = blob->var_cnt; tolk_assert(blob->var_cnt == total_arg_width); + // insert_debug_info_inner(v_fun_decl->get_identifier()->loc, ast_function_declaration, *blob); + if (fun_ref->name == "onInternalMessage") { handle_onInternalMessage_codegen_start(fun_ref, rvect_import, *blob, fun_ref->loc); } @@ -2491,6 +2480,7 @@ class ConvertASTToLegacyOpVisitor final { tolk_assert(fun_ref->is_type_inferring_done()); if (fun_ref->is_code_function() && !fun_ref->is_inlined_in_place()) { convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); + std::get(fun_ref->body)->code->print(std::cerr); } else if (fun_ref->is_asm_function()) { convert_asm_body_to_AsmOp(fun_ref, std::get(fun_ref->body)); } diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index 56eea65f95..a8926a0314 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -37,6 +37,8 @@ 
void pipeline_process_debug_info(std::ostream& debug_out) { for (auto di : G.debug_infos) { auto vb = arrb.enter_value(); auto ob = vb.enter_object(); + ob("idx", std::to_string(di.idx)); + ob("is_entry", td::JsonBool(di.is_entry)); ob("file", di.loc_file); ob("line", (td::int64)di.loc_line); ob("pos", (td::int64)di.loc_pos); diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 07db0eb88b..a5b42570d4 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -145,6 +145,9 @@ std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { } const SrcFile* SrcLocation::get_src_file() const { + if (file_id == -1) { + return nullptr; + } return G.all_src_files.get_file(file_id); } diff --git a/tolk/tolk.h b/tolk/tolk.h index 2ee058f7c5..ce850846b8 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -21,6 +21,8 @@ #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" + +#include #include #include #include @@ -266,6 +268,7 @@ struct Stack; struct DebugInfo { size_t idx{}; + bool is_entry{}; std::string loc_file; long loc_line{}; long loc_pos{}; @@ -1072,6 +1075,9 @@ struct LazyVarRefAtCodegen { : var_ref(var_ref), var_state(var_state) {} }; +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code); +void insert_debug_info(AnyV v, CodeBlob& code); + struct CodeBlob { int var_cnt, in_var_cnt; FunctionPtr fun_ref; @@ -1151,8 +1157,6 @@ AsmOp push_const(SrcLocation loc, td::RefInt256 x); void define_builtins(); void patch_builtins_after_stdlib_loaded(); - - /* * * OUTPUT CODE GENERATOR From 0c7e0f4aba7a371ffabd21be8e18a9f4d91d95a5 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Thu, 28 Aug 2025 15:18:59 +0400 Subject: [PATCH 07/27] more --- tolk/codegen.cpp | 14 ++++++++++++++ tolk/pack-unpack-api.cpp | 4 ++++ tolk/pipe-ast-to-legacy.cpp | 38 ++++++++++++++++++++++++++++++++----- tolk/send-message-api.cpp | 5 +++++ 4 files changed, 56 insertions(+), 5 deletions(-) diff --git 
a/tolk/codegen.cpp b/tolk/codegen.cpp index 12b4213764..3f28c1a9de 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -281,6 +281,20 @@ bool Op::generate_code_step(Stack& stack) { std::ostringstream ops; ops << debug_idx << " DEBUGMARK"; // pseudo instruction + // Attach a source snippet as a Fift comment to make mapping explicit in compiled output + if (const SrcFile* src_file = loc.get_src_file()) { + const auto& pos = src_file->convert_offset(loc.get_char_offset()); + std::string line = std::string(pos.line_str); + // Trim trailing CR/LF and excessive spaces to keep output compact + while (!line.empty() && (line.back() == '\\r' || line.back() == '\\n')) line.pop_back(); + // Avoid extremely long comments + if (line.size() > 200) { + line.resize(200); + line += "..."; + } + ops << " // " << line; + } + const auto list_size = stack.o.list_.size(); if (list_size > 0) { stack.o.insert(stack.o.list_.size(), loc, ops.str()); diff --git a/tolk/pack-unpack-api.cpp b/tolk/pack-unpack-api.cpp index d034a5bb5d..ea424ead36 100644 --- a/tolk/pack-unpack-api.cpp +++ b/tolk/pack-unpack-api.cpp @@ -256,6 +256,7 @@ std::vector generate_pack_struct_to_cell(CodeBlob& code, SrcLocation } std::vector generate_pack_struct_to_builder(CodeBlob& code, SrcLocation loc, TypePtr any_type, std::vector&& ir_builder, std::vector&& ir_obj, const std::vector& ir_options) { + insert_debug_info_inner(loc, ast_function_call, code); PackContext ctx(code, loc, ir_builder, ir_options); // mutate this builder ctx.generate_pack_any(any_type, std::move(ir_obj)); @@ -263,6 +264,7 @@ std::vector generate_pack_struct_to_builder(CodeBlob& code, SrcLocati } std::vector generate_unpack_struct_from_slice(CodeBlob& code, SrcLocation loc, TypePtr any_type, std::vector&& ir_slice, bool mutate_slice, const std::vector& ir_options) { + insert_debug_info_inner(loc, ast_function_call, code); if (!mutate_slice) { std::vector slice_copy = code.create_var(TypeDataSlice::create(), loc, "s"); code.emplace_back(loc, 
Op::_Let, slice_copy, std::move(ir_slice)); @@ -301,6 +303,7 @@ std::vector generate_unpack_struct_from_cell(CodeBlob& code, SrcLocat } std::vector generate_skip_struct_in_slice(CodeBlob& code, SrcLocation loc, TypePtr any_type, std::vector&& ir_slice, const std::vector& ir_options) { + insert_debug_info_inner(loc, ast_function_call, code); UnpackContext ctx(code, loc, ir_slice, ir_options); // mutate this slice ctx.generate_skip_any(any_type); @@ -435,6 +438,7 @@ PackSize estimate_serialization_size(TypePtr any_type) { } std::vector generate_estimate_size_call(CodeBlob& code, SrcLocation loc, TypePtr any_type) { + insert_debug_info_inner(loc, ast_function_call, code); EstimateContext ctx; PackSize pack_size = ctx.estimate_any(any_type); diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 06a2684e63..25593852d3 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -620,6 +620,9 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob SrcLocation loc = v_call->loc; FunctionPtr called_f = v_call->fun_maybe; + // Mark compile-time call site explicitly + insert_debug_info_inner(loc, ast_function_call, code); + if (called_f->is_method() && called_f->is_instantiation_of_generic_function()) { std::string_view f_name = called_f->base_fun_ref->name; TypePtr typeT = called_f->substitutedTs->typeT_at(0); @@ -1279,6 +1282,7 @@ static std::vector process_binary_operator(V v, TokenType t = v->tok; if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring + insert_debug_info_inner(v->loc, ast_binary_operator, code); std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); return transition_to_target_type(std::move(rvect), code, target_type, v); @@ -1303,17 +1307,25 @@ static std::vector process_binary_operator(V v, Op& if_op = code.emplace_back(v->loc, 
Op::_If, cond); code.push_set_cur(if_op.block0); - insert_debug_info_inner(v->get_lhs()->loc, ast_binary_operator, code); + // For &&: true-branch evaluates RHS; mark RHS location + if (t == tok_logical_and) { + insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); + } code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code, nullptr)); code.close_pop_cur(v->loc); code.push_set_cur(if_op.block1); - insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); + + // For ||: false-branch evaluates RHS; mark RHS location + if (t == tok_logical_or) { + insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); + } code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); } if (t == tok_eq || t == tok_neq) { if (v->get_lhs()->inferred_type->unwrap_alias() == TypeDataAddress::create() && v->get_rhs()->inferred_type->unwrap_alias() == TypeDataAddress::create()) { + insert_debug_info_inner(v->loc, ast_binary_operator, code); FunctionPtr f_sliceEq = lookup_function("slice.bitsEqual"); std::vector ir_lhs_slice = pre_compile_expr(v->get_lhs(), code); std::vector ir_rhs_slice = pre_compile_expr(v->get_rhs(), code); @@ -1331,12 +1343,14 @@ static std::vector process_binary_operator(V v, } static std::vector process_unary_operator(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, ast_unary_operator, code); std::vector rhs_vars = pre_compile_expr(v->get_rhs(), code, nullptr); std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(rhs_vars), v->fun_ref, "(unary-op)"); return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_ternary_operator(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, ast_ternary_operator, code); 
std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); @@ -1364,12 +1378,14 @@ static std::vector process_ternary_operator(V v } static std::vector process_cast_as_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + insert_debug_info_inner(v->loc, ast_cast_as_operator, code); TypePtr child_target_type = v->type_node->resolved_type; std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_is_type_operator(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info_inner(v->loc, ast_is_type_operator, code); TypePtr lhs_type = v->get_expr()->inferred_type->unwrap_alias(); TypePtr cmp_type = v->type_node->resolved_type->unwrap_alias(); bool is_null_check = cmp_type == TypeDataNullLiteral::create(); // `v == null`, not `v is T` @@ -1386,6 +1402,7 @@ static std::vector process_is_type_operator(V v } static std::vector process_not_null_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + insert_debug_info_inner(v->loc, ast_not_null_operator, code); TypePtr expr_type = v->get_expr()->inferred_type->unwrap_alias(); TypePtr without_null_type = calculate_type_subtract_rhs_type(expr_type, TypeDataNullLiteral::create()); TypePtr child_target_type = without_null_type != TypeDataNever::create() ? 
without_null_type : expr_type; @@ -1447,7 +1464,7 @@ static std::vector process_lazy_operator(V v, Code } static std::vector process_match_expression(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, ast_function_call, code); + insert_debug_info_inner(v->loc, ast_match_expression, code); TypePtr lhs_type = v->get_subject()->inferred_type->unwrap_alias(); @@ -1499,6 +1516,7 @@ static std::vector process_match_expression(V v } Op& if_op = code.emplace_back(v_ith_arm->loc, Op::_If, std::move(eq_ith_ir_idx)); code.push_set_cur(if_op.block0); + insert_debug_info_inner(v_ith_arm->loc, ast_match_arm, code); if (v->is_statement()) { pre_compile_expr(v_ith_arm->get_body(), code); if (v == stmt_before_immediate_return) { @@ -1516,7 +1534,7 @@ static std::vector process_match_expression(V v // we're inside the last ELSE auto v_last_arm = v->get_arm(n_arms - 1); if (v->is_statement()) { - insert_debug_info_inner(v_last_arm->loc, ast_function_call, code); + insert_debug_info_inner(v_last_arm->loc, ast_match_arm, code); pre_compile_expr(v_last_arm->get_body(), code); if (v == stmt_before_immediate_return) { code.emplace_back(v_last_arm->loc, Op::_Return); @@ -1644,6 +1662,7 @@ static std::vector process_function_call(V v, Code std::vector tfunc = pre_compile_expr(v->get_callee(), code, nullptr); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); + insert_debug_info_inner(v->loc, ast_function_call, code); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)"); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); @@ -1785,6 +1804,7 @@ static std::vector process_braced_expression(V static std::vector process_tensor(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { // tensor is compiled "as is", for example `(1, null)` occupies 2 slots // and if assigned/passed to something other, like `(int, (int,int)?)`, a whole tensor is transitioned, it works + 
insert_debug_info_inner(v->loc, ast_tensor, code); std::vector rvect = pre_compile_tensor(code, v->get_items(), lval_ctx); return transition_to_target_type(std::move(rvect), code, target_type, v); } @@ -1793,6 +1813,7 @@ static std::vector process_typed_tuple(V v, CodeBl if (lval_ctx) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] can not be used as lvalue here"); } + insert_debug_info_inner(v->loc, ast_bracket_tuple, code); std::vector left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)"); std::vector right = pre_compile_tensor(code, v->get_items(), lval_ctx); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); @@ -1855,6 +1876,7 @@ static std::vector process_object_literal(V v, Co // an object (an instance of a struct) is actually a tensor at low-level // for example, `struct User { id: int; name: slice; }` occupies 2 slots // fields of a tensor are placed in order of declaration (in a literal they might be shuffled) + insert_debug_info_inner(v->loc, ast_object_literal, code); bool are_fields_shuffled = false; for (int i = 1; i < v->get_body()->get_num_fields(); ++i) { StructFieldPtr field_ref = v->struct_ref->find_field(v->get_body()->get_field(i)->get_field_name()); @@ -2136,6 +2158,7 @@ static void process_block_statement(V v, CodeBlob& code) { } static void process_assert_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_assert_statement, code); std::vector ir_thrown_code = pre_compile_expr(v->get_thrown_code(), code); std::vector ir_cond = pre_compile_expr(v->get_cond(), code); tolk_assert(ir_cond.size() == 1 && ir_thrown_code.size() == 1); @@ -2154,6 +2177,7 @@ static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { } static void process_try_catch_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_try_catch_statement, code); code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); 
code.push_set_cur(try_catch_op.block0); @@ -2172,6 +2196,7 @@ static void process_try_catch_statement(V v, CodeBlob& } static void process_repeat_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_repeat_statement, code); std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); @@ -2180,6 +2205,7 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_if_statement, code); std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); @@ -2211,6 +2237,7 @@ static void process_if_statement(V v, CodeBlob& code) { } static void process_do_while_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_do_while_statement, code); Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); process_any_statement(v->get_body(), code); @@ -2253,6 +2280,7 @@ static void process_do_while_statement(V v, CodeBlob& co } static void process_while_statement(V v, CodeBlob& code) { + insert_debug_info_inner(v->loc, ast_while_statement, code); Op& while_op = code.emplace_back(v->loc, Op::_While); code.push_set_cur(while_op.block0); while_op.left = pre_compile_expr(v->get_cond(), code, nullptr); @@ -2376,7 +2404,7 @@ static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyC blob->in_var_cnt = blob->var_cnt; tolk_assert(blob->var_cnt == total_arg_width); - // insert_debug_info_inner(v_fun_decl->get_identifier()->loc, ast_function_declaration, *blob); + insert_debug_info_inner(v_fun_decl->get_identifier()->loc, ast_function_declaration, *blob); if (fun_ref->name == "onInternalMessage") { handle_onInternalMessage_codegen_start(fun_ref, rvect_import, *blob, fun_ref->loc); diff --git a/tolk/send-message-api.cpp b/tolk/send-message-api.cpp index 
2b4d80be8e..36f8817bf0 100644 --- a/tolk/send-message-api.cpp +++ b/tolk/send-message-api.cpp @@ -87,6 +87,7 @@ struct IR_AutoDeployAddress { }; std::vector generate_createMessage(CodeBlob& code, SrcLocation loc, TypePtr bodyT, std::vector&& rvect) { + insert_debug_info_inner(loc, ast_function_call, code); StructPtr s_Options = lookup_global_symbol("CreateMessageOptions")->try_as(); StructPtr s_AutoDeployAddress = lookup_global_symbol("AutoDeployAddress")->try_as(); @@ -357,6 +358,7 @@ std::vector generate_createMessage(CodeBlob& code, SrcLocation loc, T } std::vector generate_createExternalLogMessage(CodeBlob& code, SrcLocation loc, TypePtr bodyT, std::vector&& rvect) { + insert_debug_info_inner(loc, ast_function_call, code); StructPtr s_Options = lookup_global_symbol("CreateExternalLogMessageOptions")->try_as(); StructPtr s_ExtOutLogBucket = lookup_global_symbol("ExtOutLogBucket")->try_as(); @@ -491,6 +493,7 @@ std::vector generate_createExternalLogMessage(CodeBlob& code, SrcLoca } std::vector generate_address_buildInAnotherShard(CodeBlob& code, SrcLocation loc, std::vector&& ir_self_address, std::vector&& ir_shard_options) { + insert_debug_info_inner(loc, ast_function_call, code); tolk_assert(ir_shard_options.size() == 2); // example for fixedPrefixLength (shard depth) = 8: @@ -522,6 +525,7 @@ std::vector generate_address_buildInAnotherShard(CodeBlob& code, SrcL } std::vector generate_AutoDeployAddress_buildAddress(CodeBlob& code, SrcLocation loc, std::vector&& ir_auto_deploy) { + insert_debug_info_inner(loc, ast_function_call, code); IR_AutoDeployAddress ir_self(code, loc, ir_auto_deploy); std::vector ir_builder = code.create_tmp_var(TypeDataSlice::create(), loc, "(addr-b)"); @@ -600,6 +604,7 @@ std::vector generate_AutoDeployAddress_buildAddress(CodeBlob& code, S } std::vector generate_AutoDeployAddress_addressMatches(CodeBlob& code, SrcLocation loc, std::vector&& ir_auto_deploy, std::vector&& ir_address) { + insert_debug_info_inner(loc, ast_function_call, 
code); IR_AutoDeployAddress ir_self(code, loc, ir_auto_deploy); // at first, calculate stateInitHash = (hash of StateInit cell would be, but without constructing a cell) From dd2a07f79d396eee8df5744ee24a5fe6f9c75865 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 15 Sep 2025 00:44:15 +0400 Subject: [PATCH 08/27] improvements --- tolk/abscode.cpp | 18 +++--- tolk/codegen.cpp | 19 ++++--- tolk/compiler-state.h | 2 +- tolk/debug-info.cpp | 42 ++++++++++---- tolk/pack-unpack-serializers.cpp | 3 +- tolk/pipe-ast-to-legacy.cpp | 10 +++- tolk/pipe-process-debug-info.cpp | 96 ++++++++++++++++++++++++++++---- tolk/tolk.h | 86 ++++++++++++++++++++++++---- 8 files changed, 222 insertions(+), 54 deletions(-) diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index f63570812f..cabfd84ec6 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -256,7 +256,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx break; case _DebugInfo: os << pfx << dis << "DEBUGINFO "; - os << debug_idx << std::endl; + os << source_map_entry_idx << std::endl; break; case _Import: os << pfx << dis << "IMPORT "; @@ -398,7 +398,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name) { +std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name, TypePtr parent_type) { std::vector ir_idx; int stack_w = var_type->get_width_on_stack(); ir_idx.reserve(stack_w); @@ -406,33 +406,33 @@ std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, s for (int i = 0; i < t_struct->struct_ref->get_num_fields(); ++i) { StructFieldPtr field_ref = t_struct->struct_ref->get_field(i); std::string sub_name = name.empty() || t_struct->struct_ref->get_num_fields() == 1 ? name : name + "." 
+ field_ref->name; - std::vector nested = create_var(field_ref->declared_type, loc, std::move(sub_name)); + std::vector nested = create_var(field_ref->declared_type, loc, std::move(sub_name), parent_type); ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); } } else if (const TypeDataTensor* t_tensor = var_type->try_as()) { for (int i = 0; i < t_tensor->size(); ++i) { std::string sub_name = name.empty() ? name : name + "." + std::to_string(i); - std::vector nested = create_var(t_tensor->items[i], loc, std::move(sub_name)); + std::vector nested = create_var(t_tensor->items[i], loc, std::move(sub_name), parent_type); ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); } } else if (const TypeDataAlias* t_alias = var_type->try_as()) { - ir_idx = create_var(t_alias->underlying_type, loc, std::move(name)); + ir_idx = create_var(t_alias->underlying_type, loc, std::move(name), parent_type); } else if (const TypeDataUnion* t_union = var_type->try_as(); t_union && stack_w != 1) { std::string utag_name = name.empty() ? "'UTag" : name + ".UTag"; if (t_union->or_null) { // in stack comments, `a:(int,int)?` will be "a.0 a.1 a.UTag" - ir_idx = create_var(t_union->or_null, loc, std::move(name)); + ir_idx = create_var(t_union->or_null, loc, std::move(name), parent_type); } else { // in stack comments, `a:int|slice` will be "a.USlot1 a.UTag" for (int i = 0; i < stack_w - 1; ++i) { std::string slot_name = name.empty() ? 
"'USlot" + std::to_string(i + 1) : name + ".USlot" + std::to_string(i + 1); - ir_idx.emplace_back(create_var(TypeDataUnknown::create(), loc, std::move(slot_name))[0]); + ir_idx.emplace_back(create_var(TypeDataUnknown::create(), loc, std::move(slot_name), var_type)[0]); } } - ir_idx.emplace_back(create_var(TypeDataInt::create(), loc, std::move(utag_name))[0]); + ir_idx.emplace_back(create_var(TypeDataInt::create(), loc, std::move(utag_name), parent_type)[0]); } else if (var_type != TypeDataVoid::create() && var_type != TypeDataNever::create()) { #ifdef TOLK_DEBUG tolk_assert(stack_w == 1); #endif - vars.emplace_back(var_cnt, var_type, std::move(name), loc); + vars.emplace_back(var_cnt, var_type, std::move(name), loc, parent_type); ir_idx.emplace_back(var_cnt); var_cnt++; } diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 3f28c1a9de..4d4474b0f5 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -279,14 +279,14 @@ bool Op::generate_code_step(Stack& stack) { // we need to handle it here to correctly handle case `IFJMP { DROP }` if (cl == _DebugInfo) { std::ostringstream ops; - ops << debug_idx << " DEBUGMARK"; // pseudo instruction + ops << source_map_entry_idx << " DEBUGMARK"; // pseudo instruction // Attach a source snippet as a Fift comment to make mapping explicit in compiled output if (const SrcFile* src_file = loc.get_src_file()) { const auto& pos = src_file->convert_offset(loc.get_char_offset()); std::string line = std::string(pos.line_str); // Trim trailing CR/LF and excessive spaces to keep output compact - while (!line.empty() && (line.back() == '\\r' || line.back() == '\\n')) line.pop_back(); + while (!line.empty() && (line.back() == '\r' || line.back() == '\n')) line.pop_back(); // Avoid extremely long comments if (line.size() > 200) { line.resize(200); @@ -295,16 +295,17 @@ bool Op::generate_code_step(Stack& stack) { ops << " // " << line; } - const auto list_size = stack.o.list_.size(); - if (list_size > 0) { + // Append opcode to a list + if 
(const auto list_size = stack.o.list_.size(); list_size > 0) { stack.o.insert(stack.o.list_.size(), loc, ops.str()); } - if (debug_idx < G.debug_infos.size()) { - auto& debug_info = G.debug_infos.at(debug_idx); - for (auto i : stack.s) { - if (const auto var = stack.o.get_var(i); var.has_value()) { - debug_info.vars.push_back(*var); + if (source_map_entry_idx < G.source_map.size()) { + auto& entry = G.source_map.at(source_map_entry_idx); + for (auto index : stack.s) { + if (const auto var = stack.o.get_var(index); var.has_value()) { + const auto& [data, value] = *var; + entry.vars.push_back({data, value}); } } } diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index e514f6de1e..9511540057 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -106,7 +106,7 @@ struct CompilerState { std::vector all_structs; AllRegisteredSrcFiles all_src_files; - std::vector debug_infos; + std::vector source_map; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } }; diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 7d4e9d4419..6a7be4f979 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -1,10 +1,11 @@ #include "tolk.h" #include #include +#include "ast-stringifier.h" namespace tolk { -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) { +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset) { if (!G.settings.with_debug_info) { return; } @@ -18,28 +19,47 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code) // return; } +#ifdef TOLK_DEBUG + const auto last_op = *std::find_if(code._vector_of_ops.rbegin(), code._vector_of_ops.rend(), [](const auto& it) { + return it->cl != Op::_DebugInfo; + }); +#endif + auto& op = code.emplace_back(loc, Op::_DebugInfo); - op.debug_idx = G.debug_infos.size(); + op.source_map_entry_idx = G.source_map.size(); - auto info = DebugInfo{}; - info.idx = op.debug_idx; + auto info = 
SourceMapEntry{}; + info.idx = op.source_map_entry_idx; info.is_entry = kind == ast_function_declaration; +#ifdef TOLK_DEBUG + if (last_op) { + std::stringstream st; + last_op->show(st, code.vars, "", 4); + + info.opcode = st.str(); + } +#endif + info.ast_kind = ASTStringifier::ast_node_kind_to_string(kind); + if (const SrcFile* src_file = loc.get_src_file(); src_file != nullptr) { const auto& pos = src_file->convert_offset(loc.get_char_offset()); - info.loc_file = src_file->realpath; - info.loc_line = pos.line_no; - info.loc_pos = pos.char_no; - info.loc_len = pos.line_str.length(); + info.loc.file = src_file->realpath; + info.loc.offset = loc.get_char_offset(); + info.loc.line = pos.line_no; + info.loc.line_offset = line_offset; + info.loc.col = pos.char_no - 1; + info.loc.length = pos.line_str.length(); } - info.func_name = code.name; - G.debug_infos.push_back(info); + info.func_name = code.fun_ref->name; + info.func_inline_mode = code.fun_ref->inline_mode; + G.source_map.push_back(info); } void insert_debug_info(AnyV v, CodeBlob& code) { - insert_debug_info_inner(v->loc, v->kind, code); + insert_debug_info_inner(v->loc, v->kind, code, 0); } } diff --git a/tolk/pack-unpack-serializers.cpp b/tolk/pack-unpack-serializers.cpp index 560d656e21..71c1b900bc 100644 --- a/tolk/pack-unpack-serializers.cpp +++ b/tolk/pack-unpack-serializers.cpp @@ -725,7 +725,7 @@ struct S_Either final : ISerializer { } } tolk_assert(options.match_blocks.size() == 2); - insert_debug_info_inner(loc, ast_match_arm, code); + insert_debug_info_inner(loc, ast_match_expression, code); std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_is_right = ctx->loadUint(1, "(eitherBit)"); @@ -733,6 +733,7 @@ struct S_Either final : ISerializer { { code.push_set_cur(if_op.block0); const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_right); + insert_debug_info_inner(m_block->v_body->loc, ast_match_arm, code); std::vector 
ith_result = pre_compile_expr(m_block->v_body, code); options.save_match_result_on_arm_end(code, loc, m_block, std::move(ith_result), ir_result); code.close_pop_cur(loc); diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 25593852d3..f3be9e2cbf 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -721,6 +721,7 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { insert_debug_info_inner(loc, ast_function_call, code); + G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; tolk_assert(vars_per_arg.size() == f_inlined->parameters.size()); for (int i = 0; i < f_inlined->get_num_params(); ++i) { @@ -770,6 +771,9 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ ClearStateAfterInlineInPlace visitor; visitor.start_visiting_function(f_inlined, v_ast_root); + insert_debug_info_inner(loc, ast_function_call, code); + G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; + code.fun_ref = backup_cur_fun; code.inline_rvect_out = backup_outer_inline; code.inlining_before_immediate_return = backup_inline_before_return; @@ -1282,7 +1286,7 @@ static std::vector process_binary_operator(V v, TokenType t = v->tok; if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring - insert_debug_info_inner(v->loc, ast_binary_operator, code); + // insert_debug_info_inner(v->loc, ast_binary_operator, code); std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); return transition_to_target_type(std::move(rvect), code, target_type, v); @@ -1926,6 +1930,7 @@ static std::vector process_object_literal(V 
v, Co } static std::vector process_int_const(V v, CodeBlob& code, TypePtr target_type) { + insert_debug_info(v, code); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(int-const)"); code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); // here, like everywhere, even for just `int`, there might be a potential transition due to union types @@ -2327,7 +2332,8 @@ static void process_return_statement(V v, CodeBlob& code) return_vars.insert(return_vars.begin(), mutated_vars.begin(), mutated_vars.end()); } - insert_debug_info_inner(v->loc, ast_return_statement, code); + // Point to the next line after return + insert_debug_info_inner(v->loc, ast_return_statement, code, 1); // if fun_ref is called and inlined into a parent, assign a result instead of generating a return statement if (code.inline_rvect_out) { diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index a8926a0314..efd614ac6a 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -34,24 +34,93 @@ void pipeline_process_debug_info(std::ostream& debug_out) { { td::JsonBuilder jsonb; auto arrb = jsonb.enter_array(); - for (auto di : G.debug_infos) { + + for (size_t i = 0; i < G.source_map.size(); ++i) { + const auto &entry = G.source_map[i]; auto vb = arrb.enter_value(); auto ob = vb.enter_object(); - ob("idx", std::to_string(di.idx)); - ob("is_entry", td::JsonBool(di.is_entry)); - ob("file", di.loc_file); - ob("line", (td::int64)di.loc_line); - ob("pos", (td::int64)di.loc_pos); - ob("length", (td::int64)di.loc_len); + ob("idx", td::JsonRaw(std::to_string(entry.idx))); + + if (entry.is_entry) { + ob("is_entry", td::JsonBool(entry.is_entry)); + } + +#ifdef TOLK_DEBUG + if (i + 1 < G.source_map.size()) { + ob("opcode", G.source_map[i + 1].opcode); + } +#endif + ob("ast_kind", entry.ast_kind); + + // Used only for source map debug + if (const auto file = G.all_src_files.find_file(entry.loc.file)) { + int start_offset = -1; + int 
end_offset = -1; + int cur_line = 0; + long search_line = entry.loc.line; + + for (size_t ch_idx = 0; ch_idx < file->text.length(); ++ch_idx) { + const auto &ch = file->text[ch_idx]; + if (ch == '\n') { + cur_line++; + + if (cur_line == search_line - 1) { + start_offset = static_cast(ch_idx + 1); + } + + if (cur_line == search_line && start_offset != -1) { + end_offset = static_cast(ch_idx); + break; + } + } + } + + const std::string line = file->text.substr(start_offset, end_offset - start_offset); + + // const auto& pos = file->convert_offset(entry.loc.offset); + // std::string line = std::string(pos.line_str); + ob("line_str", line); + + std::string underline = ""; + for (int j = 0; j < entry.loc.col; ++j) { + underline += " "; + } + underline += "^"; + + ob("line_off", underline); + } + + ob("file", entry.loc.file); + ob("line", static_cast(entry.loc.line)); + ob("pos", static_cast(entry.loc.col)); + ob("line_offset", static_cast(entry.loc.line_offset)); + ob("length", static_cast(entry.loc.length)); td::JsonBuilder varb; auto vararrb = varb.enter_array(); - for (auto var_and_value : di.vars) { - const auto [var, value] = var_and_value; + for (const auto &[var, value] : entry.vars) { auto varb2 = vararrb.enter_value(); auto varbo = varb2.enter_object(); varbo("name", var.name.empty() ? "'" + std::to_string(var.ir_idx) : var.name); varbo("type", var.v_type == nullptr ? 
"" : var.v_type->as_human_readable()); + + if (var.parent_type != nullptr) { + auto union_parent = var.parent_type->try_as(); + if (union_parent != nullptr) { + td::JsonBuilder parent_type_builder; + auto parent_type_array_builder = parent_type_builder.enter_array(); + + for (auto variant : union_parent->variants) { + auto array_value = parent_type_array_builder.enter_value(); + array_value << variant->as_human_readable(); + } + + parent_type_array_builder.leave(); + varbo("possible_qualifier_types", td::JsonRaw(parent_type_builder.string_builder().as_cslice())); + } + } + + // varbo("parent_type", var.parent_type == nullptr ? "" : var.parent_type->as_human_readable()); if (!value.empty()) { varbo("value", value); } @@ -61,7 +130,14 @@ void pipeline_process_debug_info(std::ostream& debug_out) { td::JsonRaw vararrs(varb.string_builder().as_cslice()); ob("vars", vararrs); - ob("func", di.func_name); + ob("func", entry.func_name); + ob("func_inline_mode", static_cast(entry.func_inline_mode)); + if (entry.before_inlined_function_call) { + ob("before_inlined_function_call", td::JsonBool(entry.before_inlined_function_call)); + } + if (entry.after_inlined_function_call) { + ob("after_inlined_function_call", td::JsonBool(entry.after_inlined_function_call)); + } } arrb.leave(); diff --git a/tolk/tolk.h b/tolk/tolk.h index ce850846b8..6e709597d1 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -32,6 +32,7 @@ #define tolk_assert(expr) if(UNLIKELY(!(expr))) on_assertion_failed(#expr, __FILE__, __LINE__); namespace tolk { +struct Op; GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN void on_assertion_failed(const char *description, const char *file_name, int line_number); @@ -55,15 +56,17 @@ struct TmpVar { TypePtr v_type; // get_width_on_stack() is 1 std::string name; // "x" for vars originated from user sources; "x.0" for tensor components; empty for implicitly created tmp vars SrcLocation loc; // location of var declaration in sources or where a tmp var was originated + TypePtr 
parent_type = nullptr; // type of "stack" in "stack.USlot1" #ifdef TOLK_DEBUG const char* desc = nullptr; // "origin" of tmp var, for debug output like `'15 (binary-op) '16 (glob-var)` #endif - TmpVar(var_idx_t ir_idx, TypePtr v_type, std::string name, SrcLocation loc) + TmpVar(var_idx_t ir_idx, TypePtr v_type, std::string name, SrcLocation loc, TypePtr parent_type = nullptr) : ir_idx(ir_idx) , v_type(v_type) , name(std::move(name)) - , loc(loc) { + , loc(loc) + , parent_type(parent_type) { } void show_as_stack_comment(std::ostream& os) const; @@ -266,15 +269,76 @@ class ListIterator { struct Stack; -struct DebugInfo { +struct SourceMapLocation { + std::string file; + int offset{}; + long line{}; + long line_offset{}; + long col{}; + long length{}; +}; + +struct SourceMapVariable { + /** + * All information about variable. + */ + TmpVar data; + + /** + * If a variable has a constant value (rarely) it will be placed here. + */ + std::string constant_value; +}; + +struct SourceMapEntry { + /** + * Unique ID of this entry. + */ size_t idx{}; bool is_entry{}; - std::string loc_file; - long loc_line{}; - long loc_pos{}; - long loc_len{}; - std::vector> vars; + + /** + * Location of this entry. + */ + SourceMapLocation loc{}; + + /** + * Variables available in current position. + */ + std::vector vars; + + /** + * Name oj outer function which contains this code. + */ std::string func_name; + + /** + * Whenever outer function is inlined and how. + */ + FunctionInlineMode func_inline_mode; + bool before_inlined_function_call{false}; + bool after_inlined_function_call{false}; +#ifdef TOLK_DEBUG + std::string opcode; +#endif + std::string ast_kind; +}; + +struct SourceMapGlobalVariable { + /** + * Name of this global variable. + */ + std::string name; + /** + * Human-readable type pf this global variable. 
+ */ + std::string type; +}; + +struct SourceMap { + std::string version; + std::vector globals; + std::vector entries; }; struct Op { @@ -312,7 +376,7 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - size_t debug_idx{0}; + size_t source_map_entry_idx{0}; Op(SrcLocation loc, OpKind cl) : cl(cl), flags(0), loc(loc) { } Op(SrcLocation loc, OpKind cl, const std::vector& left) @@ -1075,7 +1139,7 @@ struct LazyVarRefAtCodegen { : var_ref(var_ref), var_state(var_state) {} }; -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code); +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset = 0); void insert_debug_info(AnyV v, CodeBlob& code); struct CodeBlob { @@ -1111,7 +1175,7 @@ struct CodeBlob { #endif return res; } - std::vector create_var(TypePtr var_type, SrcLocation loc, std::string name); + std::vector create_var(TypePtr var_type, SrcLocation loc, std::string name, TypePtr parent_type = nullptr); std::vector create_tmp_var(TypePtr var_type, SrcLocation loc, const char* desc) { std::vector ir_idx = create_var(var_type, loc, {}); #ifdef TOLK_DEBUG From d27e8730e25294a2b7eab658288589451d9a1770 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 15 Sep 2025 01:33:10 +0400 Subject: [PATCH 09/27] improvements --- tolk/debug-info.cpp | 5 +++-- tolk/pipe-ast-to-legacy.cpp | 11 +++++++++-- tolk/pipe-process-debug-info.cpp | 4 ++++ tolk/tolk.h | 3 ++- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 6a7be4f979..f50d49f132 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -5,7 +5,7 @@ namespace tolk { -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset) { +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset, std::string descr) { if (!G.settings.with_debug_info) 
{ return; } @@ -30,6 +30,7 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, auto info = SourceMapEntry{}; info.idx = op.source_map_entry_idx; + info.descr = descr; info.is_entry = kind == ast_function_declaration; #ifdef TOLK_DEBUG @@ -59,7 +60,7 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, } void insert_debug_info(AnyV v, CodeBlob& code) { - insert_debug_info_inner(v->loc, v->kind, code, 0); + insert_debug_info_inner(v->loc, v->kind, code, 0, ""); } } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index f3be9e2cbf..7da1f8f99a 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -1269,8 +1269,6 @@ static std::vector process_assignment(V v, CodeBlob& code } static std::vector process_set_assign(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, v->kind, code); - // for "a += b", emulate "a = a + b" // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance std::string_view calc_operator = v->operator_name; // "+" for operator += @@ -1311,6 +1309,9 @@ static std::vector process_binary_operator(V v, Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); + if (t == tok_logical_or) { + insert_debug_info_inner(v->loc, ast_binary_operator, code, 0, "lhs of || is true"); + } // For &&: true-branch evaluates RHS; mark RHS location if (t == tok_logical_and) { insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); @@ -1323,6 +1324,9 @@ static std::vector process_binary_operator(V v, if (t == tok_logical_or) { insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); } + if (t == tok_logical_and) { + insert_debug_info_inner(v->loc, ast_binary_operator, code, 0, "rhs of && is false"); + } code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? 
v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); @@ -1438,14 +1442,17 @@ static std::vector process_lazy_operator(V v, Code bool has_passed_options = false; if (f_name == "T.fromSlice") { std::vector passed_slice = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); + insert_debug_info_inner(v->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Let, ir_slice, std::move(passed_slice)); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "T.fromCell") { std::vector ir_cell = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); + insert_debug_info_inner(v->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "Cell.load") { std::vector ir_cell = pre_compile_expr(v_call->get_callee()->try_as()->get_obj(), code); + insert_debug_info_inner(v->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 1; } else { diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index efd614ac6a..a27e8f8856 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -41,6 +41,10 @@ void pipeline_process_debug_info(std::ostream& debug_out) { auto ob = vb.enter_object(); ob("idx", td::JsonRaw(std::to_string(entry.idx))); + if (entry.descr.size() != 0) { + ob("descr", entry.descr); + } + if (entry.is_entry) { ob("is_entry", td::JsonBool(entry.is_entry)); } diff --git a/tolk/tolk.h b/tolk/tolk.h index 6e709597d1..73b8df3860 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -296,6 +296,7 @@ struct SourceMapEntry { */ size_t idx{}; bool is_entry{}; + std::string descr{}; /** * Location of this entry. 
@@ -1139,7 +1140,7 @@ struct LazyVarRefAtCodegen { : var_ref(var_ref), var_state(var_state) {} }; -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset = 0); +void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset = 0, std::string descr = ""); void insert_debug_info(AnyV v, CodeBlob& code); struct CodeBlob { From 26a29420100b448bdcda6d63242eae021f6cd344 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 15 Sep 2025 01:51:13 +0400 Subject: [PATCH 10/27] better lazy handling --- tolk/pipe-ast-to-legacy.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 7da1f8f99a..4d7fd7e39c 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -1427,6 +1427,7 @@ static std::vector process_lazy_operator(V v, Code FunctionPtr called_f = v_call->fun_maybe; if (called_f->is_code_function()) { // `lazy loadStorage()` is allowed, it contains just `return ...`, inline it here + insert_debug_info_inner(v->loc, ast_function_call, code); auto f_body = called_f->ast_root->as()->get_body()->as(); tolk_assert(f_body->size() == 1 && f_body->get_item(0)->kind == ast_return_statement); auto f_returns = f_body->get_item(0)->as(); @@ -1442,17 +1443,17 @@ static std::vector process_lazy_operator(V v, Code bool has_passed_options = false; if (f_name == "T.fromSlice") { std::vector passed_slice = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); - insert_debug_info_inner(v->loc, ast_function_call, code); + insert_debug_info_inner(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Let, ir_slice, std::move(passed_slice)); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "T.fromCell") { std::vector ir_cell = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); - insert_debug_info_inner(v->loc, ast_function_call, code); + 
insert_debug_info_inner(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "Cell.load") { std::vector ir_cell = pre_compile_expr(v_call->get_callee()->try_as()->get_obj(), code); - insert_debug_info_inner(v->loc, ast_function_call, code); + insert_debug_info_inner(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 1; } else { From 4e3603d3ee08560463a505d3b3c41fef1ca703cb Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Fri, 19 Sep 2025 16:14:32 +0400 Subject: [PATCH 11/27] add `inlined_to_func_name` field, add "version" field, add sources as array of object --- tolk/debug-info.cpp | 3 ++ tolk/pipe-ast-to-legacy.cpp | 8 ++++-- tolk/pipe-process-debug-info.cpp | 49 +++++++++++++++----------------- tolk/tolk.h | 1 + 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index f50d49f132..56c9a21048 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -55,6 +55,9 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, } info.func_name = code.fun_ref->name; + if (code.name != info.func_name) { + info.inlined_to_func_name = code.name; + } info.func_inline_mode = code.fun_ref->inline_mode; G.source_map.push_back(info); } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 4d7fd7e39c..3644d456e3 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -721,7 +721,9 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) 
{ insert_debug_info_inner(loc, ast_function_call, code); - G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; + if (G.settings.with_debug_info) { + G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; + } tolk_assert(vars_per_arg.size() == f_inlined->parameters.size()); for (int i = 0; i < f_inlined->get_num_params(); ++i) { @@ -772,7 +774,9 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ visitor.start_visiting_function(f_inlined, v_ast_root); insert_debug_info_inner(loc, ast_function_call, code); - G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; + if (G.settings.with_debug_info) { + G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; + } code.fun_ref = backup_cur_fun; code.inline_rvect_out = backup_outer_inline; diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index a27e8f8856..53037b63e0 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -12,10 +12,11 @@ void pipeline_process_debug_info(std::ostream& debug_out) { return; } - td::JsonBuilder _jb; auto objb = _jb.enter_object(); + objb("version", "1"); + { td::JsonBuilder jsonb; auto arrb = jsonb.enter_array(); @@ -31,6 +32,22 @@ void pipeline_process_debug_info(std::ostream& debug_out) { objb("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); } + { + td::JsonBuilder jsonb; + auto arrb = jsonb.enter_array(); + for (auto file : G.all_src_files) { + auto vb = arrb.enter_value(); + auto ob = vb.enter_object(); + + ob("path", file->realpath); + ob("is_stdlib", td::JsonBool(file->is_stdlib_file)); + ob("content", file->text); + } + arrb.leave(); + + objb("files", td::JsonRaw(jsonb.string_builder().as_cslice())); + } + { td::JsonBuilder jsonb; auto arrb = jsonb.enter_array(); @@ -58,31 +75,8 @@ void pipeline_process_debug_info(std::ostream& debug_out) { // Used only for source map debug if (const auto file = 
G.all_src_files.find_file(entry.loc.file)) { - int start_offset = -1; - int end_offset = -1; - int cur_line = 0; - long search_line = entry.loc.line; - - for (size_t ch_idx = 0; ch_idx < file->text.length(); ++ch_idx) { - const auto &ch = file->text[ch_idx]; - if (ch == '\n') { - cur_line++; - - if (cur_line == search_line - 1) { - start_offset = static_cast(ch_idx + 1); - } - - if (cur_line == search_line && start_offset != -1) { - end_offset = static_cast(ch_idx); - break; - } - } - } - - const std::string line = file->text.substr(start_offset, end_offset - start_offset); - - // const auto& pos = file->convert_offset(entry.loc.offset); - // std::string line = std::string(pos.line_str); + const auto& pos = file->convert_offset(entry.loc.offset); + std::string line = std::string(pos.line_str); ob("line_str", line); std::string underline = ""; @@ -135,6 +129,9 @@ void pipeline_process_debug_info(std::ostream& debug_out) { ob("vars", vararrs); ob("func", entry.func_name); + if (entry.inlined_to_func_name != "") { + ob("inlined_to_func", entry.inlined_to_func_name); + } ob("func_inline_mode", static_cast(entry.func_inline_mode)); if (entry.before_inlined_function_call) { ob("before_inlined_function_call", td::JsonBool(entry.before_inlined_function_call)); diff --git a/tolk/tolk.h b/tolk/tolk.h index 73b8df3860..7a315712d4 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -312,6 +312,7 @@ struct SourceMapEntry { * Name oj outer function which contains this code. */ std::string func_name; + std::string inlined_to_func_name; /** * Whenever outer function is inlined and how. 
From 610766eeb2ca3bef65521368899c591901683452 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Fri, 19 Sep 2025 16:28:24 +0400 Subject: [PATCH 12/27] fixes after merge --- tolk/pack-unpack-api.cpp | 6 +++--- tolk/pipe-ast-to-legacy.cpp | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tolk/pack-unpack-api.cpp b/tolk/pack-unpack-api.cpp index 601e049ca7..f1126c2ee8 100644 --- a/tolk/pack-unpack-api.cpp +++ b/tolk/pack-unpack-api.cpp @@ -473,9 +473,9 @@ PackSize estimate_serialization_size(TypePtr any_type) { return ctx.estimate_any(any_type); } -std::vector generate_estimate_size_call(CodeBlob& code, SrcLocation loc, TypePtr any_type) { - EstimateContext ctx; - PackSize pack_size = ctx.estimate_any(any_type); +std::vector generate_T_estimatePackSize(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { + TypePtr typeT = called_f->substitutedTs->typeT_at(0); + PackSize pack_size = estimate_serialization_size(typeT); std::vector ir_tensor = code.create_tmp_var(TypeDataTensor::create({TypeDataInt::create(), TypeDataInt::create(), TypeDataInt::create(), TypeDataInt::create()}), loc, "(result-tensor)"); code.emplace_back(loc, Op::_IntConst, std::vector{ir_tensor[0]}, td::make_refint(pack_size.min_bits)); diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 239fefb3fa..28d6a63250 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -25,7 +25,6 @@ #include "pack-unpack-api.h" #include "gen-entrypoints.h" #include "generics-helpers.h" -#include "send-message-api.h" #include "gen-entrypoints.h" #include From c0c6d18219cdeef1d205600fe2a63e98f991ac58 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Fri, 19 Sep 2025 16:29:48 +0400 Subject: [PATCH 13/27] remove unused field --- tolk/debug-info.cpp | 5 ----- tolk/tolk.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 
56c9a21048..c527ced898 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -14,11 +14,6 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, return; } - if (code.prev_ops_kind == Op::_DebugInfo) { - // std::cerr << "skip repeated debug info" << std::endl; - // return; - } - #ifdef TOLK_DEBUG const auto last_op = *std::find_if(code._vector_of_ops.rbegin(), code._vector_of_ops.rend(), [](const auto& it) { return it->cl != Op::_DebugInfo; diff --git a/tolk/tolk.h b/tolk/tolk.h index 99e153729d..a38e09bca3 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -1172,7 +1172,6 @@ struct CodeBlob { bool inside_evaluating_constant = false; bool inlining_before_immediate_return = false; std::unique_ptr ops; - Op::OpKind prev_ops_kind; std::unique_ptr* cur_ops; #ifdef TOLK_DEBUG std::vector _vector_of_ops; // to see it in debugger instead of nested pointers @@ -1188,7 +1187,6 @@ struct CodeBlob { if (forced_loc.is_defined()) { res.loc = forced_loc; } - prev_ops_kind = res.cl; cur_ops = &(res.next); #ifdef TOLK_DEBUG _vector_of_ops.push_back(&res); From dd970d53724561a1ded2652f98e515068e013972 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Fri, 19 Sep 2025 16:35:49 +0400 Subject: [PATCH 14/27] fixes and improvements --- tolk/compiler-state.h | 2 +- tolk/debug-info.cpp | 2 +- tolk/pipe-ast-to-legacy.cpp | 4 ++-- tolk/pipe-process-debug-info.cpp | 2 +- tolk/send-message-api.cpp | 2 +- tolk/tolk-main.cpp | 4 ++-- tolk/tolk-wasm.cpp | 12 ++++++------ tolk/tolk.cpp | 4 ++-- tolk/tolk.h | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 9a4edc983a..27348d82e5 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -54,7 +54,7 @@ struct CompilerSettings { int optimization_level = 2; bool stack_layout_comments = true; bool tolk_src_as_line_comments = true; - bool with_debug_info = false; + bool collect_source_map = false; std::string 
output_filename; std::string boc_output_filename; diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index c527ced898..943361f607 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -6,7 +6,7 @@ namespace tolk { void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset, std::string descr) { - if (!G.settings.with_debug_info) { + if (!G.settings.collect_source_map) { return; } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 28d6a63250..75f6609b04 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -657,7 +657,7 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { insert_debug_info_inner(loc, ast_function_call, code); - if (G.settings.with_debug_info) { + if (G.settings.collect_source_map) { G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; } @@ -710,7 +710,7 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ visitor.start_visiting_function(f_inlined, v_ast_root); insert_debug_info_inner(loc, ast_function_call, code); - if (G.settings.with_debug_info) { + if (G.settings.collect_source_map) { G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; } diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index 53037b63e0..4d38099e53 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -8,7 +8,7 @@ namespace tolk { void pipeline_process_debug_info(std::ostream& debug_out) { - if (!G.settings.with_debug_info) { + if (!G.settings.collect_source_map) { return; } diff --git a/tolk/send-message-api.cpp b/tolk/send-message-api.cpp index f2dbd9f2f8..66cdff8670 100644 --- a/tolk/send-message-api.cpp +++ 
b/tolk/send-message-api.cpp @@ -358,7 +358,7 @@ std::vector generate_createMessage(FunctionPtr called_f, CodeBlob& co } // fun createExternalLogMessage(options: CreateExternalLogMessageOptions): OutMessage -std::vector generate_createExternalLogMessage(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { insert_debug_info_inner(loc, ast_function_call, code); +std::vector generate_createExternalLogMessage(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { insert_debug_info_inner(loc, ast_function_call, code); TypePtr bodyT = called_f->substitutedTs->typeT_at(0); diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index 13d97e197a..52aeee8774 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -51,7 +51,7 @@ void usage(const char* progname) { "-x\tEnables experimental options, comma-separated\n" "-S\tDon't include stack layout comments into Fift output\n" "-L\tDon't include original lines from Tolk src into Fift output\n" - "-d\tInclude debug information\n" + "-d\tCollect source map\n" "-e\tIncreases verbosity level (extra output into stderr)\n" "-v\tOutput version of Tolk and exit\n"; std::exit(2); @@ -236,7 +236,7 @@ int main(int argc, char* const argv[]) { G.settings.verbosity++; break; case 'd': - G.settings.with_debug_info = true; + G.settings.collect_source_map = true; break; case 'v': std::cout << "Tolk compiler v" << TOLK_VERSION << std::endl; diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 4576f89653..4be5ca94c9 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -41,7 +41,7 @@ static td::Result compile_internal(char *config_json) { TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); TRY_RESULT(src_line_comments, td::get_json_object_bool_field(config, "withSrcLineComments", true, false)); - TRY_RESULT(with_debug_info, 
td::get_json_object_bool_field(config, "withDebugInfo", true, false)); + TRY_RESULT(collect_source_map, td::get_json_object_bool_field(config, "collectSourceMap", true, false)); TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); TRY_RESULT(experimental_options, td::get_json_object_string_field(config, "experimentalOptions", true)); @@ -49,15 +49,15 @@ static td::Result compile_internal(char *config_json) { G.settings.optimization_level = std::max(0, opt_level); G.settings.stack_layout_comments = stack_comments; G.settings.tolk_src_as_line_comments = src_line_comments; - G.settings.with_debug_info = with_debug_info; + G.settings.collect_source_map = collect_source_map; if (!experimental_options.empty()) { G.settings.parse_experimental_options_cmd_arg(experimental_options.c_str()); } - std::ostringstream outs, errs, debug_out; + std::ostringstream outs, errs, source_map_out; std::cout.rdbuf(outs.rdbuf()); std::cerr.rdbuf(errs.rdbuf()); - int exit_code = tolk_proceed(entrypoint_filename, debug_out); + int exit_code = tolk_proceed(entrypoint_filename, source_map_out); if (exit_code != 0) { return td::Status::Error(errs.str()); } @@ -71,8 +71,8 @@ static td::Result compile_internal(char *config_json) { obj("codeBoc64", fift_res.codeBoc64); obj("codeHashHex", fift_res.codeHashHex); - if (const auto debug_info = debug_out.str(); !debug_info.empty()) { - obj("debugInfo", td::JsonRaw(debug_info)); + if (const auto source_map = source_map_out.str(); !source_map.empty()) { + obj("sourceMap", td::JsonRaw(source_map)); } obj("stderr", errs.str().c_str()); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index e42813b6cc..f803018dbd 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -46,7 +46,7 @@ void on_assertion_failed(const char *description, const char *file_name, int lin throw Fatal(std::move(message)); } -int tolk_proceed(const std::string &entrypoint_filename, std::ostream& debug_out) { +int tolk_proceed(const std::string 
&entrypoint_filename, std::ostream& source_map_out) { type_system_init(); define_builtins(); lexer_init(); @@ -76,7 +76,7 @@ int tolk_proceed(const std::string &entrypoint_filename, std::ostream& debug_out pipeline_find_unused_symbols(); pipeline_generate_fif_output_to_std_cout(); - pipeline_process_debug_info(debug_out); + pipeline_process_debug_info(source_map_out); return 0; } catch (Fatal& fatal) { diff --git a/tolk/tolk.h b/tolk/tolk.h index a38e09bca3..defade96b3 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -1246,7 +1246,7 @@ void patch_builtins_after_stdlib_loaded(); * */ -int tolk_proceed(const std::string &entrypoint_filename, std::ostream& debug_out); +int tolk_proceed(const std::string &entrypoint_filename, std::ostream& source_map_out); } // namespace tolk From f10dfa2dc62c234af04d2ca58e750bd6aef90a8f Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:28:55 +0400 Subject: [PATCH 15/27] accept source map path, better wasm return, filter out DEBUGMARK for first compilation and return raw code from second compilation in new field, add comments --- tolk/compiler-state.h | 1 + tolk/pipe-process-debug-info.cpp | 2 +- tolk/pipeline.h | 2 +- tolk/tolk-main.cpp | 15 ++++----- tolk/tolk-wasm.cpp | 52 +++++++++++++++++++++++++++++++- tolk/tolk.cpp | 2 +- 6 files changed, 63 insertions(+), 11 deletions(-) diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 27348d82e5..0a5c4ba05c 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -58,6 +58,7 @@ struct CompilerSettings { std::string output_filename; std::string boc_output_filename; + std::string source_map_output_filename; std::string stdlib_folder; // path to tolk-stdlib/; note: from tolk-js it's empty! 
tolk-js reads files via js callback FsReadCallback read_callback; diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-process-debug-info.cpp index 4d38099e53..639daa8bc8 100644 --- a/tolk/pipe-process-debug-info.cpp +++ b/tolk/pipe-process-debug-info.cpp @@ -7,7 +7,7 @@ namespace tolk { -void pipeline_process_debug_info(std::ostream& debug_out) { +void pipeline_generate_source_map(std::ostream& debug_out) { if (!G.settings.collect_source_map) { return; } diff --git a/tolk/pipeline.h b/tolk/pipeline.h index bfd10e5ecc..6cb45b411c 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -52,7 +52,7 @@ void pipeline_convert_ast_to_legacy_Expr_Op(); void pipeline_find_unused_symbols(); void pipeline_generate_fif_output_to_std_cout(); -void pipeline_process_debug_info(std::ostream& debug_out); +void pipeline_generate_source_map(std::ostream& debug_out); // these pipes also can be called per-function individually // they are called for instantiated generics functions, when `f` is deeply cloned as `f` diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index 52aeee8774..a523615500 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -212,7 +212,7 @@ class StdCoutRedirectToFile { int main(int argc, char* const argv[]) { int i; - while ((i = getopt(argc, argv, "o:b:O:x:SLedvh")) != -1) { + while ((i = getopt(argc, argv, "o:b:O:x:d:SLevh")) != -1) { switch (i) { case 'o': G.settings.output_filename = optarg; @@ -237,6 +237,7 @@ int main(int argc, char* const argv[]) { break; case 'd': G.settings.collect_source_map = true; + G.settings.source_map_output_filename = optarg; break; case 'v': std::cout << "Tolk compiler v" << TOLK_VERSION << std::endl; @@ -284,14 +285,14 @@ int main(int argc, char* const argv[]) { G.settings.read_callback = fs_read_callback; - const std::string source_map_filename = - G.settings.output_filename.empty() ? 
"./debug.source_map.json" : G.settings.output_filename + ".source_map.json"; - std::ofstream debug_out(source_map_filename); - if (!debug_out.is_open()) { - std::cerr << "failed to create output file " << source_map_filename << " for source map" << std::endl; + const auto source_map_filename = + G.settings.source_map_output_filename.empty() ? "./source_map.json" : G.settings.source_map_output_filename; + std::ofstream source_map_out(source_map_filename); + if (!source_map_out.is_open()) { + std::cerr << "Failed to create source map file " << source_map_filename << std::endl; return 2; } - int exit_code = tolk_proceed(argv[optind], debug_out); + int exit_code = tolk_proceed(argv[optind], source_map_out); return exit_code; } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 4be5ca94c9..12fe27e64e 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -30,10 +30,13 @@ #include "td/utils/JsonBuilder.h" #include "fift/utils.h" #include "td/utils/Status.h" +#include "td/utils/misc.h" #include using namespace tolk; +static std::string postprocess_fift_output_after_source_msp(std::string fift_code); + static td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) td::JsonObject& config = input_json.get_object(); @@ -62,7 +65,21 @@ static td::Result compile_internal(char *config_json) { return td::Status::Error(errs.str()); } - TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); + std::string raw_fift_code = outs.str(); + + // Due to the implementation specifics of source maps, with `collect_source_map` enabled, + // the Fift code contains additional DEBUGMARK instructions, which allow the real code in Tolk + // to be matched with specific TVM instructions after Fift compilation. + // + // Since DEBUGMARK instructions are absent from the TVM, attempting to compile and run such code + // will result in an error. 
Therefore, `fift_code` contains the compiled code as if + // no DEBUGMARK instructions exist. + // + // This code will be the same as the code that would be generated without source maps enabled, + // ensuring that users get the correct code in this mode. + std::string fift_code = postprocess_fift_output_after_source_msp(raw_fift_code); + + TRY_RESULT(fift_res, fift::compile_asm_program(std::move(fift_code), "/fiftlib/")); td::JsonBuilder result_json; auto obj = result_json.enter_object(); @@ -72,6 +89,17 @@ static td::Result compile_internal(char *config_json) { obj("codeHashHex", fift_res.codeHashHex); if (const auto source_map = source_map_out.str(); !source_map.empty()) { + // To correctly map Tolk code to TVM instructions, we also need to return the compiled code + // with the DEBUGMARK instructions. This "poisoned for execution" code is used for mapping in tolk-js. + // The bitcode with DEBUGMARK is recompiled into bitcode without it (i.e., valid for execution), + // and in the process, the TASM assembler assembles the mapping of the DEBUGMARK index to TVM instructions, + // which is a key part of source map construction. 
+ auto fift_source_map_res = fift::compile_asm_program(std::move(raw_fift_code), "/fiftlib/"); + if (fift_source_map_res.is_ok()) { + auto res = fift_source_map_res.move_as_ok(); + obj("fiftSourceMapCode", res.fiftCode); + obj("fiftSourceMapBoc64", res.codeBoc64); + } obj("sourceMap", td::JsonRaw(source_map)); } @@ -133,4 +161,26 @@ const char *tolk_compile(char *config_json, WasmFsReadCallback callback) { return strdup(res_string.c_str()); } +static std::string postprocess_fift_output_after_source_msp(std::string fift_code) { + if (!G.settings.collect_source_map) { + // Without enabled source maps code is always good + return fift_code; + } + + std::string processed_code; + bool first = true; + for (auto& line : td::full_split(fift_code, '\n')) { + if (line.find("DEBUGMARK") != std::string::npos) { + // filter out all DEBUGMARK instructions + continue; + } + if (!first) { + processed_code.push_back('\n'); + } + first = false; + processed_code += line; + } + return processed_code; +} + } // extern "C" diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index f803018dbd..e4590de971 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -76,7 +76,7 @@ int tolk_proceed(const std::string &entrypoint_filename, std::ostream& source_ma pipeline_find_unused_symbols(); pipeline_generate_fif_output_to_std_cout(); - pipeline_process_debug_info(source_map_out); + pipeline_generate_source_map(source_map_out); return 0; } catch (Fatal& fatal) { From d0cd8f4aa7132df83dcfecf741b1e6843049e5ff Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:59:22 +0400 Subject: [PATCH 16/27] minor fix --- tolk/tolk-wasm.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 12fe27e64e..1f93f9e6ad 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -96,9 +96,9 @@ static td::Result compile_internal(char *config_json) { // which is a key part of source map construction. 
auto fift_source_map_res = fift::compile_asm_program(std::move(raw_fift_code), "/fiftlib/"); if (fift_source_map_res.is_ok()) { - auto res = fift_source_map_res.move_as_ok(); - obj("fiftSourceMapCode", res.fiftCode); - obj("fiftSourceMapBoc64", res.codeBoc64); + const auto fift_source_map_res_ok = fift_source_map_res.move_as_ok(); + obj("fiftSourceMapCode", fift_source_map_res_ok.fiftCode); + obj("sourceMapCodeBoc64", fift_source_map_res_ok.codeBoc64); } obj("sourceMap", td::JsonRaw(source_map)); } From 39dd52e70741d212f797ad54423441200c068f41 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:07:40 +0400 Subject: [PATCH 17/27] small fixes --- tolk/CMakeLists.txt | 2 +- tolk/asmops.cpp | 10 +++++----- ...ess-debug-info.cpp => pipe-generate-source-map.cpp} | 0 tolk/tolk.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) rename tolk/{pipe-process-debug-info.cpp => pipe-generate-source-map.cpp} (100%) diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index c63b68329d..3f35710213 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -31,7 +31,7 @@ set(TOLK_SOURCE pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp - pipe-process-debug-info.cpp + pipe-generate-source-map.cpp type-system.cpp smart-casts-cfg.cpp generics-helpers.cpp diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index c0a4c64e52..335cfddab2 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -330,16 +330,16 @@ void AsmOpList::show_var_ext(std::ostream& os, std::pair } } -std::optional> AsmOpList::get_var(std::pair idx_pair) const { +std::optional> AsmOpList::get_var(const std::pair& idx_pair) const { const var_idx_t var_idx = idx_pair.first; const const_idx_t const_idx = idx_pair.second; - if (!var_names_ || (unsigned)var_idx >= var_names_->size()) { + if (!var_names_ || static_cast(var_idx) >= var_names_->size()) { return std::nullopt; } - auto var = var_names_->at(var_idx); - if ((unsigned)const_idx < 
constants_.size() && constants_[const_idx].not_null()) { + const auto var = var_names_->at(var_idx); + if (static_cast(const_idx) < constants_.size() && constants_[const_idx].not_null()) { const auto value = constants_[const_idx]; - auto value_str = value->to_dec_string(); + const auto value_str = value->to_dec_string(); return std::tie(var, value_str); } return std::tie(var, ""); diff --git a/tolk/pipe-process-debug-info.cpp b/tolk/pipe-generate-source-map.cpp similarity index 100% rename from tolk/pipe-process-debug-info.cpp rename to tolk/pipe-generate-source-map.cpp diff --git a/tolk/tolk.h b/tolk/tolk.h index defade96b3..11d0bee125 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -668,7 +668,6 @@ struct AsmOpList { std::vector constants_; bool retalt_{false}; bool retalt_inserted_{false}; - std::optional> get_var(std::pair idx_pair) const; void out(std::ostream& os, int mode = 0) const; AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { } @@ -679,6 +678,7 @@ struct AsmOpList { } const_idx_t register_const(td::RefInt256 new_const); td::RefInt256 get_const(const_idx_t idx); + std::optional> get_var(const std::pair& idx_pair) const; void show_var_ext(std::ostream& os, std::pair idx_pair) const; void adjust_last() { if (list_.back().is_nop()) { From 03769f30aeca25d270a6525e21d37466bb55a67f Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:11:27 +0400 Subject: [PATCH 18/27] simplify --- tolk/codegen.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 7382b9f465..9d1556b783 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -281,20 +281,6 @@ bool Op::generate_code_step(Stack& stack) { std::ostringstream ops; ops << source_map_entry_idx << " DEBUGMARK"; // pseudo instruction - // Attach a source snippet as a Fift comment to make mapping explicit in compiled output - if (const 
SrcFile* src_file = loc.get_src_file()) { - const auto& pos = src_file->convert_offset(loc.get_char_offset()); - std::string line = std::string(pos.line_str); - // Trim trailing CR/LF and excessive spaces to keep output compact - while (!line.empty() && (line.back() == '\r' || line.back() == '\n')) line.pop_back(); - // Avoid extremely long comments - if (line.size() > 200) { - line.resize(200); - line += "..."; - } - ops << " // " << line; - } - // Append opcode to a list if (const auto list_size = stack.o.list_.size(); list_size > 0) { stack.o.insert(stack.o.list_.size(), loc, ops.str()); @@ -302,7 +288,9 @@ bool Op::generate_code_step(Stack& stack) { if (source_map_entry_idx < G.source_map.size()) { auto& entry = G.source_map.at(source_map_entry_idx); - for (auto index : stack.s) { + + // Collect all available variables at this point + for (const auto index : stack.s) { if (const auto var = stack.o.get_var(index); var.has_value()) { const auto& [data, value] = *var; entry.vars.push_back({data, value}); From 4cc318e5fa93f441d851cac7633535528fc39923 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:16:02 +0400 Subject: [PATCH 19/27] rename + comment --- tolk/debug-info.cpp | 7 ++- tolk/pack-unpack-api.cpp | 12 +++--- tolk/pack-unpack-serializers.cpp | 10 ++--- tolk/pipe-ast-to-legacy.cpp | 74 ++++++++++++++++---------------- tolk/send-message-api.cpp | 10 ++--- tolk/tolk.h | 2 +- 6 files changed, 59 insertions(+), 56 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 943361f607..5d6c260cd4 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -5,7 +5,7 @@ namespace tolk { -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset, std::string descr) { +void insert_debug_info(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset, std::string descr) { if (!G.settings.collect_source_map) { return; } @@ -36,6 +36,7 @@ void 
insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, info.opcode = st.str(); } #endif + info.ast_kind = ASTStringifier::ast_node_kind_to_string(kind); if (const SrcFile* src_file = loc.get_src_file(); src_file != nullptr) { @@ -51,14 +52,16 @@ void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, info.func_name = code.fun_ref->name; if (code.name != info.func_name) { + // If a function was inlined, `code.name` will contain the name of the function we are inlining into info.inlined_to_func_name = code.name; } info.func_inline_mode = code.fun_ref->inline_mode; + G.source_map.push_back(info); } void insert_debug_info(AnyV v, CodeBlob& code) { - insert_debug_info_inner(v->loc, v->kind, code, 0, ""); + insert_debug_info(v->loc, v->kind, code, 0, ""); } } diff --git a/tolk/pack-unpack-api.cpp b/tolk/pack-unpack-api.cpp index f1126c2ee8..8d0f632553 100644 --- a/tolk/pack-unpack-api.cpp +++ b/tolk/pack-unpack-api.cpp @@ -254,7 +254,7 @@ std::vector generate_T_toCell(FunctionPtr called_f, CodeBlob& code, S FunctionPtr f_endCell = lookup_function("builder.endCell"); std::vector rvect_builder = code.create_var(TypeDataBuilder::create(), loc, "b"); - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); code.emplace_back(loc, Op::_Call, rvect_builder, std::vector{}, f_beginCell); PackContext ctx(code, loc, rvect_builder, args[1]); @@ -269,7 +269,7 @@ std::vector generate_T_toCell(FunctionPtr called_f, CodeBlob& code, S // fun builder.storeAny(mutate self, v: T, options: PackOptions = {}): self std::vector generate_builder_storeAny(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { TypePtr typeT = called_f->substitutedTs->typeT_at(0); - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); PackContext ctx(code, loc, args[0], args[2]); // mutate this builder ctx.generate_pack_any(typeT, 
std::vector(args[1])); @@ -278,7 +278,7 @@ std::vector generate_builder_storeAny(FunctionPtr called_f, CodeBlob& // fun T.fromSlice(rawSlice: slice, options: UnpackOptions): T std::vector generate_T_fromSlice(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); std::vector slice_copy = code.create_var(TypeDataSlice::create(), loc, "s"); code.emplace_back(loc, Op::_Let, slice_copy, args[0]); @@ -310,7 +310,7 @@ std::vector generate_slice_loadAny(FunctionPtr called_f, CodeBlob& co // fun T.fromCell(packedCell: cell, options: UnpackOptions): T // fun Cell.load(self, options: UnpackOptions): T std::vector generate_T_fromCell(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); TypePtr typeT = called_f->substitutedTs->typeT_at(0); FunctionPtr f_beginParse = lookup_function("cell.beginParse"); @@ -331,7 +331,7 @@ std::vector generate_T_fromCell(FunctionPtr called_f, CodeBlob& code, // fun slice.skipAny(mutate self, options: UnpackOptions): self std::vector generate_slice_skipAny(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { TypePtr typeT = called_f->substitutedTs->typeT_at(0); - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); UnpackContext ctx(code, loc, args[0], args[1]); // mutate this slice ctx.generate_skip_any(typeT); @@ -400,7 +400,7 @@ std::vector generate_lazy_struct_to_cell(CodeBlob& code, SrcLocation StructPtr original_struct = loaded_state->original_struct; StructPtr hidden_struct = loaded_state->hidden_struct; - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); std::vector rvect_builder = code.create_var(TypeDataBuilder::create(), loc, 
"b"); code.emplace_back(loc, Op::_Call, rvect_builder, std::vector{}, lookup_function("beginCell")); diff --git a/tolk/pack-unpack-serializers.cpp b/tolk/pack-unpack-serializers.cpp index 22aed6e8ca..058337410d 100644 --- a/tolk/pack-unpack-serializers.cpp +++ b/tolk/pack-unpack-serializers.cpp @@ -725,7 +725,7 @@ struct S_Either final : ISerializer { } } tolk_assert(options.match_blocks.size() == 2); - insert_debug_info_inner(loc, ast_match_expression, code); + insert_debug_info(loc, ast_match_expression, code); std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_is_right = ctx->loadUint(1, "(eitherBit)"); @@ -733,7 +733,7 @@ struct S_Either final : ISerializer { { code.push_set_cur(if_op.block0); const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_right); - insert_debug_info_inner(m_block->v_body->loc, ast_match_arm, code); + insert_debug_info(m_block->v_body->loc, ast_match_arm, code); std::vector ith_result = pre_compile_expr(m_block->v_body, code); options.save_match_result_on_arm_end(code, loc, m_block, std::move(ith_result), ir_result); code.close_pop_cur(loc); @@ -741,7 +741,7 @@ struct S_Either final : ISerializer { { code.push_set_cur(if_op.block1); const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_left); - insert_debug_info_inner(m_block->v_body->loc, ast_match_arm, code); + insert_debug_info(m_block->v_body->loc, ast_match_arm, code); std::vector ith_result = pre_compile_expr(m_block->v_body, code); options.save_match_result_on_arm_end(code, loc, m_block, std::move(ith_result), ir_result); code.close_pop_cur(loc); @@ -855,7 +855,7 @@ struct S_MultipleConstructors final : ISerializer { const LazyMatchOptions::MatchBlock* m_block = options.find_match_block(t_union->variants[i]); StructData::PackOpcode opcode = opcodes[opcodes_order_mapping[i]]; std::vector args = { ctx->ir_slice0, code.create_int(loc, opcode.pack_prefix, "(pack-prefix)"), 
code.create_int(loc, opcode.prefix_len, "(prefix-len)") }; - insert_debug_info_inner(m_block->arm_variant_node->loc, ast_match_arm, code); + insert_debug_info(m_block->arm_variant_node->loc, ast_match_arm, code); code.emplace_back(loc, Op::_Call, std::vector{ctx->ir_slice0, ir_prefix_eq[0]}, std::move(args), f_tryStripPrefix); Op& if_op = code.emplace_back(loc, Op::_If, ir_prefix_eq); @@ -1011,7 +1011,7 @@ struct S_CustomStruct final : ISerializer { std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_prefix_eq = code.create_tmp_var(TypeDataInt::create(), loc, "(prefix-eq)"); - insert_debug_info_inner(loc, ast_match_arm, code); + insert_debug_info(loc, ast_match_arm, code); StructData::PackOpcode opcode = struct_ref->opcode; if (opcode.exists()) { // it's `match` over a struct (makes sense for a struct with prefix and `else` branch) diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 75f6609b04..dc0768df88 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -611,7 +611,7 @@ std::vector pre_compile_is_type(CodeBlob& code, TypePtr expr_type, Ty static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc, std::vector&& args_vars, FunctionPtr fun_ref, const char* debug_desc, bool arg_order_already_equals_asm = false) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); std::vector rvect = code.create_tmp_var(ret_type, loc, debug_desc); Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref); @@ -629,7 +629,7 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob FunctionPtr called_f = v_call->fun_maybe; // Mark compile-time call site explicitly - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); if (called_f->is_method() && called_f->is_instantiation_of_generic_function()) { 
std::string_view f_name = called_f->base_fun_ref->name; @@ -656,7 +656,7 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob } std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); if (G.settings.collect_source_map) { G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; } @@ -709,7 +709,7 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ ClearStateAfterInlineInPlace visitor; visitor.start_visiting_function(f_inlined, v_ast_root); - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); if (G.settings.collect_source_map) { G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; } @@ -1234,7 +1234,7 @@ static std::vector process_reference(V v, CodeBlob& co } static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, v->kind, code); + insert_debug_info(v->loc, v->kind, code); AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); @@ -1289,17 +1289,17 @@ static std::vector process_binary_operator(V v, tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(ternary)"); - insert_debug_info_inner(v->loc, ast_binary_operator, code); + insert_debug_info(v->loc, ast_binary_operator, code); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); if (t == tok_logical_or) { - insert_debug_info_inner(v->loc, ast_binary_operator, code, 0, "lhs of || is true"); + insert_debug_info(v->loc, ast_binary_operator, code, 0, "lhs of || is true"); } // For &&: true-branch evaluates RHS; mark RHS location if (t == tok_logical_and) { - 
insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); + insert_debug_info(v->get_rhs()->loc, ast_binary_operator, code); } code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code, nullptr)); code.close_pop_cur(v->loc); @@ -1307,10 +1307,10 @@ static std::vector process_binary_operator(V v, // For ||: false-branch evaluates RHS; mark RHS location if (t == tok_logical_or) { - insert_debug_info_inner(v->get_rhs()->loc, ast_binary_operator, code); + insert_debug_info(v->get_rhs()->loc, ast_binary_operator, code); } if (t == tok_logical_and) { - insert_debug_info_inner(v->loc, ast_binary_operator, code, 0, "rhs of && is false"); + insert_debug_info(v->loc, ast_binary_operator, code, 0, "rhs of && is false"); } code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); @@ -1318,7 +1318,7 @@ static std::vector process_binary_operator(V v, } if (t == tok_eq || t == tok_neq) { if (v->get_lhs()->inferred_type->unwrap_alias() == TypeDataAddress::create() && v->get_rhs()->inferred_type->unwrap_alias() == TypeDataAddress::create()) { - insert_debug_info_inner(v->loc, ast_binary_operator, code); + insert_debug_info(v->loc, ast_binary_operator, code); FunctionPtr f_sliceEq = lookup_function("slice.bitsEqual"); std::vector ir_lhs_slice = pre_compile_expr(v->get_lhs(), code); std::vector ir_rhs_slice = pre_compile_expr(v->get_rhs(), code); @@ -1336,14 +1336,14 @@ static std::vector process_binary_operator(V v, } static std::vector process_unary_operator(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, ast_unary_operator, code); + insert_debug_info(v->loc, ast_unary_operator, code); std::vector rhs_vars = pre_compile_expr(v->get_rhs(), code, nullptr); std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(rhs_vars), v->fun_ref, "(unary-op)"); return 
transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_ternary_operator(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, ast_ternary_operator, code); + insert_debug_info(v->loc, ast_ternary_operator, code); std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); @@ -1371,14 +1371,14 @@ static std::vector process_ternary_operator(V v } static std::vector process_cast_as_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { - insert_debug_info_inner(v->loc, ast_cast_as_operator, code); + insert_debug_info(v->loc, ast_cast_as_operator, code); TypePtr child_target_type = v->type_node->resolved_type; std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_is_type_operator(V v, CodeBlob& code, TypePtr target_type) { - insert_debug_info_inner(v->loc, ast_is_type_operator, code); + insert_debug_info(v->loc, ast_is_type_operator, code); TypePtr lhs_type = v->get_expr()->inferred_type; TypePtr cmp_type = v->type_node->resolved_type; bool is_null_check = cmp_type == TypeDataNullLiteral::create(); // `v == null`, not `v is T` @@ -1395,7 +1395,7 @@ static std::vector process_is_type_operator(V v } static std::vector process_not_null_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { - insert_debug_info_inner(v->loc, ast_not_null_operator, code); + insert_debug_info(v->loc, ast_not_null_operator, code); TypePtr expr_type = v->get_expr()->inferred_type; TypePtr without_null_type = calculate_type_subtract_rhs_type(expr_type, TypeDataNullLiteral::create()); TypePtr child_target_type = without_null_type != TypeDataNever::create() ? 
without_null_type : expr_type; @@ -1412,7 +1412,7 @@ static std::vector process_lazy_operator(V v, Code FunctionPtr called_f = v_call->fun_maybe; if (called_f->is_code_function()) { // `lazy loadStorage()` is allowed, it contains just `return ...`, inline it here - insert_debug_info_inner(v->loc, ast_function_call, code); + insert_debug_info(v->loc, ast_function_call, code); auto f_body = called_f->ast_root->as()->get_body()->as(); tolk_assert(f_body->size() == 1 && f_body->get_item(0)->kind == ast_return_statement); auto f_returns = f_body->get_item(0)->as(); @@ -1428,17 +1428,17 @@ static std::vector process_lazy_operator(V v, Code bool has_passed_options = false; if (f_name == "T.fromSlice") { std::vector passed_slice = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); - insert_debug_info_inner(v_call->loc, ast_function_call, code); + insert_debug_info(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Let, ir_slice, std::move(passed_slice)); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "T.fromCell") { std::vector ir_cell = pre_compile_expr(v_call->get_arg(0)->get_expr(), code); - insert_debug_info_inner(v_call->loc, ast_function_call, code); + insert_debug_info(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 2; } else if (f_name == "Cell.load") { std::vector ir_cell = pre_compile_expr(v_call->get_callee()->try_as()->get_obj(), code); - insert_debug_info_inner(v_call->loc, ast_function_call, code); + insert_debug_info(v_call->loc, ast_function_call, code); code.emplace_back(v->loc, Op::_Call, ir_slice, ir_cell, lookup_function("cell.beginParse")); has_passed_options = v_call->get_num_args() == 1; } else { @@ -1461,7 +1461,7 @@ static std::vector process_lazy_operator(V v, Code } static std::vector process_match_expression(V v, CodeBlob& code, TypePtr target_type) { - 
insert_debug_info_inner(v->loc, ast_match_expression, code); + insert_debug_info(v->loc, ast_match_expression, code); TypePtr subject_type = v->get_subject()->inferred_type; const TypeDataEnum* subject_enum = subject_type->unwrap_alias()->try_as(); @@ -1513,18 +1513,18 @@ static std::vector process_match_expression(V v if (is_match_by_type) { TypePtr cmp_type = v_ith_arm->pattern_type_node->resolved_type; tolk_assert(!cmp_type->unwrap_alias()->try_as()); // `match` over `int|slice` is a type checker error - insert_debug_info_inner(v_ith_arm->loc, ast_function_call, code); + insert_debug_info(v_ith_arm->loc, ast_function_call, code); eq_ith_ir_idx = pre_compile_is_type(code, subject_type, cmp_type, subj_ir_idx, v_ith_arm->loc, "(arm-cond-eq)"); } else { std::vector ith_ir_idx = pre_compile_expr(v_ith_arm->get_pattern_expr(), code); tolk_assert(subj_ir_idx.size() == 1 && ith_ir_idx.size() == 1); eq_ith_ir_idx = code.create_tmp_var(TypeDataBool::create(), v_ith_arm->loc, "(arm-cond-eq)"); - insert_debug_info_inner(v_ith_arm->loc, ast_function_call, code); + insert_debug_info(v_ith_arm->loc, ast_function_call, code); code.emplace_back(v_ith_arm->loc, Op::_Call, eq_ith_ir_idx, std::vector{subj_ir_idx[0], ith_ir_idx[0]}, eq_sym); } Op& if_op = code.emplace_back(v_ith_arm->loc, Op::_If, std::move(eq_ith_ir_idx)); code.push_set_cur(if_op.block0); - insert_debug_info_inner(v_ith_arm->loc, ast_match_arm, code); + insert_debug_info(v_ith_arm->loc, ast_match_arm, code); if (v->is_statement()) { pre_compile_expr(v_ith_arm->get_body(), code); if (v == stmt_before_immediate_return) { @@ -1542,7 +1542,7 @@ static std::vector process_match_expression(V v // we're inside the last ELSE auto v_last_arm = v->get_arm(n_arms - 1); if (v->is_statement()) { - insert_debug_info_inner(v_last_arm->loc, ast_match_arm, code); + insert_debug_info(v_last_arm->loc, ast_match_arm, code); pre_compile_expr(v_last_arm->get_body(), code); if (v == stmt_before_immediate_return) { 
code.emplace_back(v_last_arm->loc, Op::_Return); @@ -1679,7 +1679,7 @@ static std::vector process_function_call(V v, Code std::vector tfunc = pre_compile_expr(v->get_callee(), code, nullptr); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); - insert_debug_info_inner(v->loc, ast_function_call, code); + insert_debug_info(v->loc, ast_function_call, code); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)"); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); @@ -1821,7 +1821,7 @@ static std::vector process_braced_expression(V static std::vector process_tensor(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { // tensor is compiled "as is", for example `(1, null)` occupies 2 slots // and if assigned/passed to something other, like `(int, (int,int)?)`, a whole tensor is transitioned, it works - insert_debug_info_inner(v->loc, ast_tensor, code); + insert_debug_info(v->loc, ast_tensor, code); std::vector rvect = pre_compile_tensor(code, v->get_items(), lval_ctx); return transition_to_target_type(std::move(rvect), code, target_type, v); } @@ -1830,7 +1830,7 @@ static std::vector process_typed_tuple(V v, CodeBl if (lval_ctx) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] 
can not be used as lvalue here"); } - insert_debug_info_inner(v->loc, ast_bracket_tuple, code); + insert_debug_info(v->loc, ast_bracket_tuple, code); std::vector left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)"); std::vector right = pre_compile_tensor(code, v->get_items(), lval_ctx); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); @@ -1893,7 +1893,7 @@ static std::vector process_object_literal(V v, Co // an object (an instance of a struct) is actually a tensor at low-level // for example, `struct User { id: int; name: slice; }` occupies 2 slots // fields of a tensor are placed in order of declaration (in a literal they might be shuffled) - insert_debug_info_inner(v->loc, ast_object_literal, code); + insert_debug_info(v->loc, ast_object_literal, code); bool are_fields_shuffled = false; for (int i = 1; i < v->get_body()->get_num_fields(); ++i) { StructFieldPtr field_ref = v->struct_ref->find_field(v->get_body()->get_field(i)->get_field_name()); @@ -2176,7 +2176,7 @@ static void process_block_statement(V v, CodeBlob& code) { } static void process_assert_statement(V v, CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_assert_statement, code); + insert_debug_info(v->loc, ast_assert_statement, code); std::vector ir_thrown_code = pre_compile_expr(v->get_thrown_code(), code); std::vector ir_cond = pre_compile_expr(v->get_cond(), code); tolk_assert(ir_cond.size() == 1 && ir_thrown_code.size() == 1); @@ -2195,7 +2195,7 @@ static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { } static void process_try_catch_statement(V v, CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_try_catch_statement, code); + insert_debug_info(v->loc, ast_try_catch_statement, code); code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); code.push_set_cur(try_catch_op.block0); @@ -2214,7 +2214,7 @@ static void process_try_catch_statement(V v, CodeBlob& } static void process_repeat_statement(V v, 
CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_repeat_statement, code); + insert_debug_info(v->loc, ast_repeat_statement, code); std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); @@ -2223,7 +2223,7 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_if_statement, code); + insert_debug_info(v->loc, ast_if_statement, code); std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); @@ -2255,7 +2255,7 @@ static void process_if_statement(V v, CodeBlob& code) { } static void process_do_while_statement(V v, CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_do_while_statement, code); + insert_debug_info(v->loc, ast_do_while_statement, code); Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); process_any_statement(v->get_body(), code); @@ -2298,7 +2298,7 @@ static void process_do_while_statement(V v, CodeBlob& co } static void process_while_statement(V v, CodeBlob& code) { - insert_debug_info_inner(v->loc, ast_while_statement, code); + insert_debug_info(v->loc, ast_while_statement, code); Op& while_op = code.emplace_back(v->loc, Op::_While); code.push_set_cur(while_op.block0); while_op.left = pre_compile_expr(v->get_cond(), code, nullptr); @@ -2346,7 +2346,7 @@ static void process_return_statement(V v, CodeBlob& code) } // Point to the next line after return - insert_debug_info_inner(v->loc, ast_return_statement, code, 1); + insert_debug_info(v->loc, ast_return_statement, code, 1); // if fun_ref is called and inlined into a parent, assign a result instead of generating a return statement if (code.inline_rvect_out) { @@ -2423,7 +2423,7 @@ static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyC blob->in_var_cnt = blob->var_cnt; 
tolk_assert(blob->var_cnt == total_arg_width); - insert_debug_info_inner(v_fun_decl->get_identifier()->loc, ast_function_declaration, *blob); + insert_debug_info(v_fun_decl->get_identifier()->loc, ast_function_declaration, *blob); if (fun_ref->name == "onInternalMessage") { handle_onInternalMessage_codegen_start(fun_ref, rvect_import, *blob, fun_ref->loc); diff --git a/tolk/send-message-api.cpp b/tolk/send-message-api.cpp index 66cdff8670..b0dcda209a 100644 --- a/tolk/send-message-api.cpp +++ b/tolk/send-message-api.cpp @@ -87,7 +87,7 @@ struct IR_AutoDeployAddress { // fun createMessage(options: CreateMessageOptions): OutMessage std::vector generate_createMessage(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& ir_options) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); TypePtr bodyT = called_f->substitutedTs->typeT_at(0); StructPtr s_Options = lookup_global_symbol("CreateMessageOptions")->try_as(); @@ -359,7 +359,7 @@ std::vector generate_createMessage(FunctionPtr called_f, CodeBlob& co // fun createExternalLogMessage(options: CreateExternalLogMessageOptions): OutMessage std::vector generate_createExternalLogMessage(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); TypePtr bodyT = called_f->substitutedTs->typeT_at(0); StructPtr s_Options = lookup_global_symbol("CreateExternalLogMessageOptions")->try_as(); @@ -498,7 +498,7 @@ std::vector generate_createExternalLogMessage(FunctionPtr called_f, C // fun address.buildSameAddressInAnotherShard(self, options: AddressShardingOptions): builder std::vector generate_address_buildInAnotherShard(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& args) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); std::vector 
ir_shard_options = args[1]; tolk_assert(ir_shard_options.size() == 2); @@ -533,7 +533,7 @@ std::vector generate_address_buildInAnotherShard(FunctionPtr called_f // fun AutoDeployAddress.buildAddress(self): builder std::vector generate_AutoDeployAddress_buildAddress(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& ir_options) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); IR_AutoDeployAddress ir_self(code, loc, ir_options[0]); std::vector ir_builder = code.create_tmp_var(TypeDataSlice::create(), loc, "(addr-b)"); @@ -613,7 +613,7 @@ std::vector generate_AutoDeployAddress_buildAddress(FunctionPtr calle // fun AutoDeployAddress.addressMatches(self, addr: address): bool std::vector generate_AutoDeployAddress_addressMatches(FunctionPtr called_f, CodeBlob& code, SrcLocation loc, const std::vector>& ir_self_and_addr) { - insert_debug_info_inner(loc, ast_function_call, code); + insert_debug_info(loc, ast_function_call, code); IR_AutoDeployAddress ir_self(code, loc, ir_self_and_addr[0]); // at first, calculate stateInitHash = (hash of StateInit cell would be, but without constructing a cell) diff --git a/tolk/tolk.h b/tolk/tolk.h index 11d0bee125..673cee185a 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -1151,7 +1151,7 @@ struct LazyVarRefAtCodegen { : var_ref(var_ref), var_state(var_state) {} }; -void insert_debug_info_inner(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset = 0, std::string descr = ""); +void insert_debug_info(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t line_offset = 0, std::string descr = ""); void insert_debug_info(AnyV v, CodeBlob& code); // CachedConstValueAtCodegen is used for a map [some_const => '5] From 0eb7d0b1de3e6482bd0e34f472c024d081a01aa1 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:31:22 +0400 Subject: [PATCH 20/27] fixes and improvements --- 
tolk/pack-unpack-serializers.cpp | 3 -- tolk/pipe-ast-to-legacy.cpp | 17 +++--- tolk/pipe-generate-fif-output.cpp | 1 - tolk/pipe-generate-source-map.cpp | 87 +++++++++++++++---------------- tolk/tolk.h | 1 - 5 files changed, 49 insertions(+), 60 deletions(-) diff --git a/tolk/pack-unpack-serializers.cpp b/tolk/pack-unpack-serializers.cpp index 058337410d..6bd9551ec9 100644 --- a/tolk/pack-unpack-serializers.cpp +++ b/tolk/pack-unpack-serializers.cpp @@ -728,7 +728,6 @@ struct S_Either final : ISerializer { insert_debug_info(loc, ast_match_expression, code); std::vector ir_result = code.create_tmp_var(options.match_expr_type, loc, "(match-expression)"); std::vector ir_is_right = ctx->loadUint(1, "(eitherBit)"); - Op& if_op = code.emplace_back(loc, Op::_If, std::move(ir_is_right)); { code.push_set_cur(if_op.block0); @@ -857,7 +856,6 @@ struct S_MultipleConstructors final : ISerializer { std::vector args = { ctx->ir_slice0, code.create_int(loc, opcode.pack_prefix, "(pack-prefix)"), code.create_int(loc, opcode.prefix_len, "(prefix-len)") }; insert_debug_info(m_block->arm_variant_node->loc, ast_match_arm, code); code.emplace_back(loc, Op::_Call, std::vector{ctx->ir_slice0, ir_prefix_eq[0]}, std::move(args), f_tryStripPrefix); - Op& if_op = code.emplace_back(loc, Op::_If, ir_prefix_eq); code.push_set_cur(if_op.block0); std::vector ith_result = pre_compile_expr(options.match_blocks[i].v_body, code); @@ -1020,7 +1018,6 @@ struct S_CustomStruct final : ISerializer { } else { code.emplace_back(loc, Op::_Let, ir_prefix_eq, std::vector{code.create_int(loc, -1, "(true)")}); } - Op& if_op = code.emplace_back(loc, Op::_If, ir_prefix_eq); { code.push_set_cur(if_op.block0); diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index dc0768df88..8ea9ab1b99 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -121,6 +121,7 @@ static int calc_offset_on_stack(StructPtr struct_ref, int field_idx) { return stack_offset; } + // Main goal of 
LValContext is to handle non-primitive lvalues. At IR level, a usual local variable // exists, but on its change, something non-trivial should happen. // Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` @@ -658,6 +659,9 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { insert_debug_info(loc, ast_function_call, code); if (G.settings.collect_source_map) { + // Inlined functions are tricky for handling in debuggers, code coverage and other tools + // which uses source maps. To simplify handling we explicitly mark start and end instructions + // of inlined function, so tools can understand when we step into and step out inlined function. G.source_map.at(G.source_map.size() - 1).before_inlined_function_call = true; } @@ -711,6 +715,7 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ insert_debug_info(loc, ast_function_call, code); if (G.settings.collect_source_map) { + // Mark end instruction as well G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; } @@ -1269,7 +1274,6 @@ static std::vector process_binary_operator(V v, TokenType t = v->tok; if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring - // insert_debug_info_inner(v->loc, ast_binary_operator, code); std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); return transition_to_target_type(std::move(rvect), code, target_type, v); @@ -2038,8 +2042,6 @@ static std::vector process_artificial_aux_vertex(V v_match = v->get_wrapped_expr()->as(); pre_compile_expr(v_match->get_subject(), code, nullptr); - // 
insert_debug_info_inner(v_match->loc, ast_match_expression, code); - const LazyVariableLoadedState* lazy_variable = code.get_lazy_variable(data->var_ref); tolk_assert(lazy_variable); TypePtr t_union = data->field_ref ? data->field_ref->declared_type : data->var_ref->declared_type; @@ -2081,11 +2083,6 @@ static std::vector process_artificial_aux_vertex(V pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { - if (v->kind != ast_binary_operator && v->kind != ast_unary_operator && v->kind != ast_reference && - v->kind != ast_is_type_operator && v->kind != ast_function_call) { - // insert_debug_info(v, code); - } - switch (v->kind) { case ast_reference: return process_reference(v->as(), code, target_type, lval_ctx); @@ -2372,9 +2369,8 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code code.emplace_back(loc_end, Op::_Return, std::move(mutated_vars)); } -void process_any_statement(AnyV v, CodeBlob& code) { - // insert_debug_info(v, code); +void process_any_statement(AnyV v, CodeBlob& code) { switch (v->kind) { case ast_block_statement: return process_block_statement(v->as(), code); @@ -2527,7 +2523,6 @@ class ConvertASTToLegacyOpVisitor final { tolk_assert(fun_ref->is_type_inferring_done()); if (fun_ref->is_code_function() && !fun_ref->is_inlined_in_place()) { convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); - std::get(fun_ref->body)->code->print(std::cerr); } else if (fun_ref->is_asm_function()) { convert_asm_body_to_AsmOp(fun_ref, std::get(fun_ref->body)); } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index f25b9409ef..8f55737d9d 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -187,7 +187,6 @@ void pipeline_generate_fif_output_to_std_cout() { } std::cout << "}END>c\n"; - if (!G.settings.boc_output_filename.empty()) { std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; } diff 
--git a/tolk/pipe-generate-source-map.cpp b/tolk/pipe-generate-source-map.cpp index 639daa8bc8..9c0c368993 100644 --- a/tolk/pipe-generate-source-map.cpp +++ b/tolk/pipe-generate-source-map.cpp @@ -12,50 +12,51 @@ void pipeline_generate_source_map(std::ostream& debug_out) { return; } - td::JsonBuilder _jb; - auto objb = _jb.enter_object(); + td::JsonBuilder root_builder; + auto root_builder_obj = root_builder.enter_object(); - objb("version", "1"); + root_builder_obj("version", "1"); { td::JsonBuilder jsonb; - auto arrb = jsonb.enter_array(); - for (auto glob_var : G.all_global_vars) { - auto vb = arrb.enter_value(); - auto ob = vb.enter_object(); + auto array_builder = jsonb.enter_array(); + for (const auto& file : G.all_src_files) { + auto value_builder = array_builder.enter_value(); + auto ob = value_builder.enter_object(); - ob("name", glob_var->name); - ob("type", glob_var->declared_type->as_human_readable()); + ob("path", file->realpath); + ob("is_stdlib", td::JsonBool(file->is_stdlib_file)); + ob("content", file->text); } - arrb.leave(); + array_builder.leave(); - objb("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); + root_builder_obj("files", td::JsonRaw(jsonb.string_builder().as_cslice())); } { td::JsonBuilder jsonb; - auto arrb = jsonb.enter_array(); - for (auto file : G.all_src_files) { - auto vb = arrb.enter_value(); - auto ob = vb.enter_object(); + auto array_builder = jsonb.enter_array(); + for (const auto& glob_var : G.all_global_vars) { + auto value_builder = array_builder.enter_value(); + auto ob = value_builder.enter_object(); - ob("path", file->realpath); - ob("is_stdlib", td::JsonBool(file->is_stdlib_file)); - ob("content", file->text); + ob("name", glob_var->name); + ob("type", glob_var->declared_type->as_human_readable()); } - arrb.leave(); + array_builder.leave(); - objb("files", td::JsonRaw(jsonb.string_builder().as_cslice())); + root_builder_obj("globals", td::JsonRaw(jsonb.string_builder().as_cslice())); } { td::JsonBuilder 
jsonb; - auto arrb = jsonb.enter_array(); + auto array_builder = jsonb.enter_array(); for (size_t i = 0; i < G.source_map.size(); ++i) { const auto &entry = G.source_map[i]; - auto vb = arrb.enter_value(); - auto ob = vb.enter_object(); + auto value_builder = array_builder.enter_value(); + auto ob = value_builder.enter_object(); + ob("idx", td::JsonRaw(std::to_string(entry.idx))); if (entry.descr.size() != 0) { @@ -66,14 +67,13 @@ void pipeline_generate_source_map(std::ostream& debug_out) { ob("is_entry", td::JsonBool(entry.is_entry)); } + ob("ast_kind", entry.ast_kind); + #ifdef TOLK_DEBUG if (i + 1 < G.source_map.size()) { ob("opcode", G.source_map[i + 1].opcode); } -#endif - ob("ast_kind", entry.ast_kind); - // Used only for source map debug if (const auto file = G.all_src_files.find_file(entry.loc.file)) { const auto& pos = file->convert_offset(entry.loc.offset); std::string line = std::string(pos.line_str); @@ -87,6 +87,7 @@ void pipeline_generate_source_map(std::ostream& debug_out) { ob("line_off", underline); } +#endif ob("file", entry.loc.file); ob("line", static_cast(entry.loc.line)); @@ -94,16 +95,17 @@ void pipeline_generate_source_map(std::ostream& debug_out) { ob("line_offset", static_cast(entry.loc.line_offset)); ob("length", static_cast(entry.loc.length)); - td::JsonBuilder varb; - auto vararrb = varb.enter_array(); + td::JsonBuilder var_builder; + auto var_array_builder = var_builder.enter_array(); for (const auto &[var, value] : entry.vars) { - auto varb2 = vararrb.enter_value(); - auto varbo = varb2.enter_object(); - varbo("name", var.name.empty() ? "'" + std::to_string(var.ir_idx) : var.name); - varbo("type", var.v_type == nullptr ? "" : var.v_type->as_human_readable()); + auto var_array_builder_value = var_array_builder.enter_value(); + auto var_array_value_object = var_array_builder_value.enter_object(); + + var_array_value_object("name", var.name.empty() ? 
"'" + std::to_string(var.ir_idx) : var.name); + var_array_value_object("type", var.v_type == nullptr ? "" : var.v_type->as_human_readable()); if (var.parent_type != nullptr) { - auto union_parent = var.parent_type->try_as(); + const auto union_parent = var.parent_type->try_as(); if (union_parent != nullptr) { td::JsonBuilder parent_type_builder; auto parent_type_array_builder = parent_type_builder.enter_array(); @@ -114,20 +116,17 @@ void pipeline_generate_source_map(std::ostream& debug_out) { } parent_type_array_builder.leave(); - varbo("possible_qualifier_types", td::JsonRaw(parent_type_builder.string_builder().as_cslice())); + var_array_value_object("possible_qualifier_types", td::JsonRaw(parent_type_builder.string_builder().as_cslice())); } } - // varbo("parent_type", var.parent_type == nullptr ? "" : var.parent_type->as_human_readable()); if (!value.empty()) { - varbo("value", value); + var_array_value_object("value", value); } } - vararrb.leave(); - - td::JsonRaw vararrs(varb.string_builder().as_cslice()); + var_array_builder.leave(); - ob("vars", vararrs); + ob("vars", td::JsonRaw(var_builder.string_builder().as_cslice())); ob("func", entry.func_name); if (entry.inlined_to_func_name != "") { ob("inlined_to_func", entry.inlined_to_func_name); @@ -140,14 +139,14 @@ void pipeline_generate_source_map(std::ostream& debug_out) { ob("after_inlined_function_call", td::JsonBool(entry.after_inlined_function_call)); } } - arrb.leave(); + array_builder.leave(); - objb("locations", td::JsonRaw(jsonb.string_builder().as_cslice())); + root_builder_obj("locations", td::JsonRaw(jsonb.string_builder().as_cslice())); } - objb.leave(); + root_builder_obj.leave(); - debug_out << _jb.string_builder().as_cslice().str(); + debug_out << root_builder.string_builder().as_cslice().str(); } } // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h index 673cee185a..c09be0d23d 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -32,7 +32,6 @@ #define tolk_assert(expr) if(UNLIKELY(!(expr))) 
on_assertion_failed(#expr, __FILE__, __LINE__); namespace tolk { -struct Op; GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN void on_assertion_failed(const char *description, const char *file_name, int line_number); From 40a81d691bb7ac8ed5ad5014ed9b96e722041179 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:41:06 +0400 Subject: [PATCH 21/27] more comments --- tolk/debug-info.cpp | 2 +- tolk/pipe-generate-source-map.cpp | 4 ++-- tolk/tolk.h | 37 +++++++++++++++++++++++++------ 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 5d6c260cd4..60ac01391d 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -47,7 +47,7 @@ void insert_debug_info(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t info.loc.line = pos.line_no; info.loc.line_offset = line_offset; info.loc.col = pos.char_no - 1; - info.loc.length = pos.line_str.length(); + info.loc.length = 1; // Once we have the actual length of node, we should use it here } info.func_name = code.fun_ref->name; diff --git a/tolk/pipe-generate-source-map.cpp b/tolk/pipe-generate-source-map.cpp index 9c0c368993..6692adebe1 100644 --- a/tolk/pipe-generate-source-map.cpp +++ b/tolk/pipe-generate-source-map.cpp @@ -91,9 +91,9 @@ void pipeline_generate_source_map(std::ostream& debug_out) { ob("file", entry.loc.file); ob("line", static_cast(entry.loc.line)); - ob("pos", static_cast(entry.loc.col)); + ob("col", static_cast(entry.loc.col)); ob("line_offset", static_cast(entry.loc.line_offset)); - ob("length", static_cast(entry.loc.length)); + ob("length", static_cast(entry.loc.length)); // Always 1 for now td::JsonBuilder var_builder; auto var_array_builder = var_builder.enter_array(); diff --git a/tolk/tolk.h b/tolk/tolk.h index c09be0d23d..c5b6ae8ae5 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -294,7 +294,15 @@ struct SourceMapEntry { * Unique ID of this entry. 
*/ size_t idx{}; + + /** + * If true, entry represents code before first statement. + */ bool is_entry{}; + + /** + * Human-readable description of current entry. + */ std::string descr{}; /** @@ -311,18 +319,39 @@ struct SourceMapEntry { * Name oj outer function which contains this code. */ std::string func_name; + + /** + * If a function was inlined, this field will contain the name + * of the function where the code was inlined. + */ std::string inlined_to_func_name; /** * Whenever outer function is inlined and how. */ FunctionInlineMode func_inline_mode; + + /** + * Marks the first instruction of inlined function. + */ bool before_inlined_function_call{false}; + + /** + * Marks the last instruction of inlined function. + */ bool after_inlined_function_call{false}; + + /** + * The AST node for which this entry was generated. + */ + std::string ast_kind; + #ifdef TOLK_DEBUG + /** + * String representation of `Op` for which this entry was generated. + */ std::string opcode; #endif - std::string ast_kind; }; struct SourceMapGlobalVariable { @@ -336,12 +365,6 @@ struct SourceMapGlobalVariable { std::string type; }; -struct SourceMap { - std::string version; - std::vector globals; - std::vector entries; -}; - struct Op { enum OpKind { _Nop, From 8e81f1b75c7e5ed5e2bc27e9b3e4f04f55b7d9ef Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:42:49 +0400 Subject: [PATCH 22/27] more comments --- tolk/tolk.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tolk/tolk.h b/tolk/tolk.h index c5b6ae8ae5..45dd803cf7 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -400,7 +400,12 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; + + /** + * Current ID of source map entry, see insert_debug_info. 
+ */ size_t source_map_entry_idx{0}; + Op(SrcLocation loc, OpKind cl) : cl(cl), flags(0), loc(loc) { } Op(SrcLocation loc, OpKind cl, const std::vector& left) @@ -1262,6 +1267,8 @@ AsmOp push_const(SrcLocation loc, td::RefInt256 x); void define_builtins(); void patch_builtins_after_stdlib_loaded(); + + /* * * OUTPUT CODE GENERATOR From c1ebc3c5d7fff104cea68e5878a061af47ef3c79 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 12:20:38 +0400 Subject: [PATCH 23/27] don't open source map file if collect_source_map is false --- tolk/tolk-main.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index a523615500..cacd68fa5f 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -287,10 +287,14 @@ int main(int argc, char* const argv[]) { const auto source_map_filename = G.settings.source_map_output_filename.empty() ? "./source_map.json" : G.settings.source_map_output_filename; - std::ofstream source_map_out(source_map_filename); - if (!source_map_out.is_open()) { - std::cerr << "Failed to create source map file " << source_map_filename << std::endl; - return 2; + + std::ofstream source_map_out; + if (G.settings.collect_source_map) { + source_map_out.open(source_map_filename); + if (!source_map_out.is_open()) { + std::cerr << "Failed to create source map file " << source_map_filename << std::endl; + return 2; + } } int exit_code = tolk_proceed(argv[optind], source_map_out); From 8445fec761cc213f670269c9c17aa7f5606be86f Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 12:30:10 +0400 Subject: [PATCH 24/27] fix --- tolk/debug-info.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tolk/debug-info.cpp b/tolk/debug-info.cpp index 60ac01391d..6cdc2efb50 100644 --- a/tolk/debug-info.cpp +++ b/tolk/debug-info.cpp @@ -15,9 +15,10 @@ void insert_debug_info(SrcLocation loc, ASTNodeKind 
kind, CodeBlob& code, size_t } #ifdef TOLK_DEBUG - const auto last_op = *std::find_if(code._vector_of_ops.rbegin(), code._vector_of_ops.rend(), [](const auto& it) { + const auto last_op = std::find_if(code._vector_of_ops.rbegin(), code._vector_of_ops.rend(), [](const auto& it) { return it->cl != Op::_DebugInfo; }); + const Op* last_op_ptr = last_op != code._vector_of_ops.rend() ? *last_op : nullptr; #endif auto& op = code.emplace_back(loc, Op::_DebugInfo); @@ -29,9 +30,9 @@ void insert_debug_info(SrcLocation loc, ASTNodeKind kind, CodeBlob& code, size_t info.is_entry = kind == ast_function_declaration; #ifdef TOLK_DEBUG - if (last_op) { + if (last_op_ptr) { std::stringstream st; - last_op->show(st, code.vars, "", 4); + last_op_ptr->show(st, code.vars, "", 4); info.opcode = st.str(); } From cefc842157d0f0d35f39b05675fe77d067cac289 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 12:32:17 +0400 Subject: [PATCH 25/27] fix --- tolk/pipe-ast-to-legacy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 8ea9ab1b99..da80d93067 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -658,7 +658,7 @@ static std::vector gen_compile_time_code_instead_of_fun_call(CodeBlob std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_type, SrcLocation loc, FunctionPtr f_inlined, AnyExprV self_obj, bool is_before_immediate_return, const std::vector>& vars_per_arg) { insert_debug_info(loc, ast_function_call, code); - if (G.settings.collect_source_map) { + if (G.settings.collect_source_map && G.source_map.size() > 0) { // Inlined functions are tricky for handling in debuggers, code coverage and other tools // which uses source maps. To simplify handling we explicitly mark start and end instructions // of inlined function, so tools can understand when we step into and step out inlined function. 
@@ -714,7 +714,7 @@ std::vector gen_inline_fun_call_in_place(CodeBlob& code, TypePtr ret_ visitor.start_visiting_function(f_inlined, v_ast_root); insert_debug_info(loc, ast_function_call, code); - if (G.settings.collect_source_map) { + if (G.settings.collect_source_map && G.source_map.size() > 0) { // Mark end instruction as well G.source_map.at(G.source_map.size() - 1).after_inlined_function_call = true; } From e737e6e20b5cb68a7bb8ae54504ce43d4864002d Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:00:55 +0400 Subject: [PATCH 26/27] better JSON format --- tolk/pipe-generate-source-map.cpp | 109 +++++++++++++++++++----------- 1 file changed, 68 insertions(+), 41 deletions(-) diff --git a/tolk/pipe-generate-source-map.cpp b/tolk/pipe-generate-source-map.cpp index 6692adebe1..a771e8e61d 100644 --- a/tolk/pipe-generate-source-map.cpp +++ b/tolk/pipe-generate-source-map.cpp @@ -53,51 +53,55 @@ void pipeline_generate_source_map(std::ostream& debug_out) { auto array_builder = jsonb.enter_array(); for (size_t i = 0; i < G.source_map.size(); ++i) { - const auto &entry = G.source_map[i]; + const auto& entry = G.source_map[i]; auto value_builder = array_builder.enter_value(); auto ob = value_builder.enter_object(); ob("idx", td::JsonRaw(std::to_string(entry.idx))); - if (entry.descr.size() != 0) { - ob("descr", entry.descr); - } - - if (entry.is_entry) { - ob("is_entry", td::JsonBool(entry.is_entry)); - } - - ob("ast_kind", entry.ast_kind); - #ifdef TOLK_DEBUG - if (i + 1 < G.source_map.size()) { - ob("opcode", G.source_map[i + 1].opcode); - } + { + td::JsonBuilder debugb; + auto debug_builder = debugb.enter_object(); + if (i + 1 < G.source_map.size()) { + debug_builder("opcode", G.source_map[i + 1].opcode); + } - if (const auto file = G.all_src_files.find_file(entry.loc.file)) { - const auto& pos = file->convert_offset(entry.loc.offset); - std::string line = std::string(pos.line_str); - ob("line_str", line); + if (const 
auto file = G.all_src_files.find_file(entry.loc.file)) { + const auto& pos = file->convert_offset(entry.loc.offset); + const auto line = std::string(pos.line_str); + debug_builder("line_str", line); - std::string underline = ""; - for (int j = 0; j < entry.loc.col; ++j) { - underline += " "; + std::string underline = ""; + for (int j = 0; j < entry.loc.col; ++j) { + underline += " "; + } + underline += "^"; + + debug_builder("line_off", underline); } - underline += "^"; + debug_builder.leave(); - ob("line_off", underline); + ob("debug", td::JsonRaw(debugb.string_builder().as_cslice())); } #endif - ob("file", entry.loc.file); - ob("line", static_cast(entry.loc.line)); - ob("col", static_cast(entry.loc.col)); - ob("line_offset", static_cast(entry.loc.line_offset)); - ob("length", static_cast(entry.loc.length)); // Always 1 for now + { + td::JsonBuilder locb; + auto loc_builder = locb.enter_object(); + loc_builder("file", entry.loc.file); + loc_builder("line", static_cast(entry.loc.line)); + loc_builder("col", static_cast(entry.loc.col)); + loc_builder("line_offset", static_cast(entry.loc.line_offset)); + loc_builder("length", static_cast(entry.loc.length)); + loc_builder.leave(); + + ob("loc", td::JsonRaw(locb.string_builder().as_cslice())); + } td::JsonBuilder var_builder; auto var_array_builder = var_builder.enter_array(); - for (const auto &[var, value] : entry.vars) { + for (const auto& [var, value] : entry.vars) { auto var_array_builder_value = var_array_builder.enter_value(); auto var_array_value_object = var_array_builder_value.enter_object(); @@ -116,27 +120,50 @@ void pipeline_generate_source_map(std::ostream& debug_out) { } parent_type_array_builder.leave(); - var_array_value_object("possible_qualifier_types", td::JsonRaw(parent_type_builder.string_builder().as_cslice())); + var_array_value_object("possible_qualifier_types", + td::JsonRaw(parent_type_builder.string_builder().as_cslice())); } } if (!value.empty()) { - var_array_value_object("value", value); 
+ var_array_value_object("constant_value", value); } } var_array_builder.leave(); ob("vars", td::JsonRaw(var_builder.string_builder().as_cslice())); - ob("func", entry.func_name); - if (entry.inlined_to_func_name != "") { - ob("inlined_to_func", entry.inlined_to_func_name); - } - ob("func_inline_mode", static_cast(entry.func_inline_mode)); - if (entry.before_inlined_function_call) { - ob("before_inlined_function_call", td::JsonBool(entry.before_inlined_function_call)); - } - if (entry.after_inlined_function_call) { - ob("after_inlined_function_call", td::JsonBool(entry.after_inlined_function_call)); + + { + td::JsonBuilder ctxb; + auto ctx_builder = ctxb.enter_object(); + + if (entry.descr.size() != 0) { + ctx_builder("descr", entry.descr); // Human-readable description + } + + if (entry.is_entry) { + ctx_builder("is_entry", td::JsonBool(entry.is_entry)); // Marks function entry points + } + + ctx_builder("ast_kind", entry.ast_kind); // AST node type + + ctx_builder("func_name", entry.func_name); + if (entry.inlined_to_func_name != "") { + ctx_builder("inlined_to_func", entry.inlined_to_func_name); + } + + ctx_builder("func_inline_mode", static_cast(entry.func_inline_mode)); + + if (entry.before_inlined_function_call) { + ctx_builder("before_inlined_function_call", td::JsonBool(entry.before_inlined_function_call)); + } + + if (entry.after_inlined_function_call) { + ctx_builder("after_inlined_function_call", td::JsonBool(entry.after_inlined_function_call)); + } + ctx_builder.leave(); + + ob("context", td::JsonRaw(ctxb.string_builder().as_cslice())); } } array_builder.leave(); From 011384bc16bfdf1ec5564e7e3836ac0e5fea4a25 Mon Sep 17 00:00:00 2001 From: i582 <51853996+i582@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:24:28 +0400 Subject: [PATCH 27/27] add JSON schema --- tolk/source-map-schema-v1.json | 239 +++++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 tolk/source-map-schema-v1.json diff --git 
a/tolk/source-map-schema-v1.json b/tolk/source-map-schema-v1.json new file mode 100644 index 0000000000..342282e8ff --- /dev/null +++ b/tolk/source-map-schema-v1.json @@ -0,0 +1,239 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Tolk Source Map Schema v1", + "description": "JSON Schema for Tolk compiler source map v1 files", + "type": "object", + "properties": { + "version": { + "description": "Schema version", + "type": "string", + "enum": [ + "1" + ] + }, + "files": { + "description": "Array of source files processed by the compiler", + "type": "array", + "items": { + "$ref": "#/$defs/sourceFile" + } + }, + "globals": { + "description": "Array of global variables declared in the program", + "type": "array", + "items": { + "$ref": "#/$defs/globalVariable" + } + }, + "locations": { + "description": "Array of debug locations with variable state information", + "type": "array", + "items": { + "$ref": "#/$defs/debugLocation" + } + } + }, + "required": [ + "version", + "files", + "globals", + "locations" + ], + "$defs": { + "sourceFile": { + "description": "Information about a source file", + "type": "object", + "properties": { + "path": { + "description": "Path to the source file", + "type": "string" + }, + "is_stdlib": { + "description": "Whether this is a standard library file", + "type": "boolean" + }, + "content": { + "description": "Complete source code content of the file", + "type": "string" + } + }, + "required": [ + "path", + "is_stdlib", + "content" + ] + }, + "globalVariable": { + "description": "Information about a global variable", + "type": "object", + "properties": { + "name": { + "description": "Name of the global variable", + "type": "string" + }, + "type": { "description": "Type of the global variable", "type": "string" } + }, + "required": [ + "name", + "type" + ] + }, + "sourceLocation": { + "description": "Source code location information", + "type": "object", + "properties": { + "file": { + "description": "Path to the source file", + "type": "string" + }, + "line": { +
"description": "0-based line number", + "type": "integer" + }, + "col": { + "description": "0-based column number", + "type": "integer" + }, + "line_offset": { + "description": "Offset within the line", + "type": "integer", + "minimum": 0 + }, + "length": { + "description": "Length of the relevant code segment", + "type": "integer" + } + }, + "required": [ + "file", + "line", + "col", + "line_offset", + "length" + ] + }, + "functionContext": { + "description": "Function execution context", + "type": "object", + "properties": { + "descr": { + "description": "Human-readable description", + "type": "string" + }, + "is_entry": { + "description": "Whether this is a function entry point", + "type": "boolean" + }, + "ast_kind": { + "description": "AST node type", + "type": "string" + }, + "func_name": { + "description": "Name of the containing function", + "type": "string" + }, + "inlined_to_func": { + "description": "Name of function where this was inlined (if applicable)", + "type": "string" + }, + "func_inline_mode": { + "description": "Function inline mode: 0 = notCalculated (inline mode not yet determined), 1 = inlineViaFif (inlined via .fif representation), 2 = inlineRef (inlined by reference), 3 = inlineInPlace (inlined directly in place), 4 = noInline (function is not inlined)", + "type": "integer", + "enum": [0, 1, 2, 3, 4] + }, + "before_inlined_function_call": { + "description": "Whether this is before an inlined function call", + "type": "boolean" + }, + "after_inlined_function_call": { + "description": "Whether this is after an inlined function call", + "type": "boolean" + } + }, + "required": [ + "func_name", + "func_inline_mode", + "ast_kind" + ] + }, + "debugInfo": { + "description": "Debug-specific information (only present in debug builds)", + "type": "object", + "properties": { + "opcode": { + "description": "Generated TVM opcode", + "type": "string" + }, + "line_str": { + "description": "Source line content", + "type": "string" + }, + "line_off": { + 
"description": "Visual pointer showing the location in the source line (spaces followed by ^)", + "type": "string" + } + } + }, + "debugLocation": { + "description": "Debug location with variable state and context", + "type": "object", + "properties": { + "idx": { + "description": "Unique identifier for this debug location", + "type": "integer", + "minimum": 0 + }, + "loc": { + "description": "Source code location", + "$ref": "#/$defs/sourceLocation" + }, + "vars": { + "description": "Variables available at this debug location", + "type": "array", + "items": { + "$ref": "#/$defs/variable" + } + }, + "context": { + "description": "Execution context information", + "$ref": "#/$defs/functionContext" + }, + "debug": { + "description": "Debug-specific information", + "$ref": "#/$defs/debugInfo" + } + }, + "required": [ + "idx", + "loc", + "context" + ] + }, + "variable": { + "description": "Variable information at a debug location", + "type": "object", + "properties": { + "name": { + "description": "Variable name", + "type": "string" + }, + "type": "string", + "constant_value": { + "description": "Constant value if variable has one", + "type": "string" + }, + "possible_qualifier_types": { + "description": "Possible types for union variables", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "type" + ] + } + } +}