diff --git a/lib/liquid.rb b/lib/liquid.rb
index 4d0a71a64..b28d828ff 100644
--- a/lib/liquid.rb
+++ b/lib/liquid.rb
@@ -52,6 +52,7 @@ module Liquid
 require "liquid/version"
 require "liquid/deprecations"
 require "liquid/const"
+require "liquid/byte_tables"
 require 'liquid/standardfilters'
 require 'liquid/file_system'
 require 'liquid/parser_switching'
diff --git a/lib/liquid/block_body.rb b/lib/liquid/block_body.rb
index e4ada7d16..262ddec2e 100644
--- a/lib/liquid/block_body.rb
+++ b/lib/liquid/block_body.rb
@@ -130,16 +130,23 @@ def self.rescue_render_node(context, output, line_number, exc, blank_tag)
         case
         when token.start_with?(TAGSTART)
           whitespace_handler(token, parse_context)
-          unless token =~ FullToken
+          # rubocop:disable Metrics/BlockNesting
+          fast = try_parse_tag_token(token)
+          if fast
+            tag_name, markup, newlines = fast
+          elsif token =~ FullToken
+            tag_name = Regexp.last_match(2)
+            markup = Regexp.last_match(4)
+            newlines = parse_context.line_number ? Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n") : 0
+          else
             return handle_invalid_tag_token(token, parse_context, &block)
           end
-          tag_name = Regexp.last_match(2)
-          markup = Regexp.last_match(4)
+          # rubocop:enable Metrics/BlockNesting

-          if parse_context.line_number
+          if parse_context.line_number && newlines > 0
             # newlines inside the tag should increase the line number,
             # particularly important for multiline {% liquid %} tags
-            parse_context.line_number += Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n")
+            parse_context.line_number += newlines
           end

           if tag_name == 'liquid'
@@ -260,6 +267,85 @@ def create_variable(token, parse_context)
       BlockBody.raise_missing_variable_terminator(token, parse_context)
     end

+    # Fast path for parsing "{%[-] tag_name markup [-]%}" tag tokens.
+    # Returns [tag_name, markup, newline_count] or nil.
+    #
+    # Accepts tokens where:
+    #   - The token ends with the "%}" terminator
+    #   - Tag name is '#' or starts with [a-zA-Z_] followed by \w chars
+    #     (matching TagName = /#|\w+/ exactly — no hyphens, no '?' suffix)
+    #   - Whitespace is spaces, tabs, newlines, \r, \f, \v — the bytes \s
+    #     matches. \x00 is in ByteTables::WHITESPACE but is not skipped here.
+    #   - Whitespace control dashes are at positions 2 and len-3
+    # Rejects (returns nil → caller falls back to FullToken regex):
+    #   - Tokens that do not end with "%}" (their last two bytes would
+    #     otherwise be silently dropped from the markup)
+    #   - Tokens with no tag name, e.g. "{%" or "{% %}"
+    #   - Tag names starting with a digit (valid in FullToken but rare)
+    #   - Any structure the byte-walk can't confidently parse
+    # Fallback: nil return triggers the original `token =~ FullToken` regex
+    # match in parse_for_document, preserving identical behavior for any
+    # input the fast path doesn't handle.
+    def try_parse_tag_token(token)
+      # markup_end below assumes a trailing "%}"; verify it instead of
+      # chopping two arbitrary bytes off an unterminated token.
+      return unless token.end_with?("%}")
+
+      len = token.bytesize
+      pos = 2 # skip "{%"
+      return if pos >= len
+
+      pos += 1 if token.getbyte(pos) == ByteTables::DASH
+      newline_count = 0
+
+      # Skip whitespace before tag name, count newlines
+      while pos < len
+        b = token.getbyte(pos)
+        if b == ByteTables::NEWLINE
+          pos += 1
+          newline_count += 1
+        elsif b != 0 && ByteTables::WHITESPACE[b] # \x00 is not \s
+          pos += 1
+        else
+          break
+        end
+      end
+      return if pos >= len
+
+      # Scan tag name: '#' or \w+ (matching TagName = /#|\w+/)
+      name_start = pos
+      b = token.getbyte(pos)
+      if b == ByteTables::HASH
+        pos += 1
+      elsif ByteTables::IDENT_START[b]
+        pos += 1
+        pos += 1 while pos < len && ByteTables::WORD[token.getbyte(pos)]
+      else
+        return
+      end
+      tag_name = token.byteslice(name_start, pos - name_start)
+
+      # Skip whitespace after tag name, count newlines
+      while pos < len
+        b = token.getbyte(pos)
+        if b == ByteTables::NEWLINE
+          pos += 1
+          newline_count += 1
+        elsif b != 0 && ByteTables::WHITESPACE[b] # \x00 is not \s
+          pos += 1
+        else
+          break
+        end
+      end
+
+      # Markup: everything up to optional '-' before '%}'
+      markup_end = len - 2 # skip '%}'
+      markup_end -= 1 if markup_end > pos && token.getbyte(markup_end - 1) == ByteTables::DASH
+      markup = pos >= markup_end ? "" : token.byteslice(pos, markup_end - pos)
+
+      [tag_name, markup, newline_count]
+    end
+
     # @deprecated Use {.raise_missing_tag_terminator} instead
     def raise_missing_tag_terminator(token, parse_context)
       BlockBody.raise_missing_tag_terminator(token, parse_context)
diff --git a/lib/liquid/byte_tables.rb b/lib/liquid/byte_tables.rb
new file mode 100644
index 000000000..b3728e753
--- /dev/null
+++ b/lib/liquid/byte_tables.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Liquid
+  # Pre-computed 256-entry boolean lookup tables for byte classification.
+  # Built once at load time; used as TABLE[byte] — a single array index
+  # instead of 3-5 comparison operators per check.
+  module ByteTables
+    # [a-zA-Z_] — valid first byte of an identifier
+    IDENT_START = Array.new(256, false).tap do |t|
+      (97..122).each { |b| t[b] = true } # a-z
+      (65..90).each { |b| t[b] = true } # A-Z
+      t[95] = true # _
+    end.freeze
+
+    # [a-zA-Z0-9_-] — valid continuation byte of an identifier
+    IDENT_CONT = Array.new(256, false).tap do |t|
+      (97..122).each { |b| t[b] = true } # a-z
+      (65..90).each { |b| t[b] = true } # A-Z
+      (48..57).each { |b| t[b] = true } # 0-9
+      t[95] = true # _
+      t[45] = true # -
+    end.freeze
+
+    # [a-zA-Z0-9_] — \w equivalent (no hyphen), for tag name scanning
+    WORD = Array.new(256, false).tap do |t|
+      (97..122).each { |b| t[b] = true } # a-z
+      (65..90).each { |b| t[b] = true } # A-Z
+      (48..57).each { |b| t[b] = true } # 0-9
+      t[95] = true # _
+    end.freeze
+
+    # [0-9] — ASCII digit
+    DIGIT = Array.new(256, false).tap do |t|
+      (48..57).each { |b| t[b] = true }
+    end.freeze
+
+    # Matches bytes removed by Ruby's String#strip: \x00, \t, \n, \v, \f, \r, space
+    WHITESPACE = Array.new(256, false).tap do |t|
+      [0, 9, 10, 11, 12, 13, 32].each { |b| t[b] = true }
+    end.freeze
+
+    # Byte constants for delimiters and punctuation
+    NEWLINE = 10
+    DASH = 45 # '-'
+    DOT = 46 # '.'
+    HASH = 35 # '#'
+  end
+end
diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb
index 00c40a4c3..640a7a4c2 100644
--- a/lib/liquid/expression.rb
+++ b/lib/liquid/expression.rb
@@ -16,16 +16,9 @@ class Expression
       '-' => VariableLookup.parse("-", nil).freeze,
     }.freeze

-    DOT = ".".ord
-    ZERO = "0".ord
-    NINE = "9".ord
-    DASH = "-".ord
-
     # Use an atomic group (?>...) to avoid pathological backtracing from
     # malicious input as described in https://github.com/Shopify/liquid/issues/1357
     RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
-    INTEGER_REGEX = /\A(-?\d+)\z/
-    FLOAT_REGEX = /\A(-?\d+)\.\d+\z/

     class << self
       def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
@@ -35,7 +28,15 @@ def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
       def parse(markup, ss = StringScanner.new(""), cache = nil)
         return unless markup

-        markup = markup.strip # markup can be a frozen string
+        # Guard: only call .strip when the first or last byte is whitespace.
+        # String#strip always allocates a new String, even when there's nothing
+        # to strip. ByteTables::WHITESPACE matches the same bytes that strip
+        # removes (space, \t, \n, \v, \f, \r, \x00). When neither end has
+        # whitespace, we skip the call and avoid ~4,464 allocations per compile.
+        first = markup.getbyte(0)
+        if first && (ByteTables::WHITESPACE[first] || ByteTables::WHITESPACE[markup.getbyte(markup.bytesize - 1)])
+          markup = markup.strip
+        end

         if (markup.start_with?('"') && markup.end_with?('"')) ||
           (markup.start_with?("'") && markup.end_with?("'"))
@@ -71,56 +72,78 @@ def inner_parse(markup, ss, cache)
         end
       end

-      def parse_number(markup, ss)
-        # check if the markup is simple integer or float
-        case markup
-        when INTEGER_REGEX
-          return Integer(markup, 10)
-        when FLOAT_REGEX
-          return markup.to_f
-        end
-
-        ss.string = markup
-        # the first byte must be a digit or a dash
-        byte = ss.scan_byte
-
-        return false if byte != DASH && (byte < ZERO || byte > NINE)
-
-        if byte == DASH
-          peek_byte = ss.peek_byte
-
-          # if it starts with a dash, the next byte must be a digit
-          return false if peek_byte.nil? || !(peek_byte >= ZERO && peek_byte <= NINE)
+      # Fast path for number parsing. Accepts:
+      #   - Simple integers: "42", "-7"
+      #   - Simple floats: "3.14", "-0.5"
+      #   - Multi-dot floats (truncated at second dot): "1.2.3" → 1.2
+      #   - Trailing-dot floats: "123." → 123.0 (likewise "1..2" → 1.0)
+      # Rejects (returns nil → caller treats as VariableLookup):
+      #   - Non-numeric input: "hello", ""
+      #   - Inputs with any non-digit/non-dot byte after the number:
+      #     "1.2.3a", "1.2a", "1.a", "1.size"
+      # Fallback: nil return causes caller to fall through to VariableLookup.parse,
+      # which is the same path the old regex-based code took on non-match.
+      def parse_number(markup, _ss = nil)
+        len = markup.bytesize
+        return if len == 0
+
+        pos = 0
+        first = markup.getbyte(pos)
+
+        if first == ByteTables::DASH
+          pos += 1
+          return if pos >= len
+          return unless ByteTables::DIGIT[markup.getbyte(pos)]
+
+          pos += 1
+        elsif ByteTables::DIGIT[first]
+          pos += 1
+        else
+          return
         end

-        # The markup could be a float with multiple dots
-        first_dot_pos = nil
-        num_end_pos = nil
-
-        while (byte = ss.scan_byte)
-          return false if byte != DOT && (byte < ZERO || byte > NINE)
-
-          # we found our number and now we are just scanning the rest of the string
-          next if num_end_pos
-
-          if byte == DOT
-            if first_dot_pos.nil?
-              first_dot_pos = ss.pos
-            else
-              # we found another dot, so we know that the number ends here
-              num_end_pos = ss.pos - 1
-            end
-          end
-        end
-
-        num_end_pos = markup.length if ss.eos?
-
-        if num_end_pos
-          # number ends with a number "123.123"
-          markup.byteslice(0, num_end_pos).to_f
-        else
-          # number ends with a dot "123."
-          markup.byteslice(0, first_dot_pos).to_f
+        # Scan digits
+        pos += 1 while pos < len && ByteTables::DIGIT[markup.getbyte(pos)]
+
+        # Consumed everything = simple integer
+        return Integer(markup, 10) if pos == len
+
+        # Anything else must continue with a dot — three float cases:
+        #   1. Simple float: "123.456" → markup.to_f
+        #   2. Multi-dot: "1.2.3.4" → truncate at second dot → 1.2
+        #   3. Trailing dot: "123." → truncate before dot → 123.0
+        return unless markup.getbyte(pos) == ByteTables::DOT
+
+        dot_pos = pos
+        pos += 1
+        digit_start = pos
+        pos += 1 while pos < len && ByteTables::DIGIT[markup.getbyte(pos)]
+        has_fraction = pos > digit_start
+
+        # Case 1: simple float like "123.456"
+        return markup.to_f if has_fraction && pos == len
+
+        # Cases 2 and 3: everything left must be digits or dots. Reject any
+        # other byte (e.g. "1.2.3a", "1.a" → nil, matching the old
+        # StringScanner loop, which returned false for them). Remember the
+        # first dot seen here: for case 2 it marks where the number ends.
+        num_end = nil
+        while pos < len
+          b = markup.getbyte(pos)
+          if b == ByteTables::DOT
+            num_end ||= pos
+          elsif !ByteTables::DIGIT[b]
+            return
+          end
+          pos += 1
+        end
+
+        if has_fraction
+          # Case 2: multi-dot like "1.2.3.4"
+          markup.byteslice(0, num_end || len).to_f
+        else
+          # Case 3: trailing dot like "123."
+          markup.byteslice(0, dot_pos).to_f
         end
       end
     end
diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb
index 4fba2a658..56054c287 100644
--- a/lib/liquid/variable_lookup.rb
+++ b/lib/liquid/variable_lookup.rb
@@ -4,6 +4,25 @@ module Liquid
   class VariableLookup
     COMMAND_METHODS = ['size', 'first', 'last'].freeze

+    # Matches simple identifier chains: name(.name)* with no brackets/quotes
+    SIMPLE_LOOKUP_RE = /\A[\w-]+\??(?:\.[\w-]+\??)*\z/
+
+    # Returns true when markup is a simple dotted identifier chain that the
+    # fast path in initialize can handle. Accepts:
+    #   - Single names: "product", "item"
+    #   - Dotted chains: "product.title", "cart.items.first"
+    #   - Question-mark suffixes: "product.available?"
+    #   - Hyphens in names: "my-var.some-field"
+    # Rejects (falls through to VariableParser regex):
+    #   - Bracket lookups: "product[0]", "hash['key']"
+    #   - Quoted strings, empty input, leading/trailing dots
+    # Fallback: when this returns false, initialize uses the original
+    # markup.scan(VariableParser) path — behavior is identical to
+    # the pre-optimization code for any input the fast path rejects.
+    def self.simple_lookup?(markup)
+      markup.bytesize > 0 && markup.match?(SIMPLE_LOOKUP_RE)
+    end
+
     attr_reader :name, :lookups

     def self.parse(markup, string_scanner = StringScanner.new(""), cache = nil)
@@ -11,6 +30,31 @@ def self.parse(markup, string_scanner = StringScanner.new(""), cache = nil)
     end

     def initialize(markup, string_scanner = StringScanner.new(""), cache = nil)
+      if self.class.simple_lookup?(markup)
+        dot_pos = markup.index('.')
+        if dot_pos.nil?
+          @name = markup
+          @lookups = Const::EMPTY_ARRAY
+          @command_flags = 0
+          return
+        end
+
+        @name = markup.byteslice(0, dot_pos)
+        @lookups = []
+        @command_flags = 0
+        pos = dot_pos + 1
+        len = markup.bytesize
+        while pos < len
+          seg_start = pos
+          pos += 1 while pos < len && markup.getbyte(pos) != ByteTables::DOT
+          seg = markup.byteslice(seg_start, pos - seg_start)
+          @command_flags |= 1 << @lookups.length if COMMAND_METHODS.include?(seg)
+          @lookups << seg
+          pos += 1 # skip dot
+        end
+        return
+      end
+
       lookups = markup.scan(VariableParser)

       name = lookups.shift
diff --git a/performance/bench_quick.rb b/performance/bench_quick.rb
new file mode 100644
index 000000000..a98333b9d
--- /dev/null
+++ b/performance/bench_quick.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+# Quick benchmark: measures parse µs, render µs, and object allocations
+# Outputs machine-readable metrics to stdout
+
+require_relative 'theme_runner'
+
+# RubyVM::YJIT.enable was added in Ruby 3.3; earlier YJIT-capable builds define
+# the module without it, so probe for the method as well as the constant.
+RubyVM::YJIT.enable if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)
+
+# Runs the block `iterations` times and returns each run's wall time in µs.
+# GC is disabled while a run is timed and a full GC is forced afterwards, so
+# a collection never lands inside a timed run.
+def measure_us(iterations = 10)
+  Array.new(iterations) do
+    GC.disable
+    t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+    yield
+    t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+    GC.enable
+    GC.start
+    (t1 - t0) * 1_000_000 # µs
+  end
+end
+
+runner = ThemeRunner.new
+
+# Warmup — enough iterations for YJIT to fully optimize hot paths
+20.times { runner.compile }
+20.times { runner.render }
+
+GC.start
+GC.compact if GC.respond_to?(:compact)
+
+parse_times = measure_us { runner.compile }
+render_times = measure_us { runner.render }
+
+# Measure object allocations for one parse+render cycle.
+# GC.stat(:total_allocated_objects) is a cumulative counter, so the delta is
+# the number of objects allocated in between, whether or not GC runs.
+GC.start
+before = GC.stat(:total_allocated_objects)
+runner.compile
+runner.render
+allocations = GC.stat(:total_allocated_objects) - before
+
+parse_us = parse_times.min.round(0)
+render_us = render_times.min.round(0)
+combined_us = parse_us + render_us
+
+puts "RESULTS"
+puts "parse_us=#{parse_us}"
+puts "render_us=#{render_us}"
+puts "combined_us=#{combined_us}"
+puts "allocations=#{allocations}"
diff --git a/test/unit/fast_path_unit_test.rb b/test/unit/fast_path_unit_test.rb
new file mode 100644
index 000000000..5505b40a6
--- /dev/null
+++ b/test/unit/fast_path_unit_test.rb
@@ -0,0 +1,256 @@
+# frozen_string_literal: true
+
+require 'test_helper'
+
+# Tests for the byte-walking fast paths introduced in the ByteTables optimization.
+# Each fast path is tested for equivalence with the original regex-based code path
+# it replaces, covering accepted inputs, rejected inputs, and boundary cases.
+class FastPathUnitTest < Minitest::Test
+  # ── Expression.parse_number ────────────────────────────────────────
+  # Fast path: byte-walk with ByteTables::DIGIT
+  # Replaces: INTEGER_REGEX, FLOAT_REGEX, StringScanner loop
+
+  def test_parse_number_simple_integers
+    assert_equal(42, Liquid::Expression.parse_number("42"))
+    assert_equal(0, Liquid::Expression.parse_number("0"))
+    assert_equal(999, Liquid::Expression.parse_number("999"))
+  end
+
+  def test_parse_number_negative_integers
+    assert_equal(-7, Liquid::Expression.parse_number("-7"))
+    assert_equal(-0, Liquid::Expression.parse_number("-0"))
+    assert_equal(-123, Liquid::Expression.parse_number("-123"))
+  end
+
+  def test_parse_number_simple_floats
+    assert_equal(3.14, Liquid::Expression.parse_number("3.14"))
+    assert_equal(0.5, Liquid::Expression.parse_number("0.5"))
+    assert_equal(-0.5, Liquid::Expression.parse_number("-0.5"))
+    assert_equal(100.0, Liquid::Expression.parse_number("100.0"))
+  end
+
+  def test_parse_number_trailing_dot
+    # "123." → 123.0 (truncate before dot)
+    assert_equal(123.0, Liquid::Expression.parse_number("123."))
+    assert_equal(0.0, Liquid::Expression.parse_number("0."))
+  end
+
+  def test_parse_number_multi_dot_floats
+    # "1.2.3" → 1.2 (truncate at second dot)
+    assert_equal(1.2, Liquid::Expression.parse_number("1.2.3"))
+    assert_equal(1.2, Liquid::Expression.parse_number("1.2.3.4"))
+    assert_equal(0.0, Liquid::Expression.parse_number("0.0.0"))
+  end
+
+  def test_parse_number_rejects_non_numeric
+    assert_nil(Liquid::Expression.parse_number("hello"))
+    assert_nil(Liquid::Expression.parse_number(""))
+    assert_nil(Liquid::Expression.parse_number("abc123"))
+    assert_nil(Liquid::Expression.parse_number(".5"))
+    assert_nil(Liquid::Expression.parse_number("-.5"))
+  end
+
+  def test_parse_number_rejects_trailing_alpha_after_multi_dot
+    # "1.2.3a" must be nil, not 1.2 — these are not valid numbers
+    assert_nil(Liquid::Expression.parse_number("1.2.3a"))
+    assert_nil(Liquid::Expression.parse_number("1.2.3.4a"))
+    assert_nil(Liquid::Expression.parse_number("1.2.34a"))
+    assert_nil(Liquid::Expression.parse_number("-1.2.3a"))
+  end
+
+  def test_parse_number_rejects_bare_dash
+    assert_nil(Liquid::Expression.parse_number("-"))
+    assert_nil(Liquid::Expression.parse_number("-a"))
+  end
+
+  # ── Expression.parse strip guard ───────────────────────────────────
+  # Fast path: skip String#strip when no leading/trailing whitespace
+  # Must produce identical results to unconditional .strip
+
+  def test_parse_strips_leading_whitespace
+    assert_equal(42, Liquid::Expression.parse(" 42"))
+    assert_equal(42, Liquid::Expression.parse("\t42"))
+    assert_equal(42, Liquid::Expression.parse("\n42"))
+  end
+
+  def test_parse_strips_trailing_whitespace
+    assert_equal(42, Liquid::Expression.parse("42 "))
+    assert_equal(42, Liquid::Expression.parse("42\t"))
+    assert_equal(42, Liquid::Expression.parse("42\n"))
+  end
+
+  def test_parse_strips_both_sides
+    assert_equal(42, Liquid::Expression.parse(" 42 "))
+    assert_equal("hello", Liquid::Expression.parse(" 'hello' "))
+  end
+
+  def test_parse_no_strip_needed
+    assert_equal(42, Liquid::Expression.parse("42"))
+    assert_equal("hello", Liquid::Expression.parse("'hello'"))
+    assert_equal(true, Liquid::Expression.parse("true"))
+  end
+
+  def test_parse_strips_null_bytes
+    # String#strip removes \x00 — the WHITESPACE table must match
+    assert_equal(true, Liquid::Expression.parse("\x00true"))
+    assert_equal(true, Liquid::Expression.parse("true\x00"))
+  end
+
+  # ── VariableLookup.simple_lookup? ──────────────────────────────────
+  # Fast path: regex gate for simple a.b.c chains
+  # Must accept only inputs the byte-walk can handle correctly
+
+  def test_simple_lookup_accepts_single_names
+    assert(Liquid::VariableLookup.simple_lookup?("product"))
+    assert(Liquid::VariableLookup.simple_lookup?("x"))
+    assert(Liquid::VariableLookup.simple_lookup?("_private"))
+  end
+
+  def test_simple_lookup_accepts_dotted_chains
+    assert(Liquid::VariableLookup.simple_lookup?("product.title"))
+    assert(Liquid::VariableLookup.simple_lookup?("a.b.c.d"))
+  end
+
+  def test_simple_lookup_accepts_question_marks
+    assert(Liquid::VariableLookup.simple_lookup?("product.available?"))
+    assert(Liquid::VariableLookup.simple_lookup?("empty?"))
+  end
+
+  def test_simple_lookup_accepts_hyphens
+    assert(Liquid::VariableLookup.simple_lookup?("my-var"))
+    assert(Liquid::VariableLookup.simple_lookup?("my-var.some-field"))
+  end
+
+  def test_simple_lookup_rejects_brackets
+    refute(Liquid::VariableLookup.simple_lookup?("product[0]"))
+    refute(Liquid::VariableLookup.simple_lookup?("hash['key']"))
+    refute(Liquid::VariableLookup.simple_lookup?("[0]"))
+  end
+
+  def test_simple_lookup_rejects_empty_and_malformed
+    refute(Liquid::VariableLookup.simple_lookup?(""))
+    refute(Liquid::VariableLookup.simple_lookup?(".leading"))
+    refute(Liquid::VariableLookup.simple_lookup?("trailing."))
+    refute(Liquid::VariableLookup.simple_lookup?("a..b"))
+  end
+
+  # ── VariableLookup fast path equivalence ───────────────────────────
+  # The fast path must produce identical name, lookups, and command_flags
+  # to the original VariableParser regex path
+
+  def test_fast_path_simple_name
+    vl = Liquid::VariableLookup.new("product")
+    assert_equal("product", vl.name)
+    assert_equal([], vl.lookups)
+  end
+
+  def test_fast_path_dotted_chain
+    vl = Liquid::VariableLookup.new("product.title")
+    assert_equal("product", vl.name)
+    assert_equal(["title"], vl.lookups)
+  end
+
+  def test_fast_path_deep_chain
+    vl = Liquid::VariableLookup.new("a.b.c.d")
+    assert_equal("a", vl.name)
+    assert_equal(["b", "c", "d"], vl.lookups)
+  end
+
+  def test_fast_path_command_methods
+    vl = Liquid::VariableLookup.new("items.size")
+    assert_equal("items", vl.name)
+    assert_equal(["size"], vl.lookups)
+    assert(vl.lookup_command?(0)) # "size" is one of COMMAND_METHODS
+
+    vl2 = Liquid::VariableLookup.new("items.first")
+    assert(vl2.lookup_command?(0))
+
+    vl3 = Liquid::VariableLookup.new("items.last")
+    assert(vl3.lookup_command?(0))
+  end
+
+  def test_fast_path_non_command_lookups
+    vl = Liquid::VariableLookup.new("product.title")
+    refute(vl.lookup_command?(0))
+  end
+
+  def test_fast_path_question_mark
+    vl = Liquid::VariableLookup.new("product.available?")
+    assert_equal("product", vl.name)
+    assert_equal(["available?"], vl.lookups)
+  end
+
+  def test_bracket_lookup_falls_to_regex_path
+    vl = Liquid::VariableLookup.new("product[0]") # rejected by simple_lookup?
+    assert_equal("product", vl.name)
+    assert_equal([0], vl.lookups)
+  end
+
+  # ── BlockBody.try_parse_tag_token ──────────────────────────────────
+  # Fast path: byte-walk tag tokens instead of FullToken regex
+  # Must produce identical [tag_name, markup, newline_count] or nil
+
+  def test_tag_token_simple
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% if x %}")
+    assert_equal(["if", "x ", 0], result) # markup keeps the space before "%}"
+  end
+
+  def test_tag_token_whitespace_control_leading
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{%- if x %}")
+    assert_equal(["if", "x ", 0], result)
+  end
+
+  def test_tag_token_whitespace_control_trailing
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% if x -%}")
+    assert_equal(["if", "x ", 0], result) # the trailing "-" is not part of the markup
+  end
+
+  def test_tag_token_whitespace_control_both
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{%- if x -%}")
+    assert_equal(["if", "x ", 0], result)
+  end
+
+  def test_tag_token_no_markup
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% endif %}")
+    assert_equal(["endif", "", 0], result)
+  end
+
+  def test_tag_token_hash_comment
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% # this is a comment %}")
+    assert_equal(["#", "this is a comment ", 0], result)
+  end
+
+  def test_tag_token_with_newlines
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% \n if \n x %}")
+    assert_equal(["if", "x ", 2], result) # one newline before and one after the tag name
+  end
+
+  def test_tag_token_hyphenated_name_stops_at_hyphen
+    # TagName = /#|\w+/ does not include hyphens, so the name scan stops at "-"
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% my-tag markup %}")
+    assert_equal("my", result[0])
+  end
+
+  def test_tag_token_complex_markup
+    body = Liquid::BlockBody.new
+    result = body.send(:try_parse_tag_token, "{% for item in collection reversed %}")
+    assert_equal("for", result[0])
+    assert_equal("item in collection reversed ", result[1])
+  end
+
+  def test_tag_token_malformed_returns_nil
+    body = Liquid::BlockBody.new
+    # Token too short: nothing follows "{%"
+    assert_nil(body.send(:try_parse_tag_token, "{%"))
+    # Tag name starts with a digit: the fast path declines and returns nil
+    assert_nil(body.send(:try_parse_tag_token, "{% 123 %}"))
+  end
+end