Shopify · cpakman · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/lib/liquid.rb b/lib/liquid.rb
@@ -52,6 +52,7 @@ module Liquid
 require "liquid/version"
 require "liquid/deprecations"
 require "liquid/const"
+require "liquid/byte_tables"
 require 'liquid/standardfilters'
 require 'liquid/file_system'
 require 'liquid/parser_switching'

diff --git a/lib/liquid/block_body.rb b/lib/liquid/block_body.rb
@@ -130,16 +130,21 @@ def self.rescue_render_node(context, output, line_number, exc, blank_tag)
         case
         when token.start_with?(TAGSTART)
           whitespace_handler(token, parse_context)
-          unless token =~ FullToken
+          # rubocop:disable Metrics/BlockNesting
+          fast = try_parse_tag_token(token)
+          if fast
+            tag_name, markup, newlines = fast
+          elsif token =~ FullToken
+            tag_name = Regexp.last_match(2)
+            markup   = Regexp.last_match(4)
+            newlines = parse_context.line_number ? Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n") : 0
+          else
             return handle_invalid_tag_token(token, parse_context, &block)
           end
-          tag_name = Regexp.last_match(2)
-          markup   = Regexp.last_match(4)
+          # rubocop:enable Metrics/BlockNesting
 
-          if parse_context.line_number
-            # newlines inside the tag should increase the line number,
-            # particularly important for multiline {% liquid %} tags
-            parse_context.line_number += Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n")
+          if parse_context.line_number && newlines > 0
+            parse_context.line_number += newlines
           end
 
           if tag_name == 'liquid'
@@ -260,6 +265,77 @@ def create_variable(token, parse_context)
       BlockBody.raise_missing_variable_terminator(token, parse_context)
     end
 
+    # Fast path for parsing "{%[-] tag_name markup [-]%}" tag tokens.
+    # Returns [tag_name, markup, newline_count], or nil when the caller must
+    # fall back to the `token =~ FullToken` match in parse_for_document.
+    #
+    # Accepts tokens where:
+    #   - The token is at least "{%x%}" (5 bytes) and ends with "%}"
+    #   - Tag name is '#' or starts with [a-zA-Z_] followed by \w bytes
+    #     (a subset of TagName = /#|\w+/ — no hyphens, no '?' suffix)
+    #   - Whitespace is exactly what \s matches: space, \t, \n, \v, \f, \r
+    #   - Whitespace control dashes are at positions 2 and len-3
+    # Rejects (returns nil):
+    #   - Tokens that are too short or do not end with "%}"
+    #   - Tag names starting with a digit (valid in FullToken but rare)
+    #   - Any other structure the byte-walk can't confidently parse
+    def try_parse_tag_token(token)
+      len = token.bytesize
+      return if len < 5 || !token.end_with?("%}")
+
+      pos = 2 # skip "{%"
+      pos += 1 if token.getbyte(pos) == ByteTables::DASH
+      newline_count = 0
+
+      # Skip whitespace before tag name, count newlines. NUL is flagged in
+      # WHITESPACE (String#strip parity) but is not \s, so it is excluded here.
+      while pos < len
+        b = token.getbyte(pos)
+        if b == ByteTables::NEWLINE
+          pos += 1
+          newline_count += 1
+        elsif b != 0 && ByteTables::WHITESPACE[b]
+          pos += 1
+        else
+          break
+        end
+      end
+      return if pos >= len
+
+      # Scan tag name: '#' or an identifier made of \w bytes
+      name_start = pos
+      b = token.getbyte(pos)
+      if b == ByteTables::HASH
+        pos += 1
+      elsif ByteTables::IDENT_START[b]
+        pos += 1
+        pos += 1 while pos < len && ByteTables::WORD[token.getbyte(pos)]
+      else
+        return
+      end
+      tag_name = token.byteslice(name_start, pos - name_start)
+
+      # Skip whitespace after tag name, count newlines (NUL excluded as above)
+      while pos < len
+        b = token.getbyte(pos)
+        if b == ByteTables::NEWLINE
+          pos += 1
+          newline_count += 1
+        elsif b != 0 && ByteTables::WHITESPACE[b]
+          pos += 1
+        else
+          break
+        end
+      end
+
+      # Markup: everything up to optional '-' before '%}'
+      markup_end = len - 2 # skip '%}'
+      markup_end -= 1 if markup_end > pos && token.getbyte(markup_end - 1) == ByteTables::DASH
+      markup = pos >= markup_end ? "" : token.byteslice(pos, markup_end - pos)
+
+      [tag_name, markup, newline_count]
+    end
+
     # @deprecated Use {.raise_missing_tag_terminator} instead
     def raise_missing_tag_terminator(token, parse_context)
       BlockBody.raise_missing_tag_terminator(token, parse_context)

diff --git a/lib/liquid/byte_tables.rb b/lib/liquid/byte_tables.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Liquid
+  # 256-slot true/false tables indexed by byte value, so classifying a byte
+  # is a single TABLE[byte] lookup rather than a chain of comparisons.
+  # Every table is computed once when this file is loaded and then frozen.
+  module ByteTables
+    # First byte of an identifier: [a-zA-Z_]
+    IDENT_START = Array.new(256) do |byte|
+      (97..122).cover?(byte) || # a-z
+        (65..90).cover?(byte) || # A-Z
+        byte == 95 # _
+    end.freeze
+
+    # Later bytes of an identifier: [a-zA-Z0-9_-]
+    IDENT_CONT = Array.new(256) do |byte|
+      (97..122).cover?(byte) || # a-z
+        (65..90).cover?(byte) || # A-Z
+        (48..57).cover?(byte) || # 0-9
+        byte == 95 || # _
+        byte == 45 # -
+    end.freeze
+
+    # Same bytes as \w, [a-zA-Z0-9_] (no hyphen); used to scan tag names
+    WORD = Array.new(256) do |byte|
+      (97..122).cover?(byte) || # a-z
+        (65..90).cover?(byte) || # A-Z
+        (48..57).cover?(byte) || # 0-9
+        byte == 95 # _
+    end.freeze
+
+    # ASCII digits: [0-9]
+    DIGIT = Array.new(256) do |byte|
+      (48..57).cover?(byte)
+    end.freeze
+
+    # The bytes String#strip removes: \x00, \t, \n, \v, \f, \r, space
+    WHITESPACE = Array.new(256) do |byte|
+      [0, 9, 10, 11, 12, 13, 32].include?(byte)
+    end.freeze
+
+    # Single-byte values for delimiters and punctuation
+    NEWLINE = "\n".ord
+    DASH    = "-".ord
+    DOT     = ".".ord
+    HASH    = "#".ord
+  end
+end
diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb
@@ -16,16 +16,9 @@ class Expression
       '-' => VariableLookup.parse("-", nil).freeze,
     }.freeze
 
-    DOT = ".".ord
-    ZERO = "0".ord
-    NINE = "9".ord
-    DASH = "-".ord
-
     # Use an atomic group (?>...) to avoid pathological backtracing from
     # malicious input as described in https://github.com/Shopify/liquid/issues/1357
     RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
-    INTEGER_REGEX = /\A(-?\d+)\z/
-    FLOAT_REGEX = /\A(-?\d+)\.\d+\z/
 
     class << self
       def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
@@ -35,7 +28,15 @@ def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
       def parse(markup, ss = StringScanner.new(""), cache = nil)
         return unless markup
 
-        markup = markup.strip # markup can be a frozen string
+        # Guard: only call .strip when the first or last byte is whitespace.
+        # String#strip always allocates a new String, even when there's nothing
+        # to strip. ByteTables::WHITESPACE matches the same bytes that strip
+        # removes (space, \t, \n, \v, \f, \r, \x00). When neither end has
+        # whitespace, we skip the call and avoid ~4,464 allocations per compile.
+        first = markup.getbyte(0)
+        if first && (ByteTables::WHITESPACE[first] || ByteTables::WHITESPACE[markup.getbyte(markup.bytesize - 1)])
+          markup = markup.strip
+        end
 
         if (markup.start_with?('"') && markup.end_with?('"')) ||
           (markup.start_with?("'") && markup.end_with?("'"))
@@ -71,56 +72,48 @@ def inner_parse(markup, ss, cache)
         end
       end
 
-      def parse_number(markup, ss)
-        # check if the markup is simple integer or float
-        case markup
-        when INTEGER_REGEX
-          return Integer(markup, 10)
-        when FLOAT_REGEX
-          return markup.to_f
-        end
-
-        ss.string = markup
-        # the first byte must be a digit or  a dash
-        byte = ss.scan_byte
-
-        return false if byte != DASH && (byte < ZERO || byte > NINE)
-
-        if byte == DASH
-          peek_byte = ss.peek_byte
-
-          # if it starts with a dash, the next byte must be a digit
-          return false if peek_byte.nil? || !(peek_byte >= ZERO && peek_byte <= NINE)
-        end
-
-        # The markup could be a float with multiple dots
-        first_dot_pos = nil
-        num_end_pos = nil
-
-        while (byte = ss.scan_byte)
-          return false if byte != DOT && (byte < ZERO || byte > NINE)
-
-          # we found our number and now we are just scanning the rest of the string
-          next if num_end_pos
-
-          if byte == DOT
-            if first_dot_pos.nil?
-              first_dot_pos = ss.pos
-            else
-              # we found another dot, so we know that the number ends here
-              num_end_pos = ss.pos - 1
-            end
-          end
-        end
-
-        num_end_pos = markup.length if ss.eos?
-
-        if num_end_pos
-          # number ends with a number "123.123"
-          markup.byteslice(0, num_end_pos).to_f
-        else
-          # number ends with a dot "123."
-          markup.byteslice(0, first_dot_pos).to_f
-        end
+      # Byte-walking number parser. Accepts:
+      #   - Simple integers: "42", "-7"
+      #   - Simple floats: "3.14", "-0.5"
+      #   - Multi-dot floats (truncated at second dot): "1.2.3" → 1.2
+      #   - Trailing-dot floats: "123." → 123.0
+      # Rejects (returns nil → caller treats as VariableLookup):
+      #   - Non-numeric input: "hello", "", "-", "-x"
+      #   - Any byte after the leading digits that is neither a digit nor a
+      #     dot: "1.2.3a", "1.a", "12abc"
+      #
+      # The second parameter is unused; it is kept (and made optional) for
+      # compatibility with the previous two-argument signature.
+      def parse_number(markup, _ss = nil)
+        len = markup.bytesize
+        return if len == 0
+
+        # Optional leading '-', which must be followed by a digit
+        pos = markup.getbyte(0) == ByteTables::DASH ? 1 : 0
+        return unless pos < len && ByteTables::DIGIT[markup.getbyte(pos)]
+
+        # Scan the integer digits
+        pos += 1
+        pos += 1 while pos < len && ByteTables::DIGIT[markup.getbyte(pos)]
+
+        # Consumed everything = simple integer
+        return Integer(markup, 10) if pos == len
+
+        # Anything else must continue with a dot to be a float
+        return unless markup.getbyte(pos) == ByteTables::DOT
+
+        # The rest may only contain digits and dots; a single stray byte
+        # (e.g. "1.a") means this is not a number at all
+        pos += 1
+        while pos < len
+          b = markup.getbyte(pos)
+          return unless b == ByteTables::DOT || ByteTables::DIGIT[b]
+
+          pos += 1
+        end
+
+        # String#to_f reads the longest valid float prefix, which yields the
+        # documented results: "1.5" → 1.5, "1.2.3" → 1.2, "123." → 123.0
+        markup.to_f
       end
     end

diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb
@@ -4,13 +4,57 @@ module Liquid
   class VariableLookup
     COMMAND_METHODS = ['size', 'first', 'last'].freeze
 
+    # Whole-string pattern for a plain dotted chain of names, name(.name)*:
+    # no brackets and no quotes anywhere in the markup
+    SIMPLE_LOOKUP_RE = /\A[\w-]+\??(?:\.[\w-]+\??)*\z/
+
+    # Tells initialize whether markup can take its fast path, i.e. whether
+    # it is nothing but a dotted identifier chain.
+    #
+    # True for:
+    #   - a lone name ("product", "item")
+    #   - a dotted chain ("product.title", "cart.items.first")
+    #   - names ending in a question mark ("product.available?")
+    #   - names containing hyphens ("my-var.some-field")
+    # False for bracket lookups ("product[0]", "hash['key']"), quoted
+    # strings, empty input, and leading or trailing dots; initialize then
+    # falls back to markup.scan(VariableParser).
+    def self.simple_lookup?(markup)
+      !markup.bytesize.zero? && SIMPLE_LOOKUP_RE.match?(markup)
+    end
+
     attr_reader :name, :lookups
 
     def self.parse(markup, string_scanner = StringScanner.new(""), cache = nil)
       new(markup, string_scanner, cache)
     end
 
     def initialize(markup, string_scanner = StringScanner.new(""), cache = nil)
+      if self.class.simple_lookup?(markup)
+        dot_pos = markup.index('.')
+        if dot_pos.nil?
+          @name = markup
+          @lookups = Const::EMPTY_ARRAY
+          @command_flags = 0
+          return
+        end
+
+        @name = markup.byteslice(0, dot_pos)
+        @lookups = []
+        @command_flags = 0
+        pos = dot_pos + 1
+        len = markup.bytesize
+        while pos < len
+          seg_start = pos
+          pos += 1 while pos < len && markup.getbyte(pos) != ByteTables::DOT
+          seg = markup.byteslice(seg_start, pos - seg_start)
+          @command_flags |= 1 << @lookups.length if COMMAND_METHODS.include?(seg)
+          @lookups << seg
+          pos += 1 # skip dot
+        end
+        return
+      end
+
       lookups = markup.scan(VariableParser)
 
       name = lookups.shift