Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/liquid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ module Liquid
require "liquid/version"
require "liquid/deprecations"
require "liquid/const"
require "liquid/byte_tables"
require 'liquid/standardfilters'
require 'liquid/file_system'
require 'liquid/parser_switching'
Expand Down
90 changes: 83 additions & 7 deletions lib/liquid/block_body.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,21 @@ def self.rescue_render_node(context, output, line_number, exc, blank_tag)
case
when token.start_with?(TAGSTART)
whitespace_handler(token, parse_context)
unless token =~ FullToken
# rubocop:disable Metrics/BlockNesting
fast = try_parse_tag_token(token)
if fast
tag_name, markup, newlines = fast
elsif token =~ FullToken
tag_name = Regexp.last_match(2)
markup = Regexp.last_match(4)
newlines = parse_context.line_number ? Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n") : 0
else
return handle_invalid_tag_token(token, parse_context, &block)
end
tag_name = Regexp.last_match(2)
markup = Regexp.last_match(4)
# rubocop:enable Metrics/BlockNesting

if parse_context.line_number
# newlines inside the tag should increase the line number,
# particularly important for multiline {% liquid %} tags
parse_context.line_number += Regexp.last_match(1).count("\n") + Regexp.last_match(3).count("\n")
if parse_context.line_number && newlines > 0
parse_context.line_number += newlines
end

if tag_name == 'liquid'
Expand Down Expand Up @@ -260,6 +265,77 @@ def create_variable(token, parse_context)
BlockBody.raise_missing_variable_terminator(token, parse_context)
end

# Fast path for parsing "{%[-] tag_name markup [-]%}" tag tokens without
# running the FullToken regex.
#
# @param token [String] raw tag token beginning with "{%"
# @return [Array(String, String, Integer), nil] [tag_name, markup,
#   newline_count], or nil when the caller must fall back to the
#   `token =~ FullToken` match
#
# Accepts tokens where:
#   - the token is terminated by "%}" (FullToken anchors on it, so an
#     unterminated token must reach the slow path and be reported there)
#   - the tag name is '#' or starts with [a-zA-Z_] followed by [a-zA-Z0-9_]
#     bytes (no hyphens, no '?' suffix)
#   - whitespace around the name is space, \t, \n, \v, \f or \r, i.e. exactly
#     what the regex class \s matches
#   - optional whitespace-control dashes sit directly after "{%" and
#     directly before "%}"
# Returns nil for:
#   - tokens shorter than "{%x%}" (5 bytes) or not ending in "%}"
#   - tag names starting with a digit (valid for FullToken, left to it)
#   - any other byte where the tag name should start, including NUL
#
# newline_count only counts "\n" bytes in the whitespace runs before and
# after the tag name; newlines inside the markup are not counted.
def try_parse_tag_token(token)
  len = token.bytesize
  return if len < 5 || !token.end_with?("%}")

  pos = 2 # skip "{%"
  pos += 1 if token.getbyte(pos) == ByteTables::DASH
  newline_count = 0

  # Skip whitespace before the tag name, counting newlines.
  while pos < len
    b = token.getbyte(pos)
    # ByteTables::WHITESPACE follows String#strip and therefore contains NUL,
    # but \s in FullToken does not match NUL, so NUL ends the run here.
    break if b == 0 || !ByteTables::WHITESPACE[b]

    newline_count += 1 if b == ByteTables::NEWLINE
    pos += 1
  end

  # Scan the tag name: '#' or an identifier.
  name_start = pos
  b = token.getbyte(pos)
  if b == ByteTables::HASH
    pos += 1
  elsif ByteTables::IDENT_START[b]
    pos += 1
    pos += 1 while pos < len && ByteTables::WORD[token.getbyte(pos)]
  else
    return
  end
  tag_name = token.byteslice(name_start, pos - name_start)

  # Skip whitespace after the tag name, counting newlines (same NUL rule).
  while pos < len
    b = token.getbyte(pos)
    break if b == 0 || !ByteTables::WHITESPACE[b]

    newline_count += 1 if b == ByteTables::NEWLINE
    pos += 1
  end

  # Markup is everything up to the closing "%}", minus one optional
  # whitespace-control dash directly in front of it. The scans above stop at
  # the '%' of the terminator at the latest, so pos <= markup_end here.
  markup_end = len - 2
  markup_end -= 1 if markup_end > pos && token.getbyte(markup_end - 1) == ByteTables::DASH
  markup = pos >= markup_end ? "" : token.byteslice(pos, markup_end - pos)

  [tag_name, markup, newline_count]
end

# @deprecated Use {.raise_missing_tag_terminator} instead
def raise_missing_tag_terminator(token, parse_context)
BlockBody.raise_missing_tag_terminator(token, parse_context)
Expand Down
48 changes: 48 additions & 0 deletions lib/liquid/byte_tables.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Liquid
  # Byte classification tables, each a frozen 256-element Array of booleans
  # indexed by byte value (TABLE[byte]). They are computed once when this
  # file is loaded so that scanners can classify a byte with a single array
  # lookup instead of a chain of comparisons.
  module ByteTables
    # Byte constants for delimiters and punctuation
    NEWLINE = 10
    HASH = 35 # '#'
    DASH = 45 # '-'
    DOT = 46 # '.'

    # [a-zA-Z_] - bytes that may begin an identifier
    IDENT_START = Array.new(256) do |byte|
      case byte
      when 65..90, 97..122, 95 then true
      else false
      end
    end.freeze

    # [a-zA-Z0-9_-] - bytes that may continue an identifier (hyphen allowed)
    IDENT_CONT = Array.new(256) do |byte|
      case byte
      when 48..57, 65..90, 97..122, 95, 45 then true
      else false
      end
    end.freeze

    # [a-zA-Z0-9_] - ASCII \w equivalent (no hyphen), used for tag names
    WORD = Array.new(256) do |byte|
      case byte
      when 48..57, 65..90, 97..122, 95 then true
      else false
      end
    end.freeze

    # [0-9] - ASCII digits
    DIGIT = Array.new(256) { |byte| byte >= 48 && byte <= 57 }.freeze

    # Bytes that String#strip removes: \x00, \t, \n, \v, \f, \r and space
    WHITESPACE = Array.new(256) do |byte|
      case byte
      when 0, 9..13, 32 then true
      else false
      end
    end.freeze
  end
end
125 changes: 72 additions & 53 deletions lib/liquid/expression.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,9 @@ class Expression
'-' => VariableLookup.parse("-", nil).freeze,
}.freeze

DOT = ".".ord
ZERO = "0".ord
NINE = "9".ord
DASH = "-".ord

# Use an atomic group (?>...) to avoid pathological backtracing from
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
INTEGER_REGEX = /\A(-?\d+)\z/
FLOAT_REGEX = /\A(-?\d+)\.\d+\z/

class << self
def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
Expand All @@ -35,7 +28,15 @@ def safe_parse(parser, ss = StringScanner.new(""), cache = nil)
def parse(markup, ss = StringScanner.new(""), cache = nil)
return unless markup

markup = markup.strip # markup can be a frozen string
# Guard: only call .strip when the first or last byte is whitespace.
# String#strip always allocates a new String, even when there's nothing
# to strip. ByteTables::WHITESPACE matches the same bytes that strip
# removes (space, \t, \n, \v, \f, \r, \x00). When neither end has
# whitespace, we skip the call and avoid ~4,464 allocations per compile.
first = markup.getbyte(0)
if first && (ByteTables::WHITESPACE[first] || ByteTables::WHITESPACE[markup.getbyte(markup.bytesize - 1)])
markup = markup.strip
end

if (markup.start_with?('"') && markup.end_with?('"')) ||
(markup.start_with?("'") && markup.end_with?("'"))
Expand Down Expand Up @@ -71,56 +72,74 @@ def inner_parse(markup, ss, cache)
end
end

# Byte-walking number parser (replaces the INTEGER_REGEX / FLOAT_REGEX and
# StringScanner based implementation).
#
# @param markup [String] candidate numeric literal; Expression.parse strips
#   surrounding whitespace before this is reached
# @param _ss [StringScanner, nil] unused, kept so existing two-argument
#   callers keep working
# @return [Integer, Float, nil] the parsed number, or nil when markup is not
#   a number (the caller then falls through to VariableLookup.parse)
#
# Accepts:
#   - simple integers: "42", "-7"
#   - simple floats: "3.14", "-0.5"
#   - multi-dot floats, truncated at the second dot: "1.2.3" => 1.2
#   - trailing-dot floats: "123." => 123.0 (also "1..2" => 1.0)
# Rejects (nil):
#   - empty or non-numeric input: "", "hello", "-", "-x"
#   - any byte other than a digit or a dot after the leading number:
#     "1.2.3a", "1.foo", "1..a"
def parse_number(markup, _ss = nil)
  len = markup.bytesize
  return if len == 0

  # Optional sign, which must be followed by at least one digit.
  pos = 0
  pos += 1 if markup.getbyte(0) == ByteTables::DASH
  return unless pos < len && ByteTables::DIGIT[markup.getbyte(pos)]

  # Integer part.
  pos += 1
  pos += 1 while pos < len && ByteTables::DIGIT[markup.getbyte(pos)]

  # Consumed everything: simple integer.
  return Integer(markup, 10) if pos == len

  # Anything other than a dot after the integer part is not a number.
  return unless markup.getbyte(pos) == ByteTables::DOT

  dot_pos = pos
  pos += 1
  digit_start = pos
  pos += 1 while pos < len && ByteTables::DIGIT[markup.getbyte(pos)]
  has_fraction = pos > digit_start

  # Simple float such as "123.456".
  return markup.to_f if has_fraction && pos == len

  # Whatever is left may only consist of digits and dots. This check has to
  # run for the trailing-dot form as well; otherwise "1.foo" would be
  # parsed as 1.0 instead of being treated as a variable lookup.
  num_end = nil
  check = pos
  while check < len
    b = markup.getbyte(check)
    if b == ByteTables::DOT
      num_end ||= check # the number ends right before the next dot
    elsif !ByteTables::DIGIT[b]
      return
    end
    check += 1
  end

  if has_fraction
    # Multi-dot like "1.2.3.4": keep only "1.2".
    markup.byteslice(0, num_end || len).to_f
  else
    # No digit directly after the first dot ("123.", "1..2"): keep "123".
    markup.byteslice(0, dot_pos).to_f
  end
end
end
Expand Down
44 changes: 44 additions & 0 deletions lib/liquid/variable_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,57 @@ module Liquid
class VariableLookup
COMMAND_METHODS = ['size', 'first', 'last'].freeze

# Matches simple identifier chains: name(.name)* with no brackets/quotes
SIMPLE_LOOKUP_RE = /\A[\w-]+\??(?:\.[\w-]+\??)*\z/

# Tells whether markup is a plain dotted identifier chain, i.e. whether it
# matches SIMPLE_LOOKUP_RE in full.
#
# True for:
#   - a single name: "product", "item"
#   - dotted chains: "product.title", "cart.items.first"
#   - names carrying a '?' suffix: "product.available?"
#   - names containing hyphens: "my-var.some-field"
# False for:
#   - empty input
#   - bracket lookups such as "product[0]" or "hash['key']"
#   - quoted strings, leading or trailing dots, consecutive dots
#
# initialize uses this to choose between its byte-walking fast path and the
# markup.scan(VariableParser) path.
def self.simple_lookup?(markup)
  return false if markup.bytesize.zero?

  SIMPLE_LOOKUP_RE.match?(markup)
end

attr_reader :name, :lookups

# Class-level constructor: forwards markup, string_scanner and cache
# unchanged to `new` and returns the resulting instance.
#
# string_scanner defaults to a fresh StringScanner over an empty string and
# cache defaults to nil, mirroring the defaults of initialize.
def self.parse(markup, string_scanner = StringScanner.new(""), cache = nil)
new(markup, string_scanner, cache)
end

def initialize(markup, string_scanner = StringScanner.new(""), cache = nil)
if self.class.simple_lookup?(markup)
dot_pos = markup.index('.')
if dot_pos.nil?
@name = markup
@lookups = Const::EMPTY_ARRAY
@command_flags = 0
return
end

@name = markup.byteslice(0, dot_pos)
@lookups = []
@command_flags = 0
pos = dot_pos + 1
len = markup.bytesize
while pos < len
seg_start = pos
pos += 1 while pos < len && markup.getbyte(pos) != ByteTables::DOT
seg = markup.byteslice(seg_start, pos - seg_start)
@command_flags |= 1 << @lookups.length if COMMAND_METHODS.include?(seg)
@lookups << seg
pos += 1 # skip dot
end
return
end

lookups = markup.scan(VariableParser)

name = lookups.shift
Expand Down
Loading
Loading