Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion lib/liquid/parse_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def new_parser(input)
def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false)
Tokenizer.new(
source: source,
string_scanner: @string_scanner,
line_number: start_line_number,
for_liquid_tag: for_liquid_tag,
)
Expand Down
170 changes: 66 additions & 104 deletions lib/liquid/tokenizer.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
# frozen_string_literal: true

require "strscan"

module Liquid
class Tokenizer
attr_reader :line_number, :for_liquid_tag

TAG_END = /%\}/
TAG_OR_VARIABLE_START = /\{[\{\%]/
NEWLINE = /\n/

OPEN_CURLEY = "{".ord
CLOSE_CURLEY = "}".ord
PERCENTAGE = "%".ord

def initialize(
source:,
string_scanner:,
line_numbers: false,
line_number: nil,
for_liquid_tag: false
Expand All @@ -28,8 +17,6 @@ def initialize(
@tokens = []

if @source
@ss = string_scanner
@ss.string = @source
tokenize
end
end
Expand All @@ -51,111 +38,86 @@ def shift
private

def tokenize
if @for_liquid_tag
@tokens = @source.split("\n")
@tokens = if @for_liquid_tag
@source.split("\n")
else
@tokens << shift_normal until @ss.eos?
scan(@source)
end

@source = nil
@ss = nil
end

def shift_normal
token = next_token

return unless token

token
end

def next_token
# possible states: :text, :tag, :variable
byte_a = @ss.peek_byte

if byte_a == OPEN_CURLEY
@ss.scan_byte

byte_b = @ss.peek_byte

if byte_b == PERCENTAGE
@ss.scan_byte
return next_tag_token
elsif byte_b == OPEN_CURLEY
@ss.scan_byte
return next_variable_token
end

@ss.pos -= 1
end

next_text_token
end

def next_text_token
start = @ss.pos

unless @ss.skip_until(TAG_OR_VARIABLE_START)
token = @ss.rest
@ss.terminate
return token
end

pos = @ss.pos -= 2
@source.byteslice(start, pos - start)
rescue ::ArgumentError => e
if e.message == "invalid byte sequence in #{@ss.string.encoding}"
raise SyntaxError, "Invalid byte sequence in #{@ss.string.encoding}"
else
raise
end
end

def next_variable_token
start = @ss.pos - 2

byte_a = byte_b = @ss.scan_byte
# @param source [String]
# @return [Array<String>]
def scan(source)
raise SyntaxError, "Invalid byte sequence in #{source.encoding}" unless source.valid_encoding?

tokens = [] # : Array[String]
pos = 0
eos = source.bytesize

# rubocop:disable Metrics/BlockNesting
while pos < eos
byte = source.getbyte(pos)
next_byte = source.getbyte(pos + 1)

if byte == 123 && next_byte == 123 # {{
if (index = source.byteindex("}", pos + 2))
if source.getbyte(index + 1) == 125 # }}
tokens << source.byteslice(pos, index + 2 - pos)
pos = index + 2
else # } or %}
tokens << source.byteslice(pos, index + 1 - pos)
pos = index + 1
end
else
tokens << "{{"
pos += 2
end
elsif byte == 123 && next_byte == 37 # {%
if (index = source.byteindex("%}", pos + 2))
tokens << source.byteslice(pos, index + 2 - pos)
pos = index + 2
else
tokens << "{%"
pos += 2
end
else
# Not markup. Scan until but not including {{ or {%
index = source.byteindex("{", pos)

unless index
# No more markup. Scan until end of string.
tokens << source.byteslice(pos, eos - pos)
break
end

while byte_b
byte_a = @ss.scan_byte while byte_a && byte_a != CLOSE_CURLEY && byte_a != OPEN_CURLEY
next_byte = source.getbyte(index + 1)

break unless byte_a
while next_byte != 37 && next_byte != 123
index = source.byteindex("{", index + 1)
break unless index

if @ss.eos?
return byte_a == CLOSE_CURLEY ? @source.byteslice(start, @ss.pos - start) : "{{"
end

byte_b = @ss.scan_byte
next_byte = source.getbyte(index + 1)
unless next_byte
index = nil
break
end
end

if byte_a == CLOSE_CURLEY
if byte_b == CLOSE_CURLEY
return @source.byteslice(start, @ss.pos - start)
elsif byte_b != CLOSE_CURLEY
@ss.pos -= 1
return @source.byteslice(start, @ss.pos - start)
if index
tokens << source.byteslice(pos, index - pos)
pos = index
else
# No more markup. Scan until end of string.
tokens << source.byteslice(pos, eos - pos)
break
end
elsif byte_a == OPEN_CURLEY && byte_b == PERCENTAGE
return next_tag_token_with_start(start)
end

byte_a = byte_b
end
# rubocop:enable Metrics/BlockNesting

"{{"
end

# Consumes the remainder of a tag after "{%" has already been scanned
# (@ss.pos sits two bytes past the opener). Returns the complete
# "{%...%}" markup as a single token, or the bare "{%" literal when the
# tag is never closed.
def next_tag_token
start = @ss.pos - 2
# skip_until returns the number of bytes advanced past the "%}" match
# (TAG_END), or nil when no closing delimiter exists in the source.
if (len = @ss.skip_until(TAG_END))
# len counts from the pre-skip position; add 2 for the "{%" opener.
@source.byteslice(start, len + 2)
else
"{%"
end
end

def next_tag_token_with_start(start)
@ss.skip_until(TAG_END)
@source.byteslice(start, @ss.pos - start)
tokens
end
end
end
2 changes: 0 additions & 2 deletions performance/theme_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,9 @@ def compile

# `tokenize` will just test the tokenizing portion of liquid without any templates
def tokenize
ss = StringScanner.new("")
@tests.each do |test_hash|
tokenizer = Liquid::Tokenizer.new(
source: test_hash[:liquid],
string_scanner: ss,
line_numbers: true,
)
while tokenizer.shift; end
Expand Down
5 changes: 1 addition & 4 deletions test/unit/tag_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ def test_tag_render_to_output_buffer_nil_value
private

def new_tokenizer
Tokenizer.new(
source: "",
string_scanner: StringScanner.new(""),
)
Tokenizer.new(source: "")
end
end
2 changes: 1 addition & 1 deletion test/unit/template_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_template_inheritance
def test_invalid_utf8
input = "\xff\x00"
error = assert_raises(SyntaxError) do
Liquid::Tokenizer.new(source: input, string_scanner: StringScanner.new(input))
Liquid::Tokenizer.new(source: input)
end
assert_equal(
'Liquid syntax error: Invalid byte sequence in UTF-8',
Expand Down
Loading