From 90471f3c2750eb6f822fe1bbbba726fb8bb86906 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sat, 23 Feb 2019 11:57:34 +0000 Subject: [PATCH 1/3] Allow ignoreCase flag if all RegExps use it --- moo.js | 12 +++++++++--- test/test.js | 22 ++++++++++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/moo.js b/moo.js index 7418b74..58e370a 100644 --- a/moo.js +++ b/moo.js @@ -41,8 +41,6 @@ return '(?:' + reEscape(obj) + ')' } else if (isRegExp(obj)) { - // TODO: consider /u support - if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed') if (obj.global) throw new Error('RegExp /g flag is implied') if (obj.sticky) throw new Error('RegExp /y flag is implied') if (obj.multiline) throw new Error('RegExp /m flag is implied') @@ -154,6 +152,7 @@ var fast = Object.create(null) var fastAllowed = true var unicodeFlag = null + var ignoreCaseFlag = null var groups = [] var parts = [] @@ -210,7 +209,7 @@ groups.push(options) - // Check unicode flag is used everywhere or nowhere + // Check unicode and ignoreCase flags are used everywhere or nowhere for (var j = 0; j < match.length; j++) { var obj = match[j] if (!isRegExp(obj)) { @@ -222,6 +221,12 @@ } else if (unicodeFlag !== obj.unicode) { throw new Error("If one rule is /u then all must be") } + + if (ignoreCaseFlag === null) { + ignoreCaseFlag = obj.ignoreCase + } else if (ignoreCaseFlag !== obj.ignoreCase) { + throw new Error("If one rule is /i then all must be") + } } // convert to RegExp @@ -257,6 +262,7 @@ var suffix = hasSticky || fallbackRule ? '' : '|' if (unicodeFlag === true) flags += "u" + if (ignoreCaseFlag === true) flags += "i" var combined = new RegExp(reUnion(parts) + suffix, flags) return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule} } diff --git a/test/test.js b/test/test.js index 40158f6..32345bc 100644 --- a/test/test.js +++ b/test/test.js @@ -28,10 +28,9 @@ describe('compiler', () => { expect(lex4.next()).toMatchObject({type: 'err', text: 'nope!'}) }) - test("warns for /g, /y, /i, /m", () => { + test("warns for /g, /y, /m", () => { expect(() => compile({ word: /foo/ })).not.toThrow() expect(() => compile({ word: /foo/g })).toThrow('implied') - expect(() => compile({ word: /foo/i })).toThrow('not allowed') expect(() => compile({ word: /foo/y })).toThrow('implied') expect(() => compile({ word: /foo/m })).toThrow('implied') }) @@ -1211,3 +1210,22 @@ describe("unicode flag", () => { }) }) + + +describe('ignoreCase flag', () => { + + test("allows all rules to be /i", () => { + expect(() => compile({ a: /foo/i, b: /bar/i, c: "quxx" })).not.toThrow() + expect(() => compile({ a: /foo/i, b: /bar/, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/, b: /bar/i, c: "quxx" })).toThrow("If one rule is /i then all must be") + }) + + test("supports ignoreCase", () => { + const lexer = compile({ a: /foo/i, b: /bar/i, c: "quxx" }) + lexer.reset("FoObArQuXx") + expect(lexer.next()).toMatchObject({value: "FoO"}) + expect(lexer.next()).toMatchObject({value: "bAr"}) + expect(lexer.next()).toMatchObject({value: "QuXx"}) + }) + +}) From 47c216d2e2eadd6e3f344c1131ec12219707249c Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sat, 23 Feb 2019 12:02:29 +0000 Subject: [PATCH 2/3] Add test for /ui RegExps --- test/test.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test.js b/test/test.js index 32345bc..6bafd90 100644 --- a/test/test.js +++ b/test/test.js @@ -1220,6 +1220,16 @@ describe('ignoreCase flag', () => { expect(() => compile({ a: /foo/, b: /bar/i, c: "quxx" })).toThrow("If one rule is /i then all must be") }) + test("allows all rules to be /ui", () => { + expect(() => compile({ a: /foo/ui, b: /bar/ui, c: "quxx" })).not.toThrow() + expect(() => compile({ a: /foo/u, b: /bar/i, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/i, b: /bar/u, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/ui, b: /bar/i, c: "quxx" })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/ui, b: /bar/u, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/i, b: /bar/ui, c: "quxx" })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/u, b: /bar/ui, c: "quxx" })).toThrow("If one rule is /i then all must be") + }) + test("supports ignoreCase", () => { const lexer = compile({ a: /foo/i, b: /bar/i, c: "quxx" }) lexer.reset("FoObArQuXx") From f8a5814a1994025d250bc6281d46e7bd677ad784 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 24 Feb 2019 16:54:21 +0000 Subject: [PATCH 3/3] Require literals to be marked ignoreCase if RegExps are --- moo.js | 24 ++++++++++++++++ test/test.js | 78 +++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 89 insertions(+), 13 deletions(-) diff --git a/moo.js b/moo.js index 58e370a..0e9e22d 100644 --- a/moo.js +++ b/moo.js @@ -118,6 +118,7 @@ value: null, type: null, shouldThrow: false, + ignoreCase: null, } // Avoid Object.assign(), so we support IE9+ @@ -210,8 +211,12 @@ groups.push(options) // Check unicode and ignoreCase flags are used everywhere or nowhere + var hasLiteralsWithCase = false for (var j = 0; j < match.length; j++) { var obj = match[j] + if (typeof obj === "string" && obj.toLowerCase() !== obj.toUpperCase()) { + hasLiteralsWithCase = true + } if (!isRegExp(obj)) { continue } @@ -227,6 +232,25 @@ } else if (ignoreCaseFlag !== obj.ignoreCase) { throw new Error("If one rule is /i then all must be") } + + // RegExp flags must match the rule's ignoreCase option, if set + if (options.ignoreCase !== null && obj.ignoreCase !== options.ignoreCase) { + throw new Error("ignoreCase option must match RegExp flags (in token '" + options.defaultType + "')") + } + } + + if (hasLiteralsWithCase) { + var ignoreCase = !!options.ignoreCase + if (ignoreCaseFlag === null) { + ignoreCaseFlag = ignoreCase + } else if (ignoreCaseFlag !== ignoreCase) { + if (ignoreCaseFlag) { + throw new Error("Literal must be marked with {ignoreCase: true} (in token '" + options.defaultType + "')") + } else { + // TODO transform literals to ignore case, even if it's not set globally + throw new Error("If one rule sets ignoreCase then all must (in token '" + options.defaultType + "')") + } + } } // convert to RegExp diff --git a/test/test.js b/test/test.js index 6bafd90..d278616 100644 --- a/test/test.js +++ b/test/test.js @@ -1215,27 +1215,79 @@ describe("unicode flag", () => { describe('ignoreCase flag', () => { test("allows all rules to be /i", () => { - expect(() => compile({ a: /foo/i, b: /bar/i, c: "quxx" })).not.toThrow() - expect(() => compile({ a: /foo/i, b: /bar/, c: "quxx" })).toThrow("If one rule is /i then all must be") - expect(() => compile({ a: /foo/, b: /bar/i, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/i, b: /bar/i })).not.toThrow() + expect(() => compile({ a: /foo/i, b: /bar/ })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/, b: /bar/i })).toThrow("If one rule is /i then all must be") }) test("allows all rules to be /ui", () => { - expect(() => compile({ a: /foo/ui, b: /bar/ui, c: "quxx" })).not.toThrow() - expect(() => compile({ a: /foo/u, b: /bar/i, c: "quxx" })).toThrow("If one rule is /i then all must be") - expect(() => compile({ a: /foo/i, b: /bar/u, c: "quxx" })).toThrow("If one rule is /i then all must be") - expect(() => compile({ a: /foo/ui, b: /bar/i, c: "quxx" })).toThrow("If one rule is /u then all must be") - expect(() => compile({ a: /foo/ui, b: /bar/u, c: "quxx" })).toThrow("If one rule is /i then all must be") - expect(() => compile({ a: /foo/i, b: /bar/ui, c: "quxx" })).toThrow("If one rule is /u then all must be") - expect(() => compile({ a: /foo/u, b: /bar/ui, c: "quxx" })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/ui, b: /bar/ui })).not.toThrow() + expect(() => compile({ a: /foo/u, b: /bar/i })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/i, b: /bar/u })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/ui, b: /bar/i })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/ui, b: /bar/u })).toThrow("If one rule is /i then all must be") + expect(() => compile({ a: /foo/i, b: /bar/ui })).toThrow("If one rule is /u then all must be") + expect(() => compile({ a: /foo/u, b: /bar/ui })).toThrow("If one rule is /i then all must be") + }) + + test("allow literals to be marked ignoreCase", () => { + expect(() => compile({ + a: /foo/i, + lit: {match: "quxx", ignoreCase: true}, + })).not.toThrow() + expect(() => compile([ + { type: "a", match: /foo/i }, + { type: "lit", match: "quxx", ignoreCase: true }, + ])).not.toThrow() + }) + + test("require literals to be marked ignoreCase", () => { + expect(() => compile({ + a: /foo/i, + lit: "quxx" , + })).toThrow("Literal must be marked with {ignoreCase: true} (in token 'lit')") + expect(() => compile([ + { type: "a", match: /foo/i }, + { type: "lit", match: "quxx" }, + ])).toThrow("Literal must be marked with {ignoreCase: true} (in token 'lit')") + }) + + test("ignoreCase is only required when case is relevant", () => { + expect(() => compile({ + cat: {match: "cat", ignoreCase: true}, + bat: {match: "BAT", ignoreCase: true}, + comma: ',', + semi: ';', + lparen: '(', + rparen: ')', + lbrace: '{', + rbrace: '}', + lbracket: '[', + rbracket: ']', + and: '&&', + or: '||', + bitand: '&', + bitor: '|', + })).not.toThrow() + }) + + test("require ignoreCase option to be match RegExp flags", () => { + expect(() => compile({ + word: { match: /[a-z]+/, ignoreCase: true }, + })).toThrow("ignoreCase option must match RegExp flags") + expect(() => compile({ + word: { match: ["foo", /[a-z]+/], ignoreCase: true }, + })).toThrow("ignoreCase option must match RegExp flags") + expect(() => compile({ + word: { match: /[a-z]+/i, ignoreCase: false }, + })).toThrow("ignoreCase option must match RegExp flags") }) test("supports ignoreCase", () => { - const lexer = compile({ a: /foo/i, b: /bar/i, c: "quxx" }) - lexer.reset("FoObArQuXx") + const lexer = compile({ a: /foo/i, b: /bar/i, }) + lexer.reset("FoObAr") expect(lexer.next()).toMatchObject({value: "FoO"}) expect(lexer.next()).toMatchObject({value: "bAr"}) - expect(lexer.next()).toMatchObject({value: "QuXx"}) }) })