Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 29 additions & 23 deletions lib/elasticsearch/cql_grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,39 @@ function reverseGrammar (grammar) {
const leftCql = `
query ::= query whitespace connective whitespace sub_query | sub_query
connective ::= "AND NOT" | "AND" | "OR" | "NOT"
sub_query ::= atomic_query | "(" query ")"
atomic_query ::= scope relation quoted_term
sub_query ::= atomic_query | lparen_space query rparen_space
atomic_query ::= scope relation search_term
search_term ::= quoted_term | unquoted_word
scope ::= scope_term whitespace | scope_term
relation ::= relation_term whitespace | relation_term
scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format"
relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses"
quoted_term ::= quote phrase quote
phrase ::= phrase whitespace word | word
phrase ::= phrase whitespace_or_word | whitespace_or_word
whitespace_or_word ::= whitespace | word
whitespace ::= [#x20#x09#x0A#x0D]+
word ::= word escaped_char | word regular_char | escaped_char | regular_char
regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D]
unquoted_word ::= unquoted_word escaped_char | unquoted_word unquoted_char | escaped_char | unquoted_char
unquoted_char ::= [^#x22#x5c#x20#x09#x0A#x0D=<>()]
escaped_char ::= slash char
slash ::= [#x5c]
char ::= [a-z]|[^a-z]
quote ::= [#x22]
lparen_space ::= lparen whitespace | lparen
rparen_space ::= whitespace rparen | rparen
lparen ::= [#x28]
rparen ::= [#x29]
`
/**
 * Rewrite purely alphabetic string literals in an EBNF grammar so that they
 * match case-insensitively.
 *
 * Each letter of such a literal becomes a two-character class (e.g. `a` ->
 * `[aA]`) and each space becomes a reference to the `whitespace` rule, so
 * `"AND NOT"` turns into `[aA] [nN] [dD] whitespace [nN] [oO] [tT]`.
 *
 * Literals that contain anything other than ASCII letters and spaces
 * (e.g. `"<="` or `"("`) are left exactly as written.
 *
 * @param {string} grammar - EBNF grammar source text
 * @returns {string} the grammar with alphabetic literals expanded
 */
function makeCaseInsensitiveLiterals (grammar) {
  // Consume EVERY double-quoted literal, not only the alphabetic ones.
  // Matching only `"[a-zA-Z ]+"` lets the closing quote of one literal pair
  // up with the opening quote of the next (`"(" query ")"` would match
  // `" query "`), which corrupts the rule. Line breaks are excluded so a stray
  // quote cannot swallow following rules.
  return grammar.replace(/"([^"\r\n]*)"/g, (literal, text) => {
    if (!/^[a-zA-Z ]+$/.test(text)) return literal
    return text
      .split('')
      .map(c => c === ' ' ? 'whitespace' : `[${c.toLowerCase()}${c.toUpperCase()}]`)
      .join(' ')
  })
}

const rightCql = reverseGrammar(leftCql)
const processedLeftCql = makeCaseInsensitiveLiterals(leftCql)
const rightCql = reverseGrammar(processedLeftCql)

function simplify (ast) {
switch (ast.type) {
Expand All @@ -56,14 +71,19 @@ function simplify (ast) {
return ast.text
case 'relation_term':
return ast.text
case 'search_term':
return simplify(ast.children.find(child => child.type.includes('quoted_term') || child.type.includes('word')))
case 'quoted_term':
return simplify(ast.children.find(child => child.type.includes('phrase')))
case 'phrase': {
const word = ast.children.find(child => child.type === 'word')
const word = ast.children.find(child => child.type === 'whitespace_or_word')
const phrase = ast.children.find(child => child.type === 'phrase')
return [simplify(word)].concat(phrase ? simplify(phrase) : [])
return [simplify(word)].filter(x => x).concat(phrase ? simplify(phrase) : [])
}
case 'whitespace_or_word':
return simplify(ast.children.find(child => child.type === 'word'))
case 'word':
case 'unquoted_word':
return ast.text
default:
break
Expand Down Expand Up @@ -94,12 +114,12 @@ function parseWithRightCql (string) {

function parsedASTtoNestedArray (ast) {
if (!ast.type.includes('query')) {
return reverseString(ast.text)
return ast.text.trim()
}

const childTypes = [
'atomic_query', 'sub_query', 'query', 'connective',
'scope', 'relation', 'quoted_term'
'scope', 'relation', 'search_term'
]

const children = ast.children
Expand All @@ -113,18 +133,4 @@ function parsedASTtoNestedArray (ast) {
return children
}

// we need to reverse the error message since `parseWithRightCql` doesn't
function displayParsed (string) {
const parsed = parseWithRightCql(string)
if (!parsed) return {}
if (parsed.errors.length) {
return {
error: parsed.errors.map(error =>
`Parsing error likely near end of "${reverseString(error.token.rest)}"`
).join('\n')
}
}
return { parsed: parsedASTtoNestedArray(parsed) }
}

module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed }
module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, parsedASTtoNestedArray }
67 changes: 51 additions & 16 deletions lib/elasticsearch/cql_query_builder.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,45 @@
const { parseWithRightCql } = require('./cql_grammar')
const { parseWithRightCql, reverseString, parsedASTtoNestedArray } = require('./cql_grammar')
const { indexMapping } = require('./cql/index-mapping')
const ElasticQueryBuilder = require('./elastic-query-builder')
const { InvalidParameterError } = require('../errors')

function buildEsQuery (cqlQuery, request = null) {
const filterQuery = buildFilterQuery(request)
return {
bool: {
should: [
buildEsQueryFromTree(
parseWithRightCql(cqlQuery.trim())
)
],
...filterQuery
/**
 * Holds one CQL query string together with its lazily computed parse result,
 * so the same parse can back both the Elasticsearch query and the
 * human-readable parse report.
 */
class CqlQuery {
  /**
   * @param {string} [queryStr] - raw CQL text; falsy values become ''
   */
  constructor (queryStr) {
    const raw = queryStr || ''
    this.queryStr = raw.trim()
    this.parsedAST = null
  }

  /**
   * Parse the query with `parseWithRightCql`, reusing a previously stored
   * truthy result instead of parsing again.
   *
   * @returns {*} whatever `parseWithRightCql` produced for the query string
   */
  parse () {
    if (this.parsedAST) return this.parsedAST
    this.parsedAST = parseWithRightCql(this.queryStr)
    return this.parsedAST
  }

  /**
   * Build the Elasticsearch bool query: the translated CQL tree goes in
   * `must`, merged with whatever `buildFilterQuery` returns for the request.
   *
   * @param {object|null} [request=null] - passed through to `buildFilterQuery`
   * @returns {{bool: object}}
   */
  buildEsQuery (request = null) {
    // Filters are computed first, matching the original evaluation order.
    const filterQuery = buildFilterQuery(request)
    const must = [buildEsQueryFromTree(this.parse(), this.queryStr)]
    return { bool: { must, ...filterQuery } }
  }

  /**
   * Describe the parse outcome for display.
   *
   * @returns {{error: string}|{parsed: *}} `error` when parsing produced
   *   nothing or reported errors, otherwise `parsed` with the nested-array
   *   form of the AST
   */
  displayParsed () {
    const ast = this.parse()
    if (!ast) {
      return { error: 'Unknown parsing error. Error most likely near end of string' }
    }

    const hasErrors = Boolean(ast.errors && ast.errors.length)
    if (!hasErrors) {
      return { parsed: parsedASTtoNestedArray(ast) }
    }

    // The unparsed remainder is passed through reverseString before being
    // shown, one message per reported error.
    const messages = ast.errors.map(({ token }) =>
      `Parsing error likely near end of "${reverseString(token.rest)}"`
    )
    return { error: messages.join('\n') }
  }
}

function buildFilterQuery (request) {
Expand Down Expand Up @@ -52,6 +78,7 @@ function buildEsQueryFromTree (tree) {
}

function buildBoolean (operator, queries) {
operator = operator.toUpperCase()
if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries)
const esOperator = operator === 'AND' ? 'must' : 'should'
return {
Expand Down Expand Up @@ -89,7 +116,7 @@ function atomicQueryParams (atomicQuery) {
for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare
*/
function findTopPhrase (tree) {
if (tree.type === 'phrase') return tree.text
if (tree.type === 'phrase' || tree.type === 'unquoted_word') return tree.text.trim()
const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x)
return topPhrases.length ? topPhrases[0] : null
}
Expand All @@ -100,7 +127,7 @@ function findTopPhrase (tree) {
of word nodes for H, Ha, Ham, etc...
*/
function findTopWords (tree) {
if (tree.type === 'word') return [tree.text]
if (tree.type === 'word' || tree.type === 'unquoted_word') return [tree.text.trim()]
return tree.children.map(child => findTopWords(child)).flat()
}

Expand Down Expand Up @@ -139,6 +166,16 @@ function hasFields (obj) {
*/

function buildAtomic ({ scope, relation, terms, term }) {
scope = scope.toLowerCase()
relation = relation.toLowerCase()

if (scope === 'date') {
const dateRegex = /^\d{4}(?:[-/]\d{2})?(?:[-/]\d{2})?$/
if (!terms.every(t => dateRegex.test(t))) {
throw new InvalidParameterError('Dates must be of the form YYYY, YYYY/MM, or YYYY/MM/DD ')
}
}

const allFields = nestedFilterAndMap(
indexMapping[scope],
field => typeof field === 'string' || field.on(term),
Expand Down Expand Up @@ -214,7 +251,6 @@ function buildAtomicMain ({ fields, relation, terms, term }) {
}

function anyAllQueries ({ fields, relation, terms }) {
if (!['any', 'all'].includes(relation)) { return null }
const operator = (relation === 'any' ? 'should' : 'must')
return {
bool: {
Expand All @@ -224,7 +260,6 @@ function anyAllQueries ({ fields, relation, terms }) {
}

function adjEqQueries ({ fields, relation, terms, term }) {
if (!['=', '==', 'adj'].includes(relation)) { return null }
const type = (relation === '==') ? 'exact' : 'phrase'
return matchTermWithFields(fields, term, type)
}
Expand Down Expand Up @@ -335,7 +370,7 @@ function multiMatch (fields, term, type) {
}

module.exports = {
buildEsQuery,
CqlQuery,
buildEsQueryFromTree,
buildBoolean,
buildAtomic,
Expand Down
21 changes: 10 additions & 11 deletions lib/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ const { parseParams, deepValue } = require('../lib/util')

const ApiRequest = require('./api-request')
const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder')
const cqlQueryBuilder = require('./elasticsearch/cql_query_builder')
const { displayParsed } = require('./elasticsearch/cql_grammar')
const { CqlQuery } = require('./elasticsearch/cql_query_builder')
const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config')

const errors = require('./errors')
Expand Down Expand Up @@ -651,10 +650,12 @@ module.exports = function (app, _private = null) {
app.logger.debug('Parsed params: ', params)

let parsed = {}
let cqlQuery = null

if (params.search_scope === 'cql') {
cqlQuery = new CqlQuery(params.q)
try {
parsed = displayParsed(params.q) // ?
parsed = cqlQuery.displayParsed()
} catch (e) {
throw new IndexSearchError('Unknown parsing error. Error most likely near end of string')
}
Expand All @@ -666,16 +667,14 @@ module.exports = function (app, _private = null) {
}
}

let body = buildElasticBody(params)
let body = buildElasticBody(params, cqlQuery)

// Strip unnecessary _source fields
body._source = {
excludes: EXCLUDE_FIELDS.concat(['items'])
}

if (params.search_scope !== 'cql') {
body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items })
}
body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items })

app.logger.debug('Resources#search', RESOURCES_INDEX, body)

Expand Down Expand Up @@ -878,13 +877,13 @@ module.exports = function (app, _private = null) {
*
* @return {object} An object that can be posted directly to ES
*/
const buildElasticBody = function (params) {
const buildElasticBody = function (params, cqlQuery = null) {
const body = {
from: (params.per_page * (params.page - 1)),
size: params.per_page
}

body.query = buildElasticQuery(params)
body.query = buildElasticQuery(params, cqlQuery)

// Apply sort:
let direction
Expand All @@ -910,10 +909,10 @@ const buildElasticBody = function (params) {
*
* @return {object} ES query object suitable to be POST'd to ES endpoint
*/
const buildElasticQuery = function (params) {
const buildElasticQuery = function (params, cqlQuery = null) {
const request = ApiRequest.fromParams(params)
if (params.search_scope === 'cql') {
const query = cqlQueryBuilder.buildEsQuery(params.q, request)
const query = (cqlQuery || new CqlQuery(params.q)).buildEsQuery(request)
return query
}

Expand Down
14 changes: 12 additions & 2 deletions test/cql_grammar.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ function validateAtomicQuery (parsed, scope, relation, quotedTerm) {
const relationNode = atomicQuery.children.find(child => child.type === 'relation')
const relationTerm = relationNode.children.find(child => child.type === 'relation_term')
expect(relationTerm.text).to.equal(relation)
const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term')
expect(quotedTermNode.text).to.equal(quotedTerm)
const searchTermNode = atomicQuery.children.find(child => child.type === 'search_term')
expect(searchTermNode.text).to.equal(quotedTerm)
}

describe('CQL Grammar', function () {
Expand All @@ -29,6 +29,16 @@ describe('CQL Grammar', function () {
validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"')
})

it('parses single-word atomic queries without quotes', function () {
validateAtomicQuery(parseWithRightCql('title=hamlet'), 'title', '=', 'hamlet')
})

it('parses quoted queries containing special characters', function () {
validateAtomicQuery(parseWithRightCql('title="hamlet=prince"'), 'title', '=', '"hamlet=prince"')
validateAtomicQuery(parseWithRightCql('date > "1990 > 1980"'), 'date', '>', '"1990 > 1980"')
validateAtomicQuery(parseWithRightCql('author adj "shakespeare (william)"'), 'author', 'adj', '"shakespeare (william)"')
})

it('allows whitespace variants', function () {
validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"')
validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"')
Expand Down
Loading
Loading