From 2ded42e9feecb6bd5938c4a8fb161781af8dbc94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torkel=20=C3=96degaard?= Date: Tue, 24 Dec 2013 09:32:51 +0100 Subject: [PATCH] improved parser code --- src/app/services/graphite/lexer.js | 126 +----------- src/app/services/graphite/parser.js | 303 ++++++++++++---------------- src/test/specs/lexer-specs.js | 14 +- src/test/specs/parser-specs.js | 26 +-- 4 files changed, 159 insertions(+), 310 deletions(-) diff --git a/src/app/services/graphite/lexer.js b/src/app/services/graphite/lexer.js index b57e7755565..81754ce314e 100644 --- a/src/app/services/graphite/lexer.js +++ b/src/app/services/graphite/lexer.js @@ -130,22 +130,12 @@ define([ i2 >= 48 && i2 <= 57; // 0-9 } - var Token = { - Identifier: "Identifier", - NumericLiteral: "NumericLiteral", - StringLiteral: "StringLiteral", - Punctuator: "Punctuator" - }; - - function Lexer(expression) { this.input = expression; this.char = 1; this.from = 1; } - Lexer.Token = Token; - Lexer.prototype = { peek: function (i) { @@ -344,7 +334,7 @@ define([ switch (id) { default: - type = Token.Identifier; + type = "identifier"; } return { @@ -415,7 +405,7 @@ define([ if (value.length <= 2) { // 0x return { - type: Token.NumericLiteral, + type: 'number', value: value, isMalformed: true }; @@ -429,7 +419,7 @@ define([ } return { - type: Token.NumericLiteral, + type: 'number', value: value, base: 16, isMalformed: false @@ -465,7 +455,7 @@ define([ } return { - type: Token.NumericLiteral, + type: 'number', value: value, base: 8, isMalformed: false @@ -545,7 +535,7 @@ define([ } return { - type: Token.NumericLiteral, + type: 'number', value: value, base: 10, isMalformed: !isFinite(value) @@ -563,7 +553,7 @@ define([ case "{": case "}": return { - type: Token.Punctuator, + type: ch1, value: ch1 }; } @@ -594,93 +584,22 @@ define([ var value = ""; var startLine = this.line; var startChar = this.char; - var allowNewLine = false; this.skip(); while (this.peek() !== quote) { - while (this.peek() === "") { // End Of Line - // If an EOL is not preceded by a backslash, show a warning - // and proceed like it was a legit multi-line string where - // author simply forgot to escape the newline symbol. - // - // Another approach is to implicitly close a string on EOL - // but it generates too many false positives. - - if (!allowNewLine) { - this.trigger("warning", { - code: "W112", - line: this.line, - character: this.char - }); - } else { - allowNewLine = false; - - // Otherwise show a warning if multistr option was not set. - // For JSON, show warning no matter what. - - this.triggerAsync("warning", { - code: "W043", - line: this.line, - character: this.char - }, checks, function () { return !state.option.multistr; }); - - this.triggerAsync("warning", { - code: "W042", - line: this.line, - character: this.char - }, checks, function () { return state.jsonMode && state.option.multistr; }); - } - - // If we get an EOF inside of an unclosed string, show an - // error and implicitly close it at the EOF point. - - if (!this.nextLine()) { - this.trigger("error", { - code: "E029", - line: startLine, - character: startChar - }); - - return { - type: Token.StringLiteral, - value: value, - isUnclosed: true, - quote: quote - }; - } - } - - allowNewLine = false; var char = this.peek(); var jump = 1; // A length of a jump, after we're done // parsing this character. - if (char < " ") { - // Warn about a control character in a string. - this.trigger("warning", { - code: "W113", - line: this.line, - character: this.char, - data: [ "" ] - }); - } - // Special treatment for some escaped characters. - if (char === "\\") { this.skip(); char = this.peek(); switch (char) { case "'": - this.triggerAsync("warning", { - code: "W114", - line: this.line, - character: this.char, - data: [ "\\'" ] - }, checks, function () {return state.jsonMode; }); break; case "b": char = "\b"; @@ -699,41 +618,16 @@ define([ break; case "0": char = "\0"; - - // Octal literals fail in strict mode. - // Check if the number is between 00 and 07. - var n = parseInt(this.peek(1), 10); - this.triggerAsync("warning", { - code: "W115", - line: this.line, - character: this.char - }, checks, - function () { return n >= 0 && n <= 7 && state.directive["use strict"]; }); break; case "u": char = String.fromCharCode(parseInt(this.input.substr(1, 4), 16)); jump = 5; break; case "v": - this.triggerAsync("warning", { - code: "W114", - line: this.line, - character: this.char, - data: [ "\\v" ] - }, checks, function () { return state.jsonMode; }); - char = "\v"; break; case "x": var x = parseInt(this.input.substr(1, 2), 16); - - this.triggerAsync("warning", { - code: "W114", - line: this.line, - character: this.char, - data: [ "\\x-" ] - }, checks, function () { return state.jsonMode; }); - char = String.fromCharCode(x); jump = 3; break; @@ -742,7 +636,6 @@ define([ case "/": break; case "": - allowNewLine = true; char = ""; break; case "!": @@ -753,11 +646,6 @@ define([ /*falls through */ default: // Weird escaping. - this.trigger("warning", { - code: "W044", - line: this.line, - character: this.char - }); } } @@ -767,7 +655,7 @@ define([ this.skip(); return { - type: Token.StringLiteral, + type: 'string', value: value, isUnclosed: false, quote: quote diff --git a/src/app/services/graphite/parser.js b/src/app/services/graphite/parser.js index 928e1715cec..f43c2c78da6 100644 --- a/src/app/services/graphite/parser.js +++ b/src/app/services/graphite/parser.js @@ -20,186 +20,147 @@ define([ this.lexer = new Lexer(expression); this.state = "start"; this.error = null; + this.tokens = this.lexer.tokenize(); + this.index = 0; } Parser.Nodes = NodeTypes; Parser.prototype = { - getAst: function () { - return this.parse('start'); - }, - isUnexpectedToken: function (expected, value) { - if (this.token === null) { - this.error = "Expected token: " + expected + " instead found end of string"; - return true; - } + getAst: function () { + return this.start(); + }, - if (this.token.type === expected) { - return false; - } - - if (value && this.token.value === value) { - return false; - } - - this.error = "Expected token " + expected + - ' instead found token ' + this.token.type + - ' ("' + this.token.value + '")' + - " at position: " + this.lexer.char; - - return true; - }, - - parse: function (state, allowParams) { - var node = { }; - - while(true) { - this.token = this.lexer.next(); - - switch(state) { - case "start": - if (allowParams) { - if (this.token === null) { - return null; - } - - if (this.token.type === Lexer.Token.NumericLiteral) { - return { - type: NodeTypes.NumericLiteral, - value: parseInt(this.token.value) - }; - } - - if (this.token.type === Lexer.Token.StringLiteral) { - return { - type: NodeTypes.StringLiteral, - value: this.token.value - }; - } - } - - if (this.isUnexpectedToken(Lexer.Token.Identifier)) { - return; - } - - state = "identifier"; - this.prevToken = this.token; - break; - - case "identifier": - if (this.token == null || (allowParams && this.token.value === ',')) { - return { - type: NodeTypes.MetricExpression, - segments: [{ - type: NodeTypes.MetricExpression, - value: this.prevToken.value - }] - }; - } - - if (this.isUnexpectedToken(Lexer.Token.Punctuator)) { - return null; - } - - if (this.token.value === '.') { - state = "metricNode"; - node.type = NodeTypes.MetricExpression; - node.segments = [{ - type: NodeTypes.MetricNode, - value: this.prevToken.value - }]; - - continue; - } - - if (this.token.value === '(') { - node.type = NodeTypes.FunctionCall; - node.name = this.prevToken.value; - node.params = this.parseFunc(); - return node; - } - - if (this.token.value === ')') { - return node; - } - - break; - - case 'metricEnd': - if (this.token === null) { - return node; - } - - if (this.isUnexpectedToken(Lexer.Token.Punctuator)) { - return null; - } - - if (this.token.value === '.') { - state = 'metricNode'; - } - - if (allowParams && (this.token.value === ',' || this.token.value === ')')) { - return node; - } - - break; - case 'metricNode': - if (this.isUnexpectedToken(Lexer.Token.Identifier)) { - return null; - } - - node.segments.push({ - type: NodeTypes.MetricNode, - value: this.token.value - }); - - state = 'metricEnd'; - break; - default: - this.error = 'unknown token: ' + this.token.type; - } - } - }, - - parseFunc: function() { - var arguments = []; - var arg; - - while(true) { - - arg = this.parse('start', true); - if (arg === null) { - this.error = "expected function arguments"; - return null; - } - - arguments.push(arg); - - if (this.token === null) { - this.error = "expected closing function at position: " + this.lexer.char; - return null; - } - - if (this.token.value === ')') { - return arguments; - } - - if (this.token.type === Lexer.Token.NumericLiteral || - this.token.type === Lexer.Token.StringLiteral) { - this.token = this.lexer.next(); - } - - if (this.isUnexpectedToken(Lexer.Token.Punctuator, ',')) { - return null; - } - - if (this.token.value === ')') { - return arguments; - } - } + start: function () { + return this.functionCall() || this.metricExpression(); + }, + metricExpression: function() { + if (!this.match('identifier')) { + return null; } + + var node = { + type: 'metric', + segments: [{ + type: 'segment', + value: this.tokens[this.index].value + }] + } + + this.index++; + + if (this.match('.')) { + this.index++; + var rest = this.metricExpression(); + node.segments = node.segments.concat(rest.segments) + } + + return node; + }, + + matchToken: function(type, index) { + var token = this.tokens[this.index + index]; + return (token === undefined && type === '') || + token && token.type === type; + }, + + match: function(token1, token2) { + return this.matchToken(token1, 0) && + (!token2 || this.matchToken(token2, 1)) + }, + + functionCall: function() { + if (!this.match('identifier', '(')) { + return null; + } + + var node = { + type: 'function', + name: this.tokens[this.index].value, + }; + + this.index += 2; + + node.params = this.functionParameters(); + + if (!this.match(')')) { + this.error = 'missing closing paranthesis'; + return null; + } + + this.index++; + + return node; + }, + + functionParameters: function () { + if (this.match(')') || this.match('')) { + return []; + } + + var param = + this.functionCall() || + this.metricExpression() || + this.numericLiteral() || + this.stringLiteral(); + + if (!this.match(',')) { + return [param]; + } + + this.index++; + return [param].concat(this.functionParameters()); + }, + + numericLiteral: function () { + if (!this.match('number')) { + return null; + } + + this.index++; + + return { + type: 'number', + value: this.tokens[this.index-1].value + }; + }, + + stringLiteral: function () { + if (!this.match('string')) { + return null; + } + + this.index++; + + return { + type: 'string', + value: this.tokens[this.index-1].value + }; + }, + + isUnexpectedToken: function (expected, value) { + if (this.token === null) { + this.error = "Expected token: " + expected + " instead found end of string"; + return true; + } + + if (this.token.type === expected) { + return false; + } + + if (value && this.token.value === value) { + return false; + } + + this.error = "Expected token " + expected + + ' instead found token ' + this.token.type + + ' ("' + this.token.value + '")' + + " at position: " + this.lexer.char; + + return true; + }, }; return Parser; diff --git a/src/test/specs/lexer-specs.js b/src/test/specs/lexer-specs.js index 144f5bc98c6..a87883f3d05 100644 --- a/src/test/specs/lexer-specs.js +++ b/src/test/specs/lexer-specs.js @@ -9,22 +9,22 @@ define([ var tokens = lexer.tokenize(); expect(tokens[0].value).to.be('metric'); expect(tokens[1].value).to.be('.'); - expect(tokens[2].type).to.be(Lexer.Token.Identifier); - expect(tokens[3].type).to.be(Lexer.Token.Punctuator); + expect(tokens[2].type).to.be('identifier'); + expect(tokens[4].type).to.be('identifier'); }); it('should tokenize functions and args', function() { var lexer = new Lexer("sum(metric.test, 12, 'test')"); var tokens = lexer.tokenize(); expect(tokens[0].value).to.be('sum'); - expect(tokens[0].type).to.be(Lexer.Token.Identifier); + expect(tokens[0].type).to.be('identifier'); expect(tokens[1].value).to.be('('); - expect(tokens[1].type).to.be(Lexer.Token.Punctuator); - expect(tokens[5].type).to.be(Lexer.Token.Punctuator); + expect(tokens[1].type).to.be('('); + expect(tokens[5].type).to.be(','); expect(tokens[5].value).to.be(','); - expect(tokens[6].type).to.be(Lexer.Token.NumericLiteral); + expect(tokens[6].type).to.be('number'); expect(tokens[6].value).to.be('12'); - expect(tokens[8].type).to.be(Lexer.Token.StringLiteral); + expect(tokens[8].type).to.be('string'); expect(tokens[8].value).to.be('test'); expect(tokens[tokens.length - 1].value).to.be(')'); }); diff --git a/src/test/specs/parser-specs.js b/src/test/specs/parser-specs.js index 857bf1dbfef..d85a730679b 100644 --- a/src/test/specs/parser-specs.js +++ b/src/test/specs/parser-specs.js @@ -9,7 +9,7 @@ define([ var rootNode = parser.getAst(); expect(parser.error).to.be(null); - expect(rootNode.type).to.be(Parser.Nodes.MetricExpression); + expect(rootNode.type).to.be('metric'); expect(rootNode.segments.length).to.be(5); expect(rootNode.segments[0].value).to.be('metric'); @@ -19,7 +19,7 @@ define([ var parser = new Parser('sum(test)'); var rootNode = parser.getAst(); expect(parser.error).to.be(null); - expect(rootNode.type).to.be(Parser.Nodes.FunctionCall); + expect(rootNode.type).to.be('function'); expect(rootNode.params.length).to.be(1); }); @@ -28,11 +28,11 @@ define([ var rootNode = parser.getAst(); expect(parser.error).to.be(null); - expect(rootNode.type).to.be(Parser.Nodes.FunctionCall); + expect(rootNode.type).to.be('function'); expect(rootNode.params.length).to.be(3); - expect(rootNode.params[0].type).to.be(Parser.Nodes.MetricExpression); - expect(rootNode.params[1].type).to.be(Parser.Nodes.NumericLiteral); - expect(rootNode.params[2].type).to.be(Parser.Nodes.StringLiteral); + expect(rootNode.params[0].type).to.be('metric'); + expect(rootNode.params[1].type).to.be('number'); + expect(rootNode.params[2].type).to.be('string'); }); it('function with nested function', function() { @@ -40,13 +40,13 @@ define([ var rootNode = parser.getAst(); expect(parser.error).to.be(null); - expect(rootNode.type).to.be(Parser.Nodes.FunctionCall); + expect(rootNode.type).to.be('function'); expect(rootNode.params.length).to.be(1); - expect(rootNode.params[0].type).to.be(Parser.Nodes.FunctionCall); + expect(rootNode.params[0].type).to.be('function'); expect(rootNode.params[0].name).to.be('scaleToSeconds'); expect(rootNode.params[0].params.length).to.be(2); - expect(rootNode.params[0].params[0].type).to.be(Parser.Nodes.MetricExpression); - expect(rootNode.params[0].params[1].type).to.be(Parser.Nodes.NumericLiteral); + expect(rootNode.params[0].params[0].type).to.be('metric'); + expect(rootNode.params[0].params[1].type).to.be('number'); }); it('function with multiple series', function() { @@ -54,10 +54,10 @@ define([ var rootNode = parser.getAst(); expect(parser.error).to.be(null); - expect(rootNode.type).to.be(Parser.Nodes.FunctionCall); + expect(rootNode.type).to.be('function'); expect(rootNode.params.length).to.be(2); - expect(rootNode.params[0].type).to.be(Parser.Nodes.MetricExpression); - expect(rootNode.params[1].type).to.be(Parser.Nodes.MetricExpression); + expect(rootNode.params[0].type).to.be('metric'); + expect(rootNode.params[1].type).to.be('metric'); }); });