Miércoles 05/05/2021
Learning Nearley.JS
See the examples at https://github.com/ULL-ESIT-PL/learning-nearley/tree/develop/examples
Ambiguedad
Removing Ambiguity
➜ calculator git:(main) ✗ cat arithmetic.ne
# This is a nice little grammar to familiarize yourself
# with the nearley syntax.
# `main` is the nonterminal that nearley tries to parse, so
# we define it first.
# The _'s are defined as whitespace below. This is a mini-
# -idiom.
main -> null | _ AS _ {% function(d) {return d[1]; } %}
# Addition and subtraction
AS -> AS _ "+" _ MD {% function(d) {return d[0]+d[4]; } %}
| AS _ "-" _ MD {% function(d) {return d[0]-d[4]; } %}
| MD {% id %}
# PEMDAS!
# We define each level of precedence as a nonterminal.
# Parentheses
P -> "(" _ AS _ ")" {% function(d) {return d[2]; } %}
| N {% id %}
# Factorial
F -> P "!" {% function(d) {
const fac = n => (n===0)?1:n*fac(n-1);
return fac(d[0]);
}
%}
| P {% id %}
# Exponents
E -> F _ "^" _ E {% function(d) {return Math.pow(d[0], d[4]); } %}
| F {% id %}
# Multiplication and division
MD -> MD _ "*" _ E {% function(d) {return d[0]*d[4]; } %}
| MD _ "/" _ E {% function(d) {return d[0]/d[4]; } %}
| E {% id %}
# A number or a function of a number
N -> float {% id %}
| "sin" _ P {% function(d) {return Math.sin(d[2]); } %}
| "cos" _ P {% function(d) {return Math.cos(d[2]); } %}
| "tan" _ P {% function(d) {return Math.tan(d[2]); } %}
| "asin" _ P {% function(d) {return Math.asin(d[2]); } %}
| "acos" _ P {% function(d) {return Math.acos(d[2]); } %}
| "atan" _ P {% function(d) {return Math.atan(d[2]); } %}
| "pi" {% function(d) {return Math.PI; } %}
| "e" {% function(d) {return Math.E; } %}
| "sqrt" _ P {% function(d) {return Math.sqrt(d[2]); } %}
| "ln" _ P {% function(d) {return Math.log(d[2]); } %}
# I use `float` to basically mean a number with a decimal point in it
float ->
int "." int {% function(d) {return parseFloat(d[0] + d[1] + d[2])} %}
| int {% function(d) {return parseInt(d[0])} %}
int -> [0-9]:+ {% function(d) {return d[0].join(""); } %}
# Whitespace. The important thing here is that the postprocessor
# is a null-returning function. This is a memory efficiency trick.
_ -> [\s]:* {% function(d) {return null; } %}
Ejecución:
➜ calculator git:(main) ✗ npm test
> calculator@1.0.0 test
> npm run compile && export NODE_PATH=$NODE_PATH:`npm root -g` && node calculator.js
> calculator@1.0.0 compile
> nearleyc arithmetic.ne -o grammar.js
> 2+3*4
14
> sin(pi/2)
1
> sinpi
1.2246467991473532e-16
>
Mejoras
- Mejoramos el análisis léxico y
- resolvemos el “bug” de
sinpi. - Factorizamos las acciones semánticas
La gramática que sigue no está terminada pero muestra las metodologías a seguir.
➜ calculator git:(main) ✗ cat arithmetic-parenthesis.ne
# How to Implement Lexical Analysis in Tools Whithout a separated Lexer
# To use it run:
# nearleyc arithmetic-parenthesis.ne -o grammar.js && export NODE_PATH=$NODE_PATH:`npm root -g` && node calculator.js
# This is a nice little grammar to familiarize yourself with the nearley syntax.
# It parses valid calculator input
# ln (3 + 2*(8/e - sin(pi/5)))
# is valid input.
@{%
const bin = (([x, op, y]) => op(x,y));
const Null = (d => null);
const fac = n => (n===0)?1:n*fac(n-1);
const unaryPost = (([p, op]) => op(p));
const funApply = ([fun, arg]) => fun(arg);
%}
main => null {% d => "" %} # Allow for empty lines
| AS _ {% function(d) {return d[0]; } %}
# PEMDAS!
# We define each level of precedence as a nonterminal.
# Addition and subtraction
AS -> AS PLUS MD {% bin %} # Prefer this syntax
| AS MINUS MD {% bin %}
| MD {% id %}
# Multiplication and division
MD -> MD MULT E {% bin %}
| MD DIV E {% bin %}
| E {% id %}
# Exponents
E -> F EXP E {% bin %}
| F {% id %}
# Factorial
F -> P FACTORIAL {% unaryPost %}
| P {% id %}
# Fixed "bug" sinpi
P -> Q
| FLOAT {% id %}
| SIN Q {% funApply %}
| COS Q {% funApply %}
| TAN Q {% funApply %}
| ASIN Q {% funApply %}
| ACOS Q {% funApply %}
| ATAN Q {% funApply %}
| PI {% id %}
| EULER {% id %}
| SQRT Q {% funApply %}
| LN Q {% funApply %}
# Parentheses
Q -> LP AS RP {% ([lp, as, rp]) => as %}
##### LEXICAL ANALYSIS #################################################
# I use `float` to basically mean a number with a decimal point in it
FLOAT -> _ float {% d => d[1] %}
float ->
int "." int {% function(d) {return parseFloat(d[0] + d[1] + d[2])} %}
| int {% function(d) {return parseInt(d[0])} %}
int -> [0-9]:+ {% function(d) {return d[0].join(""); } %}
# Whitespace. The important thing here is that the postprocessor
# is a null-returning function. This is a memory efficiency trick.
_ -> [\s]:* {% function(d) {return null; } %}
PLUS -> _ "+" {% function(d) {return ((a,b) => a+b); } %}
MINUS -> _ "-" {% function(d) {return ((a,b) => a-b); } %}
MULT -> _ "*" {% function(d) {return ((a,b) => a*b); } %}
DIV -> _ "/" {% function(d) {return ((a,b) => a/b); } %}
EXP -> _ "^" {% function(d) {return ((a,b) => Math.pow(a,b)); } %}
FACTORIAL -> "!" {% d => fac %}
LP -> _ "(" {% Null %}
RP -> _ ")" {% Null %}
SIN -> _ "sin"i {% d => Math.sin %}
COS -> _ "cos"i {% d => Math.cos %}
TAN -> _ "tan"i {% d => Math.tan %}
ASIN -> _ "asin"i {% d => Math.asin %}
ACOS -> _ "acos"i {% d => Math.acos %}
ATAN -> _ "atan"i {% d => Math.atan %}
PI -> _ "pi"i {% d => Math.PI %}
EULER -> _ "e"i {% d => Math.E %}
SQRT -> _ "sqrt"i {% d => Math.sqrt %}
LN -> _ "ln"i {% d => Math.log %}
Ejecución:
➜ calculator git:(main) ✗ npm run test2
> calculator@1.0.0 test2
> nearleyc arithmetic-parenthesis.ne -o grammar.js && export NODE_PATH=$NODE_PATH:`npm root -g` && node calculator.js
> 2 + 3 * 4
14
> sinpi
-----^ Error.
> sin(pi/2)
1
References
- Examples in repo learning-nearley
- Esquemas de Traducción y Definiciones Dirigidas por la Sintaxis (teacher’s old notes at GH)
- Compiler Theory:Syntax-Directed Translation by Marc Moreno Maza
- Nearley.js section in this site
- The Earley Algorithm explained in this site
Video
Que hemos visto
Let’s continue with Nearley.JS