#!/usr/bin/env node
/**
* @author Aarón José Cabrera Martín
* @description exporta las funciones necesarias para analizar gramaticalmente
* un programa EGG. La interfaz de usuario debería ser llamar a la función
* parse la cual recibe un string que contiene un programa EGG y devuelve el
* árbol AST correspondiente.
*/
'use strict'
const fileSystem = require('fs');
//const SPACE = /^(\s|#.*)*/;
// empiezan y terminan con " le siguen 0 o mas caracteres que o no son
// ni " ni \ o es \ seguido de cualquier caracter(entre ellos el admitimos \")
// necesitamos otro parentesis sin coger las "" para poder extraer la cadena
// si no lo ponemos, el parentesis al que le afecta el * solo daria el ultimo caracter
/*const STRING = /^"((?:[^"\\]|\\.)*)"/;
const NUMBER = /^[+-]?((\d+\.\d+)|(\d+))\b/;
const WORD = /^[^\s(),#"]+/;
// \r es retorno de carro, suelen ir juntos pero pueden ir separados
const NEWLINE = /\r\n|\n|\r/;
const LP = /^\(/;
const RP = /^\)/;
const COMMA = /^,/;*/
// Token patterns. Each pattern wraps its payload in a named capture group so
// that, once combined, the matched group name identifies the token type.
// SPACE: one or more whitespace characters and/or `#` comments.
const SPACE = /(?<SPACE>(\s|#.*)+)/;
// STRING: double-quoted text; the group holds the content without the quotes
// and accepts backslash escapes (including \").
const STRING = /"(?<STRING>(?:[^"\\]|\\.)*)"/;
const NUMBER = /(?<NUMBER>[+-]?((\d+\.\d+)|(\d+))\b)/;
const WORD = /(?<WORD>[^\s(),#"]+)/;
// \r is a carriage return; it usually comes together with \n but either one
// may appear on its own.
const NEWLINE = /(?<NEWLINE>\r\n|\n|\r)/;
const LP = /(?<LP>\()/;
const RP = /(?<RP>\))/;
const COMMA = /(?<COMMA>,)/;
// Group names, in the same order as the alternatives of the combined regex.
const tokens = ['SPACE', 'NEWLINE', 'STRING', 'NUMBER', 'LP', 'RP', 'COMMA', 'WORD'];
// Single sticky ('y') regex made of every pattern above joined with `|`; it
// only matches at its own lastIndex, so successive exec calls walk the input
// token by token.
const allRegex = new RegExp(
  [SPACE, NEWLINE, STRING, NUMBER, LP, RP, COMMA, WORD]
    .map((pattern) => pattern.source)
    .join('|'),
  'y'
);
// Index at which the most recent token matched. nextToken treats a match that
// starts before this index as the end of the program. parse resets it to 0 on
// every call.
let previousIndex = 0;
// Current lookahead. nextToken stores the latest token here as
// { type, value }, or null when no token is left.
let lookahead = '';
// The Egg program text being parsed; assigned by parse.
let program;
// Line counter interpolated into syntax error messages; parse resets it to 0.
let lineNumber = 0;
/**
 * Removes leading whitespace and `#` comments from the given text.
 *
 * Legacy helper: none of the other functions visible in this file call it.
 * Besides returning the trimmed text it stores the first remaining character
 * (or null when nothing remains) in the module-level `lookahead`.
 *
 * @param {String} string text to trim
 * @returns {String} the text without its leading whitespace and comments
 */
function skipSpace(string) {
  // The pattern is anchored at the start on purpose: only a *leading* run of
  // blanks/comments may be dropped. When there is nothing to skip, replace
  // simply returns the input unchanged instead of failing on a null match.
  const correctedString = string.replace(/^(?:\s|#.*)+/, '');
  lookahead = correctedString.length > 0 ? correctedString[0] : null;
  return correctedString;
}
// actualiza la variable lookahead y actualiza por donde se va analizando del programa
/*function nextToken() {
// exec devuelve un vector de string con propiedades, con los matches
let match = SPACE.exec(program);
// test simplemente devuelve true o false
if (NEWLINE.test(match[0])) {
// split nos devolvera un array de string con todos los caracteres que hayan casado con newline
lineNumber += match[0].split(NEWLINE).length - 1;
}
program = program.slice(match[0].length);
if (program.length > 0) {
if (match = STRING.exec(program)) {
lookahead = { type: 'STRING', value: match[1] };
} else if (match = NUMBER.exec(program)) {
lookahead = { type: 'NUMBER', value: Number(match[0]) };
} else if (match = LP.exec(program)) {
lookahead = { type: 'LP', value: match[0] };
} else if (match = RP.exec(program)) {
lookahead = { type: 'RP', value: match[0] };
} else if (match = COMMA.exec(program)) {
lookahead = { type: 'COMMA', value: match[0] };
} else if (match = WORD.exec(program)) {
lookahead = { type: 'WORD', value: match[0] };
} else {
throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
}
program = program.slice(match[0].length);
} else {
lookahead = null;
}
return lookahead;
}*/
/**
 * Reads the next significant token of `program` into `lookahead`.
 *
 * Whitespace and `#` comments are skipped; every line break found inside them
 * is added to `lineNumber`, which therefore counts the line breaks consumed so
 * far and is what the error messages report.
 *
 * @returns {Object|null} the new lookahead: a `{ type, value }` token whose
 * type is STRING, NUMBER, LP, RP, COMMA or WORD (NUMBER values are converted
 * with Number), or null once the end of the program has been reached
 * @throws {SyntaxError} when the text at the current position cannot start
 * any token (for example an unterminated string literal)
 */
function nextToken() {
  lookahead = null;
  // exec coerces its argument to a string; use the same text for the
  // end-of-input check below.
  const text = String(program);
  for (;;) {
    // A failed sticky exec resets lastIndex to 0, so the position has to be
    // remembered beforehand to tell "end of input" from "no token here".
    const start = allRegex.lastIndex;
    const match = allRegex.exec(text);
    if (match === null) {
      if (start < text.length) {
        throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
      }
      return lookahead;
    }
    // Once the end has been reached lastIndex is back at 0; a match starting
    // before the previous token means the input was already exhausted.
    if (previousIndex > match.index) {
      return lookahead;
    }
    previousIndex = match.index;

    const groups = match.groups;
    const type = tokens.find((name) => groups[name] !== undefined);
    if (type === undefined) {
      // Defensive: every alternative of allRegex has a named group.
      throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
    }
    if (type === 'SPACE' || type === 'NEWLINE') {
      // Not a real token: account for its line breaks and keep scanning.
      const lineBreaks = groups[type].match(/\r\n|\r|\n/g);
      if (lineBreaks !== null) {
        lineNumber += lineBreaks.length;
      }
      continue;
    }
    lookahead = {
      type,
      value: type === 'NUMBER' ? Number(groups[type]) : groups[type],
    };
    return lookahead;
  }
}
/**
 * Parses one expression starting at the current `lookahead` token.
 *
 * - STRING and NUMBER tokens become leaf nodes `{ type: 'value', value }`.
 *   A literal may not be followed by `(`: values cannot be called.
 * - WORD tokens become `{ type: 'word', name }` and are handed to parseApply,
 *   which wraps them in an apply node when an argument list follows.
 *
 * The token(s) belonging to the expression are consumed through nextToken.
 *
 * @returns {Object} the AST for the expression (possibly a fragment of the
 * whole program's tree)
 * @throws {SyntaxError} if no token is left, if a literal is used as a
 * function, or if the current token cannot start an expression
 */
function parseExpression() {
  // The end of the input (or an empty program) leaves lookahead null; report
  // it as a syntax error instead of failing on a property access.
  if (!lookahead) {
    throw new SyntaxError(`Unexpected end of program, expected an expression on line:${lineNumber}\nOn program:${program}`);
  }
  if (lookahead.type === 'STRING' || lookahead.type === 'NUMBER') {
    const literal = { type: 'value', value: lookahead.value };
    nextToken();
    // lookahead is null when the literal was the last token of the program.
    if (lookahead && lookahead.type === 'LP') {
      throw new SyntaxError(`Unexpected syntax, using a value as a function on line:${lineNumber}\nOn program:${program}`);
    }
    return literal;
  }
  if (lookahead.type === 'WORD') {
    const word = { type: 'word', name: lookahead.value };
    nextToken();
    return parseApply(word);
  }
  throw new SyntaxError('Unexpected syntax ' + program);
}
/**
 * Parses zero or more call suffixes `( arg, arg, ... )` following `ast`.
 *
 * When the current `lookahead` is not `(` (or the program has ended) `ast` is
 * returned untouched. Otherwise the arguments are parsed with
 * parseExpression, an `{ type: 'apply', operator, args }` node is built, and
 * the function recurses so chained calls such as `f(a)(b)` are supported.
 *
 * @param {Object} ast node that may be applied (the operator)
 * @returns {Object} `ast` itself or the apply node(s) wrapping it
 * @throws {SyntaxError} on a missing `)`, on a missing `,` between arguments,
 * or when a `,` is not followed by another argument
 */
function parseApply(ast) {
  // A null lookahead means the end of the program; anything other than "("
  // means this expression is not being called.
  if (!lookahead || lookahead.type !== 'LP') {
    return ast;
  }
  nextToken();
  const call = { type: 'apply', operator: ast, args: [] };
  while (lookahead && lookahead.type !== 'RP') {
    call.args.push(parseExpression());
    // The input ended right after an argument: leave the loop so the
    // missing ")" error below is reported.
    if (!lookahead) {
      break;
    }
    // Every argument must be followed by a comma or by the closing paren.
    if (lookahead.type === 'COMMA') {
      nextToken();
      // Check for the end of input first; only then is it safe to inspect
      // the token type.
      if (!lookahead) {
        throw new SyntaxError(`Expected a value or expression after a comma, on line:${lineNumber}\nOn program:${program}`);
      }
      if (lookahead.type === 'RP') {
        throw new SyntaxError(`Unexpected ')' after a comma, on line:${lineNumber}\nOn program:${program}`);
      }
    } else if (lookahead.type !== 'RP') {
      throw new SyntaxError(`Expected ',' or ')'on line:${lineNumber}\nOn program:${program}`);
    }
  }
  // The loop stopped because the tokens ran out before the ")".
  if (!lookahead) {
    throw new SyntaxError(`Expected ')' at the end of the line:${lineNumber}\nOn program:${program}`);
  }
  nextToken();
  return parseApply(call);
}
/**
 * Parses an Egg program and returns its AST.
 *
 * All module-level parser state is reinitialised first, including the
 * position of the shared sticky regex, so a previous call that ended with an
 * exception cannot influence this one.
 *
 * @param {String} eggProgram A string that contains the egg program
 * @return {Object} the AST of the program
 * @throws {SyntaxError} if the program is malformed or if text remains after
 * the first complete expression
 */
function parse(eggProgram) {
  lookahead = null;
  program = eggProgram;
  lineNumber = 0;
  previousIndex = 0;
  // allRegex is sticky and shared between calls: when an earlier parse threw
  // before reaching the end of its input, lastIndex still points into that
  // old program and tokenizing would start from the wrong offset.
  allRegex.lastIndex = 0;
  nextToken();
  const resultado = parseExpression();
  if (lookahead) {
    throw new SyntaxError('Unexpected text after program');
  }
  return resultado;
}
/**
 * Reads an Egg source file (as UTF-8) and parses its contents.
 *
 * Failures while reading or parsing are not propagated: the error is printed
 * with console.log and the function returns undefined.
 *
 * @param {String} fileRoute path of the source file to parse
 * @return {Object|undefined} the AST produced by parse, or undefined when
 * reading or parsing failed
 */
function parseFile(fileRoute) {
  let tree;
  try {
    const sourceText = fileSystem.readFileSync(fileRoute, 'utf8');
    tree = parse(sourceText);
  } catch (failure) {
    console.log('Error en parseFile:', failure);
  }
  return tree;
}
/**
 * Parses an Egg program given as text and saves the resulting AST to a file.
 *
 * @param {String} eggProgram text of the Egg program
 * @param {String} fileName name of the file where the AST is stored; it is
 * passed as-is to exportToFile
 * @returns {Object} whatever exportToFile returns for the parsed AST
 */
function parseToFile(eggProgram, fileName) {
  const tree = parse(eggProgram);
  return exportToFile(tree, fileName);
}
/**
 * Writes an AST to disk as tab-indented JSON in a `.evm` file.
 *
 * @param {Object} ast AST of an Egg program
 * @param {String} fileName destination file name; `.evm` is appended when the
 * name does not already end with it
 * @returns {Object} the same `ast` that was received
 */
function exportToFile(ast, fileName) {
  const extension = '.evm';
  let target = fileName;
  if (!target.endsWith(extension)) {
    target += extension;
  }
  // JSON.stringify(value, replacer, space): a null replacer keeps every
  // property and '\t' indents each nesting level with one tab.
  const serialized = JSON.stringify(ast, null, '\t');
  fileSystem.writeFileSync(target, serialized);
  return ast;
}
/**
 * Parses the Egg program stored in `programFile` and exports its AST through
 * exportToFile.
 *
 * parseFile logs and returns undefined when the source cannot be read or
 * parsed; in that case nothing is written and undefined is returned, instead
 * of attempting to export a non-existent AST.
 *
 * @param {String} programFile file that contains an egg program
 * @param {String} [fileName] name of the output file. If it is not given,
 * the value of `programFile` is used
 * @returns {Object|undefined} the exported AST, or undefined when parseFile
 * failed
 */
function parseFromFileToFile(programFile, fileName) {
  const target = fileName === undefined ? programFile : fileName;
  const ast = parseFile(programFile);
  if (ast === undefined) {
    return undefined;
  }
  return exportToFile(ast, target);
}
module.exports = { parse, parseFile, parseToFile, exportToFile, parseFromFileToFile, parseApply, parseExpression };