Source: parse.js

#!/usr/bin/env node
/**
 * @author Aarón José Cabrera Martín
 * @description exporta las funciones necesarias para analizar gramaticalmente
 * un programa EGG. La interfaz de usuario debería ser llamar a la función 
 * parse la cual recibe un string que contiene un programa EGG y devuelve el
 * árbol AST correspondiente.
 */

'use strict'

const fileSystem = require('fs');

//const SPACE = /^(\s|#.*)*/;
// empiezan y terminan con " le siguen 0 o mas caracteres que o no son 
// ni " ni \ o es \ seguido de cualquier caracter(entre ellos el admitimos \")
// necesitamos otro parentesis sin coger las "" para poder extraer la cadena
// si no lo ponemos, el parentesis al que le afecta el * solo daria el ultimo caracter
/*const STRING = /^"((?:[^"\\]|\\.)*)"/;
const NUMBER = /^[+-]?((\d+\.\d+)|(\d+))\b/;
const WORD = /^[^\s(),#"]+/;
// \r es retorno de carro, suelen ir juntos pero pueden ir separados
const NEWLINE = /\r\n|\n|\r/;
const LP = /^\(/;
const RP = /^\)/;
const COMMA = /^,/;*/

// Lexical grammar of EGG. Every pattern wraps its lexeme in a named capture
// group carrying the token kind, so that after the patterns are merged into a
// single regular expression the kind of a match can be read from match.groups.

// Whitespace and/or "#" comments (a comment runs to the end of its line).
const SPACE = /(?<SPACE>(\s|#.*)+)/;
// Double-quoted string; the group captures the contents without the quotes.
// A backslash escapes whatever character follows it, including a quote.
const STRING = /"(?<STRING>(?:[^"\\]|\\.)*)"/;
// Optionally signed integer or decimal, ending at a word boundary.
const NUMBER = /(?<NUMBER>[+-]?((\d+\.\d+)|(\d+))\b)/;
// Any run of characters that are not whitespace, parentheses, comma, "#" or a quote.
const WORD = /(?<WORD>[^\s(),#"]+)/;
// \r is a carriage return; it usually comes paired with \n but may appear alone.
const NEWLINE = /(?<NEWLINE>\r\n|\n|\r)/;
const LP = /(?<LP>\()/;
const RP = /(?<RP>\))/;
const COMMA = /(?<COMMA>,)/;

// Token kinds, listed in the same order as the alternatives of allRegex.
const tokens = ['SPACE', 'NEWLINE', 'STRING', 'NUMBER', 'LP', 'RP', 'COMMA', 'WORD'];

// One sticky ("y") regular expression whose alternatives are all the token
// patterns; being sticky, it only matches exactly at its lastIndex.
const allRegex = new RegExp(
  [SPACE, NEWLINE, STRING, NUMBER, LP, RP, COMMA, WORD]
    .map((pattern) => pattern.source)
    .join('|'),
  'y'
);

// Start index of the most recently matched token. nextToken compares it with
// the index of each new match to tell whether the end of the program was
// reached or a match failed. It must be reset on every call to parse.
let previousIndex = 0;

// Current lookahead. nextToken stores here either a { type, value } token
// object or null when it has no further token to deliver.
let lookahead = '';

// Text of the EGG program being parsed.
let program;

// Line counter interpolated into SyntaxError messages; parse resets it to 0.
let lineNumber = 0;

/**
 * @param {String} string text that may start with whitespace and/or comments
 * @returns {String} the text without its leading whitespace/comments
 * @description Removes the run of whitespace and "#" comments found at the
 * very beginning of the given text. As a side effect it stores the first
 * remaining character in the module-level lookahead (null when nothing is
 * left). Note that this is a single character, not a { type, value } token.
 */
function skipSpace(string) {
  // SPACE is not anchored, so exec may find nothing at all (null) or find a
  // run located further inside the text. Only a run that starts at index 0 is
  // leading space; anything else must be left untouched.
  const match = SPACE.exec(string);
  const skipped = match !== null && match.index === 0 ? match[0].length : 0;
  const correctedString = string.slice(skipped);
  lookahead = correctedString.length > 0 ? correctedString[0] : null;
  return correctedString;
}

// actualiza la variable lookahead y actualiza por donde se va analizando del programa
/*function nextToken() {
  // exec devuelve un vector de string con propiedades, con los matches
  let match = SPACE.exec(program);
  // test simplemente devuelve true o false
  if (NEWLINE.test(match[0])) {
    // split nos devolvera un array de string con todos los caracteres que hayan casado con newline
    lineNumber += match[0].split(NEWLINE).length - 1;
  }

  program = program.slice(match[0].length);

  if (program.length > 0) {
    if (match = STRING.exec(program)) {
      lookahead = { type: 'STRING', value: match[1] };
    } else if (match = NUMBER.exec(program)) {
      lookahead = { type: 'NUMBER', value: Number(match[0]) };
    } else if (match = LP.exec(program)) {
      lookahead = { type: 'LP', value: match[0] };
    } else if (match = RP.exec(program)) {
      lookahead = { type: 'RP', value: match[0] };
    } else if (match = COMMA.exec(program)) {
      lookahead = { type: 'COMMA', value: match[0] };
    } else if (match = WORD.exec(program)) {
      lookahead = { type: 'WORD', value: match[0] };
    } else {
      throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
    }
    program = program.slice(match[0].length);
  } else {
    lookahead = null;
  }
  return lookahead;
}*/

/**
 * @returns {?Object} the new lookahead: a { type, value } token, or null when
 * the end of the program has been reached
 * @throws {SyntaxError} when the text at the current position does not start
 * any known token (for example a string that is never closed)
 * @description Extracts the next significant token from program, stores it in
 * the lookahead variable and returns it. Whitespace and comments are skipped.
 * NUMBER tokens carry their numeric value, every other token carries the
 * matched text (STRING tokens without the surrounding quotes). Scanning
 * resumes at allRegex.lastIndex, and lineNumber is advanced by the line
 * breaks found in the consumed text.
 */
function nextToken() {
  lookahead = null;
  for (;;) {
    // allRegex is sticky: exec only matches exactly at lastIndex and resets
    // lastIndex to 0 when it fails, so the starting offset is saved first to
    // be able to tell "end of program" apart from "nothing matches here".
    const start = allRegex.lastIndex;
    const match = allRegex.exec(program);
    if (match === null) {
      if (start < program.length) {
        throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
      }
      // end of the program
      return lookahead;
    }
    // A match located before the previous one means lastIndex wrapped around
    // to 0 after the end was reached, so there is nothing left to deliver.
    if (previousIndex > match.index) {
      return lookahead;
    }
    previousIndex = match.index;

    // Whitespace runs and string literals may span several lines.
    const lineBreaks = match[0].match(/\r\n|\n|\r/g);
    if (lineBreaks !== null) {
      lineNumber += lineBreaks.length;
    }

    const groups = match.groups;
    // Compare against undefined: an empty string literal yields ''.
    const type = tokens.find((name) => groups[name] !== undefined);
    if (type === undefined) {
      throw new SyntaxError(`Unexpected syntax on line ${lineNumber}\nOn program:\n${program}`);
    }
    if (type === 'SPACE' || type === 'NEWLINE') {
      // not significant for the parser, keep scanning
      continue;
    }
    const value = type === 'NUMBER' ? Number(groups[type]) : groups[type];
    lookahead = { 'type': type, 'value': value };
    return lookahead;
  }
}

/**
 * @returns {Object} AST node; it can be a fragment of the complete tree
 * @throws {SyntaxError} when there is no token left, when the current token
 * cannot start an expression, or when a literal is followed by "("
 * @description Everything in EGG is an expression and this function parses
 * one, starting at the current lookahead. STRING and NUMBER tokens become
 * leaf nodes { type: 'value', value }. A WORD token becomes
 * { type: 'word', name } and is handed to parseApply, which wraps it in an
 * apply node when an argument list follows.
 */
function parseExpression() {
  // null lookahead: the program ended where an expression was required
  if (!lookahead) {
    throw new SyntaxError(`Unexpected end of program, expected an expression on line:${lineNumber}`);
  }

  if (lookahead.type === 'STRING' || lookahead.type === 'NUMBER') {
    const expr = { type: 'value', value: lookahead.value };
    nextToken();
    // Literals cannot be invoked as functions. The lookahead is null when the
    // literal was the last token of the program, which is perfectly valid.
    if (lookahead && lookahead.type === 'LP') {
      throw new SyntaxError(`Unexpected syntax, using a value as a function on line:${lineNumber}\nOn program:${program}`);
    }
    return expr;
  }

  if (lookahead.type === 'WORD') {
    const expr = { type: 'word', name: lookahead.value };
    nextToken();
    return parseApply(expr);
  }

  throw new SyntaxError('Unexpected syntax ' + program);
}

/**
 * @param {Object} ast node already parsed that may be the operator of a call
 * @returns {Object} the given node when no "(" follows it, otherwise an
 * { type: 'apply', operator, args } node
 * @throws {SyntaxError} when the argument list is malformed: missing ")",
 * missing "," between arguments, or nothing/")" right after a ","
 * @description Called after an expression has been parsed. When the lookahead
 * is "(" the expression is being invoked, so the comma separated arguments
 * are parsed up to the matching ")". The result is passed to parseApply again
 * so that chained invocations such as f(a)(b) are supported.
 */
function parseApply(ast) {
  // null lookahead means end of program; anything other than "(" means the
  // expression is not being invoked
  if (!lookahead || lookahead.type !== 'LP') {
    return ast;
  }

  // it is a function call, consume "(" and collect every argument
  nextToken();
  const node = { type: 'apply', operator: ast, args: [] };
  while (lookahead && lookahead.type !== 'RP') {
    node.args.push(parseExpression());

    // The program may end right after an argument; leave the loop so the
    // missing ")" is reported below instead of dereferencing null.
    if (!lookahead) {
      break;
    }

    // after each argument there must be a comma or a closing parenthesis
    if (lookahead.type === 'COMMA') {
      nextToken();
      // check for end of program first: lookahead is null in that case
      if (!lookahead) {
        throw new SyntaxError(`Expected a value or expression after a comma, on line:${lineNumber}\nOn program:${program}`);
      }
      if (lookahead.type === 'RP') {
        throw new SyntaxError(`Unexpected ')' after a comma, on line:${lineNumber}\nOn program:${program}`);
      }
    } else if (lookahead.type !== 'RP') {
      throw new SyntaxError(`Expected ',' or ')'on line:${lineNumber}\nOn program:${program}`);
    }
  }

  // the program ended before the closing parenthesis was found
  if (!lookahead) {
    throw new SyntaxError(`Expected ')' at the end of the line:${lineNumber}\nOn program:${program}`)
  }

  // consume ")" and look for a further invocation of the result
  nextToken();
  return parseApply(node);
}


/**
 * @param {String} eggProgram A string that contains the egg program
 * @return {Object} ASTtree
 * @throws {SyntaxError} when tokens remain after the first complete
 * expression; errors raised by nextToken/parseExpression also propagate
 * @description Parse the given eggProgram and return an object that represents
 * the AST tree. All the module-level parsing state is reset first, so every
 * call starts from scratch.
 */
function parse(eggProgram) {
  lookahead = null;
  program = eggProgram;
  lineNumber = 0;
  previousIndex = 0;
  // allRegex is sticky and keeps its scanning offset in lastIndex. A previous
  // parse that threw part-way leaves that offset pointing into the old
  // program, so it has to be rewound explicitly.
  allRegex.lastIndex = 0;
  nextToken();
  const resultado = parseExpression();
  if (lookahead) {
    throw new SyntaxError('Unexpected text after program');
  }
  return resultado;
}

/**
 * @param {String} fileRoute path of the source file to parse
 * @return {Object} the AST produced by parse, or undefined when reading or
 * parsing the file failed
 * @description Reads the given file as UTF-8 text and parses its contents.
 * Any error raised while reading or parsing is printed with console.log and
 * is not rethrown.
 */
function parseFile(fileRoute) {
  let ast;
  try {
    const source = fileSystem.readFileSync(fileRoute, 'utf8');
    ast = parse(source);
  } catch (error) {
    console.log('Error en parseFile:', error);
  }
  return ast;
}

/**
 * @param {String} eggProgram text of an egg program
 * @param {String} fileName name of the file that will receive the AST
 * @returns {Object} the AST of the program, as returned by exportToFile
 * @description Parses the given program text and saves the resulting AST in
 * the given file through exportToFile.
 */
function parseToFile(eggProgram, fileName) {
  const tree = parse(eggProgram);
  return exportToFile(tree, fileName);
}

/**
 * @param {Object} ast an AST tree of an EGG program
 * @param {String} fileName name of the destination file; ".evm" is appended
 * when the name does not already end with it
 * @returns {Object} the same ast that was received
 * @description Writes the given AST to disk as a .evm file. The .evm format
 * is JSON, indented here with one tab per nesting level.
 */
function exportToFile(ast, fileName) {
  let target = fileName;
  if (!target.endsWith('.evm')) {
    target += '.evm';
  }
  // JSON.stringify(value, replacer, space): a null replacer keeps every
  // property and the third argument is the string used for indentation.
  const serialized = JSON.stringify(ast, null, '\t');
  fileSystem.writeFileSync(target, serialized);
  return ast;
}

/**
 * @param {String} programFile file that contains an egg program
 * @param {String} fileName name of the .evm file that will contain the AST
 * of the program. When it is undefined the value of programFile is used
 * @returns {Object} whatever exportToFile returns for the parsed tree
 * @description Reads a file that contains an egg program, parses it with
 * parseFile and exports the result through exportToFile.
 */
function parseFromFileToFile(programFile, fileName) {
  const target = fileName === undefined ? programFile : fileName;
  const tree = parseFile(programFile);
  return exportToFile(tree, target);
}

// Public interface of the module. parseApply and parseExpression are exported
// as well, but both read the module-level lookahead, which parse initialises.
module.exports = { parse, parseFile, parseToFile, exportToFile, parseFromFileToFile, parseApply, parseExpression };