Source: lib/compiler.js | Source: lib/compiler.js

// @ts-check
/**
 * @description The lexer, the parser and the compiler (source code to JSON
 *     AST) of the Please lang
 * @author Daniel del Castillo de la Rosa <alu0101225548@ull.edu.es>
 * @since 8/04/2021
 * @module PleaseLangCompiler
 */

'use strict';

const fs = require('fs');
const path = require('path');
const {Value, Word, Call} = require('./ast.js');

/**
 * The definition of whitespace in the Please language: any run, anchored at
 * the start of the input, of whitespace characters, `//` line comments and
 * `/* ... *\/` block comments. Every part is optional, so the pattern also
 * matches the empty string.
 */
const WHITE = /^(?:\s|\/\/.*|\/\*(?:.|\n)*?\*\/)*/;

/**
 * A lexer class that takes care of the lexical analysis. It keeps a single
 * token of look-ahead: advanceToken() reads the next token from the source
 * and getLookAhead() returns it.
 */
class Lexer {
  /**
   * The constructor of the lexer
   * @param {string} source The source code of the program to analyze
   */
  constructor(source) {
    /**
     * The source with every carriage return removed, so that a line break
     * is always a single '\n'
     * @const {string}
     * @private
     */
    this.source_ = source.replace(/\r/g, '');
    /**
     * The last token read by advanceToken(), undefined before the first call
     * @type {Object|undefined}
     * @private
     */
    this.cachedToken_ = undefined;
    /**
     * The 1-based line of the current position
     * @type {number}
     * @private
     */
    this.line_ = 1;
    /**
     * The 1-based column of the current position
     * @type {number}
     * @private
     */
    this.column_ = 1;
    /**
     * The 0-based index in the source of the current position
     * @type {number}
     * @private
     */
    this.offset_ = 0;
    /**
     * The pattern used to skip whitespace and comments
     * @const {RegExp}
     */
    this.WHITE = WHITE;
    /**
     * A sticky RegExp with one named group per token type. The name of the
     * group that matched is used as the type of the token.
     * @const {RegExp}
     * @private
     */
    this.REGEXP_ = new RegExp(
        [
          /(?<STRING>(["'])(?:[^\2\\]|\\.)*?\2)/,
          /(?<NUMBER>[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)/,
          /(?<WORD>[^\s(){},"'\\]+)/,
          /(?<COMMA>,)/,
          /(?<LEFT_PARENTHESIS>[({])/,
          /(?<RIGHT_PARENTHESIS>[)}])/,
        ].map((regexp) => regexp.source).join('|'),
        'y',
    );
  }

  /**
   * A method that reads the next token of the source and stores it as the
   * look-ahead token (see getLookAhead). When the input is exhausted the
   * stored token has type 'EOF'.
   * @throws {SyntaxError} Will throw if there are invalid tokens
   */
  advanceToken() {
    // isEmpty() already skips the leading whitespace and comments
    if (this.isEmpty()) {
      this.cachedToken_ = {type: 'EOF', line: this.line_, column: this.column_};
      return;
    }
    // Anchor the sticky RegExp at the current position explicitly instead of
    // relying on lastIndex having been kept in sync by previous calls
    this.REGEXP_.lastIndex = this.offset_;
    const match = this.REGEXP_.exec(this.source_);
    if (match === null) {
      // A failed sticky match resets lastIndex to 0, so the offending text
      // has to be located with offset_
      const offending =
          /..*?\b|.*$/.exec(this.source_.slice(this.offset_));
      throw new SyntaxError(
          `Invalid token: ${offending[0]} ` +
          `at line ${this.line_} and column ${this.column_}`,
      );
    }
    this.cachedToken_ = this.constructResult_(match);
    // The position stored in the token is the one where the token starts
    this.cachedToken_.offset = this.offset_;
    this.cachedToken_.line = this.line_;
    this.cachedToken_.column = this.column_;
    this.updateAfterMatch_(match.groups[this.cachedToken_.type]);
  }

  /**
   * A helper function that constructs the token from the match. WORD tokens
   * store their text in `name`; every other token stores it in `value`
   * (converted to a number for NUMBER, without the quotes for STRING).
   * @param {Object} match The result of executing the token RegExp
   * @return {Object} The constructed result
   * @private
   */
  constructResult_(match) {
    const result = {};
    // The type of the token is the name of the only group that matched
    result.type = Object.keys(match.groups)
        .find((type) => match.groups[type] !== undefined);
    const text = match.groups[result.type];
    if (result.type === 'WORD') {
      result.name = text;
    } else if (result.type === 'NUMBER') {
      result.value = Number(text);
    } else if (result.type === 'STRING') {
      result.value = text.slice(1, text.length - 1);
    } else {
      result.value = text;
    }
    return result;
  }

  /**
   * A method that returns the actual token
   * @return {Object} The actual token, undefined if advanceToken() has not
   *     been called yet
   */
  getLookAhead() {
    return this.cachedToken_;
  }

  /**
   * A function to update the position of the lexer (offset, line and
   * column) after consuming a piece of the source
   * @param {string} token The text consumed, starting at the current offset
   * @private
   */
  updateAfterMatch_(token) {
    this.offset_ += token.length;
    this.line_ += token.split(/\n/).length - 1;
    // The column is measured from the last line break before the NEW
    // offset, so that line breaks inside the consumed text are honoured
    const lineStart = this.offset_ === 0 ?
        -1 :
        this.source_.lastIndexOf('\n', this.offset_ - 1);
    this.column_ = this.offset_ - lineStart;
  }

  /**
   * A function to check if there are more tokens in the input. It skips the
   * whitespace and comments at the current position first.
   * @return {boolean} Whether is it empty or not
   */
  isEmpty() {
    this.skipSpace_();
    return this.source_.length === this.offset_;
  }

  /**
   * A function that moves the current position past any whitespace and
   * comments found at it
   * @private
   */
  skipSpace_() {
    const match = this.WHITE.exec(this.source_.slice(this.offset_));
    this.updateAfterMatch_(match[0]);
    this.REGEXP_.lastIndex = this.offset_;
  }
}


/**
 * A function that parses an expression. A WORD may be followed by any
 * number of calls, so it is handed to parseCall; a STRING or a NUMBER is a
 * value on its own.
 * @param {Lexer} lexer An instance of the Lexer class initialized with
 *     the source of the program and with a look-ahead token already read
 * @return {Object} The JSON AST of the expression
 * @throws {SyntaxError} Will throw if there are syntactical errors
 */
const parseExpression = (lexer) => {
  const token = lexer.getLookAhead();
  if (token.type === 'WORD') {
    lexer.advanceToken();
    const expression = new Word(token);
    return parseCall(expression, lexer);
  }
  if (token.type === 'STRING' || token.type === 'NUMBER') {
    lexer.advanceToken();
    return new Value(token);
  }
  // The EOF token has no value, so its type is shown instead of 'undefined'
  const shown = token.value !== undefined ? token.value : token.type;
  throw new SyntaxError(
      `Unexpected token: ${shown} at line` +
      ` ${token.line} and column ${token.column}`,
  );
};

/**
 * A function that parses a call. If the look-ahead token does not open an
 * argument list the operator is returned untouched; otherwise the arguments
 * are parsed up to the bracket that matches the opening one and the
 * resulting call is itself tried as the operator of a further call.
 * @param {Object} operator The already parsed operator of the call
 * @param {Lexer} lexer An instance of the Lexer class properly initialized
 * @return {Object} The JSON AST of the call
 * @throws {SyntaxError} Will throw if there are syntactical errors
 */
const parseCall = (operator, lexer) => {
  let token = lexer.getLookAhead();
  if (token.type === 'EOF' || token.type === 'RIGHT_PARENTHESIS' ||
      token.type === 'COMMA' || token.type === 'RIGHT_CURLY_BRACE') {
    return operator;
  }
  if (token.type !== 'LEFT_PARENTHESIS') {
    // WORD tokens store their text in `name` instead of `value`
    const shown = token.value !== undefined ? token.value : token.name;
    throw new SyntaxError(
        `Unexpected token: ${shown} at line` +
        ` ${token.line} and column ${token.column}, expected '(' or '{'`,
    );
  }
  const finisher = token.value === '(' ? ')' : '}';
  // The type has to be checked too: a STRING token whose content is ')' or
  // '}' has the same `value` as the closing bracket
  const isFinisher = (candidate) =>
    candidate.type === 'RIGHT_PARENTHESIS' && candidate.value === finisher;
  lexer.advanceToken();
  token = lexer.getLookAhead();
  const args = [];
  while (!isFinisher(token)) {
    if (token.type === 'EOF') {
      throw new SyntaxError(`Unexpected EOF`);
    }
    args.push(parseExpression(lexer));
    token = lexer.getLookAhead();
    if (token.type === 'COMMA') {
      lexer.advanceToken();
      token = lexer.getLookAhead();
    } else if (!isFinisher(token)) {
      throw new SyntaxError(
          `Expected ',' or '${finisher}' at line ${token.line} ` +
          `and column ${token.column}`,
      );
    }
  }
  const call = new Call(operator, args);
  // Consume the closing bracket and look for a chained call: f(a)(b)
  lexer.advanceToken();
  return parseCall(call, lexer);
};

/**
 * A function that parses a Please program. The program must be exactly one
 * expression: anything left in the input after it is an error.
 * @param {string} program The string with the unparsed program
 * @return {Object} The AST of the program
 * @throws {SyntaxError} Will throw if there are errors in the program
 */
const parse = (program) => {
  const tokens = new Lexer(program);
  // Read the first token so the parser has a look-ahead to start with
  tokens.advanceToken();
  const tree = parseExpression(tokens);
  const trailing = tokens.getLookAhead();
  if (trailing.type === 'EOF') {
    return tree;
  }
  throw new SyntaxError('Unexpected text after program');
};

/**
 * A function that loads a file as UTF-8 text and parses what it contains
 * @param {string} fileName The name of the file
 * @return {Object} The JSON AST of the program
 * @throws Will throw if there are errors in the program or if the file
 *     can't be opened
 */
const parseFromFile = (fileName) => parse(fs.readFileSync(fileName, 'utf8'));

/**
 * A function that compiles a Please file: it parses the origin file and
 * writes the AST to the destination file as JSON indented with two spaces
 * @param {string} origin The name of the origin file
 * @param {string} [destination] The name of the destination file. When it
 *     is omitted the result is written next to the origin file, using the
 *     name of the origin file up to its first dot plus '.cpls'
 * @throws Will throw if there are errors in the program or if the files
 *     can't be opened
 */
const compile = (origin, destination = undefined) => {
  const source = fs.readFileSync(origin, 'utf8');
  let target = destination;
  if (target == undefined) {
    // Only the file name is trimmed at its first dot. Applying the rule to
    // the whole path would cut it at any dot of a directory ('./', '../')
    const {dir, base} = path.parse(origin);
    const stem = /^[^.]+/.exec(base);
    // A name that starts with a dot has no stem, so it is kept whole
    target = path.join(dir, (stem === null ? base : stem[0]) + '.cpls');
  }
  const ast = JSON.stringify(parse(source), null, 2);
  fs.writeFileSync(target, ast);
};

// The public API of the module: the parsing and compiling functions, the
// Lexer class and the whitespace pattern used by the lexer
module.exports =
    {parse, parseCall, parseExpression, parseFromFile, compile, Lexer, WHITE};