Source: form-script/tokenizer.js

/**
 * Creates a new Tokenizer to turn input FormScript code into a token stream.
 *
 * Every token is a plain object of the form `{ type, val }`. Operator tokens
 * additionally carry `isOperator: true`, and prefix operators (`!`, unary `+`
 * and unary `-`) also carry `isUnary: true`. NUMBER tokens hold a JavaScript
 * number in `val`; STRING tokens hold the text between the quotes; every other
 * token holds the matched source text (unary plus/minus are reported as "u+"
 * and "u-" so they can be told apart from their binary counterparts).
 *
 * @class
 * @param {string} script - FormScript code
 */
function Tokenizer(script) {
  const tokenStream = [];
  // Index of the next unread character of `script`.
  let cursor = 0;

  // Extra properties attached to operator tokens.
  const OPERATOR = { isOperator: true };
  const UNARY_OPERATOR = { isOperator: true, isUnary: true };

  // "\r" is included so scripts with Windows line endings tokenize cleanly.
  const WHITESPACE = new Set([" ", "\n", "\t", "\r"]);

  // Characters that end a word (number, keyword or variable name).
  const TERMINAL_CHARACTERS = new Set([
    "'", '"', "<", ">", "=", "(", ")", "+", "-", "/", "*", "%",
    ":", "!", "^", ";", "{", "}", ".", ",",
    ...WHITESPACE
  ]);

  // Characters that form a different token when directly followed by "=".
  const EQUALS_SUFFIXED_TOKENS = new Map([
    ["=", { single: "ASSIGN", singleFlags: undefined, double: "EQUAL" }],
    ["<", { single: "LT", singleFlags: OPERATOR, double: "LT_EQUAL" }],
    [">", { single: "GT", singleFlags: OPERATOR, double: "GT_EQUAL" }],
    ["!", { single: "NOT", singleFlags: UNARY_OPERATOR, double: "NOT_EQUAL" }]
  ]);

  // "+" and "-" are unary or binary depending on the preceding token.
  const SIGN_TOKENS = new Map([
    ["+", { unary: "UNARY_PLUS", binary: "PLUS" }],
    ["-", { unary: "UNARY_MINUS", binary: "MINUS" }]
  ]);

  // Tokens that are fully determined by a single character.
  const SINGLE_CHAR_TOKENS = new Map([
    ["^", { type: "EXP", flags: OPERATOR }],
    ["*", { type: "MUL", flags: OPERATOR }],
    ["/", { type: "DIV", flags: OPERATOR }],
    ["%", { type: "MOD", flags: OPERATOR }],
    [":", { type: "COLON" }],
    [";", { type: "SEMI_COLON" }],
    [".", { type: "DOT" }],
    [",", { type: "COMMA" }],
    ["(", { type: "LEFT_BRACKET" }],
    [")", { type: "RIGHT_BRACKET" }],
    ["{", { type: "LEFT_CURLY" }],
    ["}", { type: "RIGHT_CURLY" }]
  ]);

  // Reserved words, checked before a word is considered a variable name.
  const KEYWORD_TOKENS = new Map([
    ["and", { type: "AND", flags: OPERATOR }],
    ["or", { type: "OR", flags: OPERATOR }],
    ["if", { type: "IF_KEYWORD" }]
  ]);

  // Token types that can end an operand; a "+"/"-" right after one is binary.
  const OPERAND_END_TYPES = new Set(["NUMBER", "STRING", "VARIABLE_NAME", "RIGHT_BRACKET"]);

  const getChar = () => script[cursor];

  const peekChar = () => script[cursor + 1];

  const move = (count = 1) => {
    cursor += count;
  };

  const prevToken = () => tokenStream[tokenStream.length - 1];

  // Appends `{ type, ...flags, val }` to the token stream.
  const pushToken = (type, val, flags) => {
    tokenStream.push(Object.assign({ type }, flags, { val }));
  };

  const isInteger = (token) => /^\d+$/.test(token);

  // Anchored on both ends so the WHOLE word must be a valid identifier.
  const isVariableName = (token) => /^[_a-zA-Z][_a-zA-Z0-9]*$/.test(token);

  const isTerminalCharacter = (char) => TERMINAL_CHARACTERS.has(char);

  const skipSpaces = () => {
    while (cursor < script.length && WHITESPACE.has(getChar())) {
      move();
    }
  };

  // Reads a quoted string starting at the opening quote and leaves the cursor
  // just past the matching closing quote. No escape sequences are recognised.
  const parseString = () => {
    const terminator = getChar();
    const closingIndex = script.indexOf(terminator, cursor + 1);

    if (closingIndex === -1) {
      // Thrown as a plain object (not an Error) to stay compatible with
      // existing callers that inspect `type` / `msg`.
      throw { type: "TOKENIZER_ERROR", msg: "String not closed." };
    }

    const str = script.slice(cursor + 1, closingIndex);
    cursor = closingIndex + 1;
    return str;
  };

  // Reads characters up to (not including) the next terminal character or the
  // end of the script. Returns "" when the cursor already sits on either.
  const parseToken = () => {
    const start = cursor;

    while (cursor < script.length && !isTerminalCharacter(getChar())) {
      move();
    }

    return script.slice(start, cursor);
  };

  // True when a "+"/"-" at the cursor is a prefix operator: either nothing
  // precedes it, or the previous token cannot be the end of an operand.
  const isCurrentOpUnary = () => {
    const previous = prevToken();
    return previous === undefined || !OPERAND_END_TYPES.has(previous.type);
  };

  // Emits a NUMBER token for `integerPart`, absorbing a following ".digits"
  // fraction if there is one. The cursor is just past `integerPart` on entry.
  const pushNumber = (integerPart) => {
    if (getChar() === ".") {
      const anchorPoint = cursor;
      move();
      const decimalPart = parseToken();

      if (isInteger(decimalPart)) {
        pushToken("NUMBER", Number.parseFloat(`${integerPart}.${decimalPart}`));
        return;
      }

      // Not a fraction (e.g. "1.x"): rewind so the "." becomes a DOT token,
      // and still emit the integer below instead of dropping it.
      cursor = anchorPoint;
    }

    pushToken("NUMBER", Number.parseInt(integerPart, 10));
  };

  // Reads exactly one token starting at the cursor, which must be on a
  // non-whitespace character, and leaves the cursor just past that token.
  const readNextToken = () => {
    const char = getChar();

    if (char === "'" || char === '"') {
      pushToken("STRING", parseString());
      return;
    }

    const equalsSuffixed = EQUALS_SUFFIXED_TOKENS.get(char);
    if (equalsSuffixed !== undefined) {
      if (peekChar() === "=") {
        pushToken(equalsSuffixed.double, char + "=", OPERATOR);
        move(2);
      } else {
        pushToken(equalsSuffixed.single, char, equalsSuffixed.singleFlags);
        move();
      }
      return;
    }

    const sign = SIGN_TOKENS.get(char);
    if (sign !== undefined) {
      if (isCurrentOpUnary()) {
        pushToken(sign.unary, "u" + char, UNARY_OPERATOR);
      } else {
        pushToken(sign.binary, char, OPERATOR);
      }
      move();
      return;
    }

    const singleChar = SINGLE_CHAR_TOKENS.get(char);
    if (singleChar !== undefined) {
      pushToken(singleChar.type, char, singleChar.flags);
      move();
      return;
    }

    // Anything else is a word: a number, a keyword or a variable name.
    const token = parseToken();

    if (isInteger(token)) {
      pushNumber(token);
      return;
    }

    const keyword = KEYWORD_TOKENS.get(token);
    if (keyword !== undefined) {
      pushToken(keyword.type, token, keyword.flags);
      return;
    }

    if (isVariableName(token)) {
      pushToken("VARIABLE_NAME", token);
      return;
    }

    // Thrown as a bare string to stay compatible with existing callers.
    throw `Invalid token '${token}'`;
  };

  /**
   * The main tokenizer method. Consumes the script from the current cursor
   * position to its end and appends the tokens to this instance's stream;
   * calling it again after a successful run returns the same array unchanged.
   * @method
   * @returns {Array} the token stream
   * @throws {Object} `{ type: "TOKENIZER_ERROR", msg: "String not closed." }`
   *   when a string literal has no closing quote
   * @throws {string} `"Invalid token '<text>'"` when a word is neither a
   *   number, a keyword nor a valid variable name
   */
  this.tokenize = function() {
    skipSpaces();

    // Skipping whitespace before the bounds check means trailing (or only)
    // whitespace ends the loop instead of being read as an empty token.
    while (cursor < script.length) {
      readNextToken();
      skipSpaces();
    }

    return tokenStream;
  };
}

export { Tokenizer };