teaching machines

CS 330: Lecture 9 – Assignment and Operators

February 16, 2018 by . Filed under cs330, lectures, spring 2018.

Dear students,

We’ve got a little programming language up and running. It’s got commands for drawing rectangles and circles, and it supports integer literals and variables. Today we add to it variable assignments and operators. Next time we’ll add loops and functions. To help us get a feel for what our programs will look like in their Javascript/AST representation, here’s a challenge for you and your neighbor:

Consider the following snippet of code:
rectangle 0 0 size size
circle 200 150 30
In AST form, it looks like this:
            program
           /       \
  rectangle         circle
 /   |  |  \        |   | \
int int var var    int int int
 |   |   |    |     |   |   |
 0   0  size size  200 150  30
What should the AST for this program look like?
i = 0
while i < 10
  rectangle (i * 10) (i * 10) 10 10
  i = i + 1
end

Perhaps, something like this.
    program
   /       \ 
  =         __while_
 / \       /        \
i  int    <          _____block____
    |    / \        /              \
    0  var int   __rectangle_       =
       |    |   /     |  \   \     / \
       i   10  *      *   int int i   + 
              / \    / \    |   |    / \  
           var int var int 10  10  var int
            |   |   |   |           |   |
            i  10   i  10           i   1

Here’s the grammar we will support by the end of our discussion today:

program
  : statement* EOF

statement
  : RECTANGLE expr expr expr expr
  | CIRCLE expr expr expr
  | PRINT expr
  | IDENTIFIER ASSIGN expr 
  | IF expr statement* ELSE statement* END
  | WHILE expr statement* END

expr
  : INTEGER
  | expr (ASTERISK|DIVIDE|MOD) expr
  | expr (PLUS|MINUS) expr
  | expr (MORE|MORE_OR_EQUAL|LESS|LESS_OR_EQUAL|EQUAL|NOT_EQUAL) expr

Also, while you weren’t looking, I added a couple of things:

Let’s start with assignment. We add our model:

function StatementAssign(id, rhs) {
  this.evaluate = function(env) {
    var value = rhs.evaluate(env);
    env[id] = value;
  }
}

And augment our parser’s statement rule:

else if (has(IDENTIFIER)) {
  var id = devour();
  if (has(ASSIGN)) {
    devour();
    return new StatementAssign(id.source, expression());
  } else {
    throw 'i dunno wat ur sayin';
  }
}

Great! Now let’s add a way to do math. Inside our expression rule, we have to decide which production to pursue of these from our grammar:

expr
  : INTEGER
  | IDENTIFIER
  | expr (ASTERISK|DIVIDE|MOD) expr
  | expr (PLUS|MINUS) expr
  | expr (MORE|MORE_OR_EQUAL|LESS|LESS_OR_EQUAL|EQUAL|NOT_EQUAL) expr

The first two are easy to detect. The last three are not. We’re effectively checking “we’re a multiplicative expression if we’re two expression expressions joined by a multiplicative operator.” This is a recursive definition, and this form of grammar is called left recursion. The programming language folks came up with a mechanism for avoiding it, and it also solves the problem of determining precedence. The solution is to create a tiered system of expressions from lowest precedence to highest:

expr
  : additive

additive
  : multiplicative ((PLUS|MINUS) multiplicative)*

multiplicative
  : atom ((ASTERISK|DIVIDE|MOD) atom)*

atom
  : IDENTIFIER
  | INTEGER

I admit I don’t find this intuitive. It feels weird to check for an additive expression when there may not actually be an addition or subtraction operation in the source. But it works.

Let’s add models for our mathematical operators:

function ExpressionAdd(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA + valueB;
  }
}

function ExpressionSubtract(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA - valueB;
  }
}

function ExpressionMultiply(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA * valueB;
  }
}

function ExpressionDivide(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA / valueB;
  }
}

function ExpressionMod(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA % valueB;
  }
}

Add the parser gets a bit more complicated:

function expression() {
  return additive();
}

function additive() {
  var a = multiplicative();
  while (has(PLUS) || has(MINUS)) {
    var operator = tokens[i];
    devour(); // eat operator
    var b = multiplicative();
    if (operator.type == PLUS) {
      a = new ExpressionAdd(a, b);
    } else {
      a = new ExpressionSubtract(a, b);
    }
  }
  return a;
}

function multiplicative() {
  var a = atom();
  while (has(ASTERISK) || has(DIVIDE) || has(MOD)) {
    var operator = tokens[i];
    devour(); // eat operator
    var b = atom();
    if (operator.type == ASTERISK) {
      a = new ExpressionMultiply(a, b);
    } else if (operator.type == MOD) {
      a = new ExpressionMod(a, b);
    } else {
      a = new ExpressionDivide(a, b);
    }
  }
  return a;
}

function atom() {
  if (has(INTEGER)) {
    var e = new ExpressionInteger(parseInt(tokens[i].source));
    devour();
    return e;
  } else if (has(IDENTIFIER)) {
    var id = devour();
    var e = new ExpressionVariableReference(id.source);
    return e;
  } else {
    throw 'not legal expression [' + tokens[i].source + ']';
  }
}

We should be able to write code like this now:

w = 50
h = 2 * w
rectangle 0 0 w h

What about this code?

w = 50
rectangle 0 0 w (2 * h)

We don’t currently support parentheses. Adding support involves only the parser. We don’t need a new model, because parentheses are not a program construct. They just guide the parser as it associates other constructs together. Parentheses essentially turn an expression into an atom, so we add this logic to atom:

else if (has(LEFT_PARENTHESIS)) {
  devour(); // eat (
  var e = expression();
  if (!has(RIGHT_PARENTHESIS)) {
    throw 'unbalanced!';
  }
  devour(); // eat )
  return e;
}

That’s enough for one day. See you next time!

Sincerely,

P.S. Here’s a haiku just for you:

Why write a language?
Copy and paste the answers
From “Why have children?”

P.P.S. Here’s the code we wrote together:

lexer.js

var IDENTIFIER = 'IDENTIFIER';
var PLUS = 'PLUS';
var EOF = 'EOF';
var LEFT_PARENTHESIS = 'LEFT_PARENTHESIS';
var RECTANGLE = 'RECTANGLE';
var CIRCLE = 'CIRCLE';
var RIGHT_PARENTHESIS = 'RIGHT_PARENTHESIS';
var DIVIDE = 'DIVIDE';
var MINUS = 'MINUS';
var ASTERISK = 'ASTERISK';
var COMMA = 'COMMA';
var MOD = 'MOD';
var INTEGER = 'INTEGER';
var WHILE = 'WHILE';
var IF = 'IF';
var ELSE = 'ELSE';
var END = 'END';
var MORE_OR_EQUAL = 'MORE_OR_EQUAL';
var MORE = 'MORE';
var LESS_OR_EQUAL = 'LESS_OR_EQUAL';
var LESS = 'LESS';
var EQUAL = 'EQUAL';
var NOT_EQUAL = 'NOT_EQUAL';
var PRINT = 'PRINT';
var ASSIGN = 'ASSIGN';

function lex(source) {
  var tokens = [];
  var tokenSoFar = '';
  var i = 0;

  function has(regex) {
    return source.charAt(i).match(regex);
  }

  function devour() {
    tokenSoFar += source.charAt(i);
    i += 1;
  }

  function emit(type) {
    tokens.push({type: type, source: tokenSoFar});
    tokenSoFar = '';
  }

  while (i < source.length) {
    if (has(/\+/)) {
      devour();
      emit(PLUS);
    } else if (has(/%/)) {
      devour();
      emit(MOD);
    } else if (has(/=/)) {
      devour();
      emit(ASSIGN);
    } else if (has(/-/)) {
      devour();
      emit(MINUS);
    } else if (has(/\(/)) {
      devour();
      emit(LEFT_PARENTHESIS);
    } else if (has(/\)/)) {
      devour();
      emit(RIGHT_PARENTHESIS);
    } else if (has(/\*/)) {
      devour();
      emit(ASTERISK);
    } else if (has(/\//)) {
      devour();
      if (has(/\//)) {
        while (has(/[^\n]/)) {
          devour();
        }
        tokenSoFar = '';
      } else {
        emit(DIVIDE);
      }
    } else if (has(/\d/)) {
      while (has(/\d/)) {
        devour();
      }
      emit(INTEGER);
    } else if (has(/[a-zA-Z]/)) {
      while (has(/[a-zA-Z0-9]/)) {
        devour();
      }

      if (tokenSoFar == 'rectangle') {
        emit(RECTANGLE);
      } else if (tokenSoFar == 'circle') {
        emit(CIRCLE);
      } else if (tokenSoFar == 'while') {
        emit(WHILE);
      } else if (tokenSoFar == 'if') {
        emit(IF);
      } else if (tokenSoFar == 'else') {
        emit(ELSE);
      } else if (tokenSoFar == 'end') {
        emit(END);
      } else if (tokenSoFar == 'print') {
        emit(PRINT);
      } else {
        emit(IDENTIFIER);
      }
    } else if (has(/\s/)) {
      devour();
      tokenSoFar = '';
    } else {
      throw 'ack! [' + tokenSoFar + ']';
    }
  }

  emit(EOF);

  return tokens;
}

ast.js

function Block(statements) {
  this.evaluate = function(env) {
    statements.forEach(statement => statement.evaluate(env));
  }
}

function StatementCircle(originXExpression, originYExpression, radiusExpression) {
  this.evaluate = function(env) {
    var originX = originXExpression.evaluate(env); 
    var originY = originYExpression.evaluate(env); 
    var radius = radiusExpression.evaluate(env); 
    context.fillStyle = 'rgb(' + env.red + ',' + env.green + ',' + env.blue + ')';
    context.beginPath();
    context.arc(originX, originY, radius, 0, 2 * Math.PI);
    context.fill();
  }
}

function StatementRectangle(leftExpression, topExpression, widthExpression, heightExpression) {
  this.evaluate = function(env) {
    var l = leftExpression.evaluate(env); 
    var t = topExpression.evaluate(env); 
    var w = widthExpression.evaluate(env); 
    var h = heightExpression.evaluate(env); 
    context.fillStyle = 'rgb(' + env.red + ',' + env.green + ',' + env.blue + ')';
    context.fillRect(l, t, w, h);
  }
}

function StatementPrint(messageExpression) {
  this.evaluate = function(env) {
    var message = messageExpression.evaluate(env); 
    console.log(message);
  }
}

function ExpressionIntegerLiteral(literal) {
  this.evaluate = function(env) {
    return literal;
  }
}

function ExpressionVariableReference(id) {
  this.evaluate = function(env) {
    return env[id];
  }
}

function StatementAssignment(id, rhsExpression) {
  this.evaluate = function(env) {
    env[id] = rhsExpression.evaluate(env);
  }
}

function ExpressionAdd(lExpression, rExpression) {
  this.evaluate = function(env) {
    return lExpression.evaluate(env) + rExpression.evaluate(env);
  }
}

function ExpressionSubtract(lExpression, rExpression) {
  this.evaluate = function(env) {
    return lExpression.evaluate(env) - rExpression.evaluate(env);
  }
}

function ExpressionMultiply(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA * valueB;
  }
}

function ExpressionDivide(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA / valueB;
  }
}

function ExpressionMod(a, b) {
  this.evaluate = function(env) {
    var valueA = a.evaluate(env);
    var valueB = b.evaluate(env);
    return valueA % valueB;
  }
}

parser.js

function parse(tokens) {
  var i = 0;

  function has(tokenType) {
    return i < tokens.length && tokens[i].type == tokenType;
  }

  function devour() {
    var token = tokens[i];
    i++;
    return token;
  }

  function program() {
    var statements = [];
    while (!has(EOF)) {
      statements.push(statement());
    }
    return new Block(statements);
  }

  function statement() {
    if (has(RECTANGLE)) {
      devour();
      var leftExpression = expression();
      var topExpression = expression();
      var widthExpression = expression();
      var heightExpression = expression();
      return new StatementRectangle(leftExpression, topExpression, widthExpression, heightExpression);
    } else if (has(CIRCLE)) {
      devour();
      var originXExpression = expression();
      var originYExpression = expression();
      var radiusExpression = expression();
      return new StatementCircle(originXExpression, originYExpression, radiusExpression);
    } else if (has(PRINT)) {
      devour();
      var message = expression();
      return new StatementPrint(message);
    } else if (has(IDENTIFIER)) {
      var idToken = devour();
      if (!has(ASSIGN)) {
        throw 'Curses. ' + tokens[i];
      }
      devour();
      var rhs = expression();
      return new StatementAssignment(idToken.source, rhs);
    } else {
      throw 'You messed up big time, foobag. I don\'t know ' + tokens[i].source;
    }
  }

  function expression() {
    return additive();
  }

  function additive() {
    var l = multiplicative();
    while (has(PLUS) || has(MINUS)) {
      var operatorToken = devour();
      var r = multiplicative();
      if (operatorToken.type == PLUS) {
        l = new ExpressionAdd(l, r);
      } else {
        l = new ExpressionSubtract(l, r);
      }
    }

    return l;
  }

  function multiplicative() {
    var a = atom();
    while (has(ASTERISK) || has(DIVIDE) || has(MOD)) {
      var operator = tokens[i];
      devour(); // eat operator
      var b = atom();
      if (operator.type == ASTERISK) {
        a = new ExpressionMultiply(a, b);
      } else if (operator.type == MOD) {
        a = new ExpressionMod(a, b);
      } else {
        a = new ExpressionDivide(a, b);
      }
    }
    return a;
  }

  function atom() {
    if (has(INTEGER)) {
      var token = devour();
      return new ExpressionIntegerLiteral(parseInt(token.source));
    } else if (has(IDENTIFIER)) {
      var token = devour();
      return new ExpressionVariableReference(token.source);
    } else if (has(LEFT_PARENTHESIS)) {
      devour();
      var e = expression();
      if (!has(RIGHT_PARENTHESIS)) {
        throw 'uz unbalanced';
      }
      devour();
      return e;
    } else {
      throw 'I expected an expression. That\'s NOT what I found. ' + tokens[i].source;
    }
  }

  return program();
}

index.html

<!DOCTYPE html>
<html>
<head>
  <title>...</title>
  <script src="lexer.js"></script>
  <script src="ast.js"></script>
  <script src="parser.js"></script>

  <style>
* {
  box-sizing: border-box;
}

#editor {
  padding: 5px;
  width: 30%;
  position: absolute;
  top: 0;
  left: 0;
  bottom: 30px;
}

#editor textarea {
  font-size: 18pt;
  width: 100%;
  height: 95%;
}

#tools {
  height: 5%;
  position: absolute;
  bottom: 0;
  right: 5px;
}

canvas {
  margin: 5px;
  margin-left: 0px;
  position: absolute;
  top: 0;
  left: 30%;
}
  </style>
</head>
<body>
  <div id="editor">
    <textarea id="ide" rows="8"></textarea>
    <div id="tools">
      <input type="range" id="slider" value="0" min="0" max="100">
      <input type="button" id="run" value="Run">
    </div>
  </div>
  <canvas id="canvas" width="600" height="600" style="background-color: lightgray;"></canvas><br>
  
  <script>
var ide = document.getElementById('ide');
var run = document.getElementById('run');
var slider = document.getElementById('slider');
var canvas = document.getElementById('canvas');
var context = canvas.getContext('2d');

function evaluate() {
  context.clearRect(0, 0, canvas.width, canvas.height);

  var source = ide.value;
  var tokens = lex(source);
  var ast = parse(tokens);

  var env = {t: slider.value, red: 255, green: 0, blue: 0};
  ast.evaluate(env);
}

run.onclick = evaluate;
slider.oninput = evaluate;

  </script>
</body>
</html>