teaching machines

CS 330 Lecture 38 – Gollygeo, a complete ANTLR/C++ project

May 7, 2012 by . Filed under cs330, lectures, spring 2012.

Agenda

Gollygeo

We want to write a little geography quizzer. The player is dropped into a country and must identify its capital and neighbors before she can leave. To win (an education), the player must clear all countries. The author/level designer can draft up “maps” in a custom world description language. Perhaps it looks something like:

USA HAS CAPITAL "Washington, DC"
Mexico HAS CAPITAL "Mexico City"
Canada HAS CAPITAL "Ottawa"

Mexico BORDERS USA
Canada BORDERS USA

We need to parse this file first thing inside our interpreter. Suppose our grammar is in World.g, then our interpreter starts off with:

// Lex and parse world file.
WorldLexer::InputStreamType world_in((ANTLR_UINT8 *) argv[1],
                                     ANTLR_ENC_8BIT);
WorldLexer world_lexer(&world_in);
WorldParser::TokenStreamType world_tokens(ANTLR_SIZE_HINT,
                                          world_lexer.get_tokSource());
WorldParser parser(&world_tokens);
parser.SetCountries(&countries);
parser.file();

It’s going to the task of the world parser to set up our countries list. Since we’ll need access to these country objects in a variety of places, let’s declare and define the country object and its helper in their own files:

#ifndef COUNTRY_H
#define COUNTRY_H

#include <string>
#include <map>

using std::string;
using std::map;
using std::pair;

/**
 We need a forward declaration of Country because Neigbor and Country are
 codependent. Neither one can be first 'cuz they need each other.
 */
class Country;
class Neighbor;

/**
 C++ templated types are too much.
 */
typedef map<string, Country *> n2c_t;
typedef map<string, Neighbor> hood_t;

/**
 Before the player can leave a country, she must identify all neigbors.
 This struct encapsulates a neighbor and its identification status.
 */
struct Neighbor {
  Neighbor(Country *c)
    : neighbor(c),
      is_identified(false) {
  }

  Country *neighbor;
  bool is_identified;
};

/**
 A country is a name, capital, and a set of neighbors. To leave a
 country, its capital and numbers must be identified.
 */
class Country {
  public:
    Country(const std::string& name,
            const std::string& capital);

    const std::string& GetName() const;
    const std::string& GetCapital() const;
    hood_t& GetNeighbors();
    const hood_t& GetNeighbors() const;
    void AddNeighbor(Country *country);
    void IsCapitalIdentified(bool is_guessed);
    bool IsCapitalIdentified() const;
    bool IsCleared() const;
    int GetIdentifiedNeighborsCount() const;

  private:
    string name;
    string capital;

    hood_t neighbors;

    bool is_capital_guessed;
};

std::ostream& operator<<(std::ostream& out, const Country *country);

#endif

The implementation:

#include "Country.h"

#include <iostream>

/* ------------------------------------------------------------------------- */

Country::Country(const std::string& name,
                 const std::string& capital)
  : name(name),
    capital(capital),
    neighbors(),
    is_capital_guessed(false) {
}

/* ------------------------------------------------------------------------- */

const std::string& Country::GetName() const {
  return name;
}

/* ------------------------------------------------------------------------- */

const std::string& Country::GetCapital() const {
  return capital;
}

/* ------------------------------------------------------------------------- */

hood_t& Country::GetNeighbors() {
  return neighbors;
}

/* ------------------------------------------------------------------------- */

const hood_t& Country::GetNeighbors() const {
  return neighbors;
}

/* ------------------------------------------------------------------------- */

std::ostream& operator<<(std::ostream& out, const Country *country) {
  std::cout << country->GetName() << std::endl;
  std::cout << country->GetCapital() << std::endl;

  hood_t::const_iterator neighbor;
  for (neighbor = country->GetNeighbors().begin();
       neighbor != country->GetNeighbors().end();
       ++neighbor) {
    std::cout << "Borders " << neighbor->first << std::endl;
  }

  return out;
}

/* ------------------------------------------------------------------------- */

void Country::AddNeighbor(Country *country) {
  neighbors.insert(pair<string, Neighbor>(country->GetName(),
                                          Neighbor(country)));
}

/* ------------------------------------------------------------------------- */

void Country::IsCapitalIdentified(bool is_guessed) {
  is_capital_guessed = is_guessed;
}

/* ------------------------------------------------------------------------- */

bool Country::IsCapitalIdentified() const {
  return is_capital_guessed;
}

/* ------------------------------------------------------------------------- */

bool Country::IsCleared() const {
  return IsCapitalIdentified() &&
         GetIdentifiedNeighborsCount() == neighbors.size();
}

/* ------------------------------------------------------------------------- */

int Country::GetIdentifiedNeighborsCount() const {
  int nidentified = 0;
  for (hood_t::const_iterator i = neighbors.begin();
       i != neighbors.end();
       ++i) {
    if (i->second.is_identified) {
      ++nidentified;
    }
  }
  return nidentified;
}

/* ------------------------------------------------------------------------- */

Okay, now let’s think about the grammar. First, the boilerplate:

grammar World;

options {
  language = Cpp;
}

@parser::includes {
  #include <iostream>

  #include "WorldLexer.hpp"
  #include "Country.h"
}

@lexer::traits {
  class WorldLexer;
  class WorldParser;
  typedef antlr3::Traits<WorldLexer, WorldParser> WorldLexerTraits;
  typedef WorldLexerTraits WorldParserTraits;
}

@context {
  private:
    n2c_t *countries;

  public:
    void SetCountries(n2c_t *countries);
}

@members {
  void WorldParser::SetCountries(n2c_t *countries) {
    this->countries = countries;
  }
}

Now the world parser has a pointer to the names-countries map. This parser’s job is to populate it as it sees countries.

Let’s think about our starting production. We’re parsing a file which has commands on each line. We can match that with:

file
  : line* EOF
  ;

Each line is either a declaration command or a border command:

line
  : country=ID HAS CAPITAL capital=STRING
  | country1=ID BORDERS country2=ID
  ;

Let’s postpone for one moment handling the actions to take on parsing these commands. Let’s round out our terminals:

HAS : 'HAS';
CAPITAL : 'CAPITAL';
BORDERS : 'BORDERS';

ID
  : ('a'..'z' | 'A' .. 'Z')+
  ;

STRING
  : '"' (options {greedy=false;}: .)* '"'
  ;

WHITESPACE
  : ('\r'|'\n'|'\t'|' ')+ {skip();}
  ;

Note how I match quote-enclosed strings. I have to use the non-greedy option just like in Perl. Otherwise I’ll zoom into the very last string in the file.

Okay, I’m at a point where I should try compiling something. A makefile is indespensable:

ANTLR_DIR = /home/user/330/antlrcpp
ANTLR = java -jar $(ANTLR_DIR)/antlr-3.4-with-cpp.jar
WORLD_SRC = WorldLexer.cpp WorldParser.cpp
WORLD_INCLUDE = WorldLexer.hpp WorldParser.hpp
GOLLYGEO_SRC = GollygeoLexer.cpp GollygeoParser.cpp
GOLLYGEO_INCLUDE = GollygeoLexer.hpp GollygeoParser.hpp
GRAMMARS_SRC = $(WORLD_SRC) #$(GOLLYGEO_SRC)

interpreter: interpreter.cpp Country.cpp $(GRAMMARS_SRC) #GeoPlayer.h
	g++ -I$(ANTLR_DIR)/include -o interpreter -Wall interpreter.cpp Country.cpp $(GRAMMARS_SRC)

$(WORLD_SRC) $(WORLD_INCLUDE): World.g
	$(ANTLR) World.g

$(GOLLYGEO_SRC) $(GOLLYGEO_INCLUDE): Gollygeo.g
	$(ANTLR) Gollygeo.g

Let’s verify that parser parses. Does it? I hope so.

Now we want to actually define the actions to take on parsing. Let’s set up our test first. In our interpreter, we add:

n2c_t countries;
...
n2c_t::iterator country;
for (country = countries.begin(); country != countries.end(); ++country) {
  std::cout << "country->second: " << country->second << std::endl;
}

Now let’s flesh out the rules. On seeing a country declaration, we simply insert its record into our name-countries map:

{
  // Turn ID tokens into C++ strings.
  string country_name = string($country.text);
  string capital_name = string($capital.text);

  // Make sure author didn't found a country twice.
  n2c_t::iterator match = countries->find(country_name);
  if (match != countries->end()) {
    std::cerr << "Country " << country_name << " declared twice." << std::endl;
  } else {
    Country *new_country = new Country(country_name, capital_name);
    countries->insert(pair<string, Country *>(country_name, new_country));
  }
}

The text behind a terminal is in ANTLR string form. We convert to a std::string.

Let’s test. Does it work? I hope so.

Now, let’s add neighbors:

{
  // Turn ID tokens into C++ strings.
  string country1_name = string($country1.text);
  string country2_name = string($country2.text);

  // Assert that countries have already been declared.
  n2c_t::iterator match1 = countries->find(country1_name);
  n2c_t::iterator match2 = countries->find(country2_name);
  if (match1 == countries->end()) {
    std::cerr << "Country " << country1_name << " not declared." << std::endl;
  } else if (match2 == countries->end()) {
    std::cerr << "Country " << country2_name << " not declared." << std::endl;
  }

  // Mark border-relationships.
  else {
    match1->second->AddNeighbor(match2->second);
    match2->second->AddNeighbor(match1->second);
  }
}

Let’s test.

Okay, now we want the player to be able to interact. We’re going to drop the player in the starting country and let her enter in commands to identify capitals and neighbors. Let’s augment our interpreter:

GeoPlayer player;
player.current = countries.find("USA")->second;

// Lex and parse user commands, one line at a time.
std::string command;
std::cout << "> ";
while (getline(std::cin, command)) {
  GollygeoLexer::InputStreamType gollygeo_in((ANTLR_UINT8 *) command.c_str(), ANTLR_ENC_8BIT, command.length(), (ANTLR_UINT8 *) "command");
  GollygeoLexer gollygeo_lexer(&gollygeo_in);
  GollygeoParser::TokenStreamType gollygeo_tokens(ANTLR_SIZE_HINT, gollygeo_lexer.get_tokSource());
  GollygeoParser gollygeo_parser(&gollygeo_tokens);
  gollygeo_parser.SetCountries(&countries, &player);

  gollygeo_parser.line();

  std::cout << "> ";
}

std::cout << std::endl;

This parser will need the countries to do semantic checking (is that really a country? is that really its capital? etc.). It’ll also need a player that it can move around to different countries once the current country is cleared. Let’s add that simple player class:

#ifndef GEOPLAYER_H
#define GEOPLAYER_H

#include "Country.h"

struct GeoPlayer {
  Country *current;
};

#endif

Okay, let’s get the boilerplate for our grammar:

grammar Gollygeo;

options {
  language = Cpp;
}

@parser::includes {
  #include <iostream>

  #include "GollygeoLexer.hpp"
  #include "Country.h"
  #include "GeoPlayer.h"
}

@lexer::traits {
  class GollygeoLexer;
  class GollygeoParser;
  typedef antlr3::Traits<GollygeoLexer, GollygeoParser> GollygeoLexerTraits;
  typedef GollygeoLexerTraits GollygeoParserTraits;
}

@context {
  private:
    n2c_t *countries;
    GeoPlayer *player;

  public:
    void SetCountries(n2c_t *countries,
                      GeoPlayer *player);
}

@members {
  void GollygeoParser::SetCountries(n2c_t *countries,
                                    GeoPlayer *player) {
    this->countries = countries;
    this->player = player;
  }
}

And let’s add the terminals:

LOOK: ('?'|'l');
GO: 'go';
TO: 'to';
CAPITAL: 'capital';
BORDERS: 'borders';

ID
  : ('a'..'z' | 'A' .. 'Z')+
  ;

STRING
  : '"' (options {greedy=false;}: .)* '"'
  ;

WHITESPACE
  : ('\r'|'\n'|'\t'|' ')+ {skip();}
  ;

A lexer-parser will be generated for each command-line entered, so we’re only trying to parse a single command. Let’s let the user look around, identify the capital of the current country, identify neighbors, and go to neighbors once the capital and all neighbors have been identified:

line
  : LOOK EOF
  | CAPITAL STRING EOF
  | GO TO ID EOF
  | BORDERS ID EOF
  ;[/precode]

LOOK posts a short status message:

{
  std::cout << "You are in " << player->current->GetName() << "." << std::endl;
  std::cout << "You've " << (player->current->IsCapitalIdentified() ? "" : "not ") << "identified the capital." << std::endl;
  std::cout << "You've identified " << player->current->GetIdentifiedNeighborsCount() << " of " << player->current->GetNeighbors().size() << " neighbors." << std::endl;
}

CAPITAL checks that the current country does indeed have this capital:

{
  string guess = string($STRING.text);
  if (guess == player->current->GetCapital()) {
    std::cout << "Right! " << guess << " is the capital of " << player->current->GetName() << "." << std::endl;
    player->current->IsCapitalIdentified(true);
  } else {
    std::cout << "Wrong!" << std::endl;
  }
}

NEIGHBOR checks that the current country does indeed border this other country:

{
  string country_name = string($ID.text);
  hood_t::iterator match = player->current->GetNeighbors().find(country_name);
  if (match != player->current->GetNeighbors().end()) {
    match->second.is_identified = true;
    std::cout << "Right on! " << country_name << " does border " << player->current->GetName() << "." << std::endl;
    std::cout << "You've identified " << player->current->GetIdentifiedNeighborsCount() << " of " << player->current->GetNeighbors().size() << " neighbors." << std::endl;
  } else {
    std::cout << "No country named " << country_name << " shares a border with " << player->current->GetName() << "." << std::endl;
  }
}

And GO TO moves the player when the current country is cleared:

{
  if (!player->current->IsCleared()) {
    std::cout << "You're business here is not done." << std::endl;
  } else {
    string country_name = string($ID.text);
    hood_t::const_iterator match = player->current->GetNeighbors().find(country_name);
    if (match != player->current->GetNeighbors().end()) {
      player->current = match->second.neighbor;
      std::cout << "You are now in " << player->current->GetName() << "." << std::endl;
    } else {
      std::cout << "No country named " << country_name << " shares a border with " << player->current->GetName() << "." << std::endl;
    }
  }
}

Whew. Did it work?