teaching machines

CS 1: Lecture 38 – Binary Search

December 13, 2017 by . Filed under cs1, cs145, cs148, fall 2017, lectures.

Dear students,

We close our semester today with a discussion of finding things quickly with the binary search. We will illustrate the algorithm and implement it in the context of a dictionary/spell-checker.

Earlier in the semester we discussed the linear search. Let’s revisit that algorithm first by locating a spice in our spice rack. What happens when we’ve got 100 spices? 1000? 1e6? N? In the worst case, how many spices might we need to check? In the best case? On average? The Voyage of the Dawn Treader plays on the inefficiency of this algorithm.

We’ll then read about a better way from Computational Fairy Tales by Jeremy Kubica. That better way is called the binary search. Let’s see it with our spices. It works by maintaining a window in which the target element must appear. That window can be halved by examining its middle element and determining on which side of it the target element lies. The list very quickly gets whittled down. Unlike the linear search, the binary search can only be applied to arrays that are sorted.

I think this algorithm demonstrates a significant point. The ultra-fast binary search wasn’t the computer’s idea. It is just as happy to perform linear searches. The good idea, the intelligence, was a human idea. The computer is not really that good at math; we’re the ones writing the expressions. It’s not really a chef; we’re the ones organizing code into reusable chunks. It’s not a philosopher; that was us wondering something about are data. It’s not a pilot; that was us at its helm. It’s not a factory worker; that was us avoiding mindless repetition. It’s not a scribe; we told it what to read and write. It’s not a creator; we are. The computer only amplifies ideas that you have in your head. This class has been about getting those ideas out to the machine.

Here’s your TODO list for next time:

See you next class!

Sincerely,

P.S. It’s time for a haiku!

To solve big problems
Throw away half the data
Like in politics

P.P.S. Here’s the code we wrote together…

YouAintGottaDictionaryException.java

package lecture1213;

public class YouAintGottaDictionaryException extends Exception {

  public YouAintGottaDictionaryException() {
    super("You ain't gotta dictionary, foobag. Get 1.");
  }
}

Dictionary.java

package lecture1213;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Scanner;

public class Dictionary {
  private ArrayList<String> words;
  
  public Dictionary() throws YouAintGottaDictionaryException {
    words = new ArrayList<>();
    File file = new File("/usr/share/dict/words");
    
    try {
      Scanner in = new Scanner(file);
      while (in.hasNextLine()) {
        words.add(in.nextLine());
      }
      Collections.sort(words);
    } catch (FileNotFoundException e) {
      throw new YouAintGottaDictionaryException();
    }
  }
  
  public boolean containsLinear(String target) {
    for (String word : words) {
      if (word.equals(target)) {
        return true;
      }
    }
    return false;
  }
  
  public boolean containsBinary(String target) {
    int first = 0;
    int last = words.size() - 1;
    
    while (first <= last) {
      int middle = (first + last) / 2;
//      System.out.printf("%d %d %d%n", first, last, middle);

      int order = target.compareTo(words.get(middle));
      if (order < 0) {
        last = middle - 1;
      } else if (order > 0) {
        first = middle + 1;
      } else {
        return true;
      }
    }
    
    return false;
  }
}

Words.java

package lecture1213;

import lecture1129.Stopwatch;

public class Words {
  public static void main(String[] args) throws YouAintGottaDictionaryException {
    Dictionary dictionary = new Dictionary();
    
    String[] words = {
      "hey",
      "hay",
      "hayyyyyyy",
      "zoo",
      "yeah",
      "yay",
      "yea",
      "rumplestiltskin",
      "chris",
      "incest",
      "princess",
      "princest",
      "poverty",
      "gutter",
      "alabama",
      "euphemism",
      "good",
      "one",
      "foobag"
    };
    Stopwatch timer = new Stopwatch();
    timer.start();
    for (String word : words) {
      System.out.println(dictionary.containsBinary(word));
    }
    double elapsed = timer.stop();
    System.out.println(elapsed);
  }
}

ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException.java

package lecture1213;

public class ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException extends Exception {

  public ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException() {
    super("No hay un diccionario.");
  }
}

Dictionary.java

package lecture1213;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Scanner;

public class Dictionary {
  private ArrayList<String> words;

  public Dictionary() throws ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException {
    File file = new File("/usr/share/dict/words");
    try {
      words = new ArrayList<>();
      Scanner in = new Scanner(file);
      while (in.hasNextLine()) {
        words.add(in.nextLine());
      }
      Collections.sort(words);
    } catch (FileNotFoundException e) {
      throw new ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException();
    }
  }
  
  public boolean containsLinear(String target) {
    for (String word : words) {
      if (word.equals(target)) {
        return true;
      }
    }
    return false;
  }
  
  public boolean containsBinary(String target) {
    int first = 0;
    int last = words.size() - 1;
    
    while (first <= last) {
      int iMiddle = (first + last) / 2;
      int order = target.compareTo(words.get(iMiddle));
      if (order == 0) {
        return true;
      } else if (order < 0) {
        last = iMiddle - 1;
      } else {
        first = iMiddle + 1;
      }
    }
    
    return false;
  }
}

Main.java

package lecture1213;

import lecture1129.Stopwatch;

public class Main {
  public static void main(String[] args) throws ForSomeReasonYouDontHaveADictionaryInstalledOnYourMachineSorryException {
    String[] words = {
      "eucalyptus",
      "Chris",
      "periwinkle",
      "longest",
      "word",
      "xylophone",
      "super...",
      "antidisestablishmentarianism",
      "accuse",
      "puce",
      "scoot",
      "scoop",
      "finals",
      "java",
      "c++",
      "lonely",
      "\u3422"
    };
    
    Dictionary dictionary = new Dictionary();
    
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.start();
    
    for (String word : words) {
      System.out.println(word + " -> " + dictionary.containsBinary(word));
    }
    
    stopwatch.stop();
    System.out.println(stopwatch.getElapsedSeconds());
  }
}