From 6da25559663fa5a2eb9247000a695ab8be60bb58 Mon Sep 17 00:00:00 2001
From: Daniel Jones
Date: Tue, 26 Feb 2013 11:33:07 -0800
Subject: [PATCH] More parsing. Go fmt.

---
 lex.go     | 431 ++++++++++++++++++++++++++---------------------------
 mk.go      |   9 +-
 parse.go   | 365 ++++++++++++++++++++++++++++++++++-----------
 recipe.go  |  85 ++++++-----
 ruleset.go | 136 +++++++++++++++++
 5 files changed, 671 insertions(+), 355 deletions(-)
 create mode 100644 ruleset.go

diff --git a/lex.go b/lex.go
index b7fc8d3..14749da 100644
--- a/lex.go
+++ b/lex.go
@@ -1,12 +1,11 @@
-
 // TODO: Backquoted strings.
+// TODO: Comments

 package main

 import (
-    "fmt"
-    "strings"
-    "unicode/utf8"
+	"strings"
+	"unicode/utf8"
 )

 type tokenType int
@@ -14,321 +13,313 @@ type tokenType int
 const eof rune = '\000'

 const (
-    tokenError tokenType = iota
-    tokenBareString
-    tokenQuotedString
-    tokenPipeInclude
-    tokenRedirInclude
-    tokenColon
-    tokenAssign
-    tokenRecipe
+	tokenError tokenType = iota
+	tokenNewline
+	tokenBareString
+	tokenQuotedString
+	tokenPipeInclude
+	tokenRedirInclude
+	tokenColon
+	tokenAssign
+	tokenRecipe
 )

-
 func (typ tokenType) String() string {
-    switch typ {
-    case tokenError: return "[Error]"
-    case tokenBareString: return "[BareString]"
-    case tokenQuotedString: return "[QuotedString]"
-    case tokenPipeInclude: return "[PipeInclude]"
-    case tokenRedirInclude: return "[RedirInclude]"
-    case tokenColon: return "[Colon]"
-    case tokenAssign: return "[Assign]"
-    case tokenRecipe: return "[Recipe]"
-    }
-    return "[MysteryToken]"
+	switch typ {
+	case tokenError:
+		return "[Error]"
+	case tokenNewline:
+		return "[Newline]"
+	case tokenBareString:
+		return "[BareString]"
+	case tokenQuotedString:
+		return "[QuotedString]"
+	case tokenPipeInclude:
+		return "[PipeInclude]"
+	case tokenRedirInclude:
+		return "[RedirInclude]"
+	case tokenColon:
+		return "[Colon]"
+	case tokenAssign:
+		return "[Assign]"
+	case tokenRecipe:
+		return "[Recipe]"
+	}
+	return "[MysteryToken]"
 }

-
 type token struct {
-    typ tokenType // token type
-    val string    // token string
+	typ  tokenType // token type
+	val  string    // token string
+	line int       // line where it was found
 }

-
 func (t *token) String() string {
-    if t.typ == tokenError {
-        return t.val
-    }
+	if t.typ == tokenError {
+		return t.val
+	} else if t.typ == tokenNewline {
+		return "\\n"
+	}

-    return fmt.Sprintf("%s %q", t.typ, t.val)
+	return t.val
 }

-
 type lexer struct {
-    input    string     // input string to be lexed
-    output   chan token // channel on which tokens are sent
-    start    int        // token beginning
-    pos      int        // position within input
-    line     int        // line within input
-    col      int        // column within input
-    errmsg   string     // set to an appropriate error message when necessary
-    indented bool       // true if the only whitespace so far on this line
+	input    string     // input string to be lexed
+	output   chan token // channel on which tokens are sent
+	start    int        // token beginning
+	pos      int        // position within input
+	line     int        // line within input
+	col      int        // column within input
+	errmsg   string     // set to an appropriate error message when necessary
+	indented bool       // true if the only whitespace so far on this line
 }

-
 // A lexerStateFun is simultaneously the state of the lexer and the next
 // action the lexer will perform.
-type lexerStateFun func (*lexer) lexerStateFun
-
+type lexerStateFun func(*lexer) lexerStateFun

 func (l *lexer) lexerror(what string) {
-    l.errmsg = what
-    l.emit(tokenError)
+	l.errmsg = what
+	l.emit(tokenError)
 }

-
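The self-referential `lexerStateFun` type is the core design choice here: each state function does a little work and returns the next state, so the driver is a bare loop (see `run` further down). A standalone toy sketch of the same pattern, using hypothetical names that are not part of this patch:

```go
// A minimal sketch of the state-function lexer pattern: each state
// returns the next state, and a nil state stops the driver loop.
package main

import "fmt"

type stateFn func(*toyLexer) stateFn

type toyLexer struct {
	input string
	pos   int
	out   chan string
}

func lexWord(l *toyLexer) stateFn {
	start := l.pos
	for l.pos < len(l.input) && l.input[l.pos] != ' ' {
		l.pos++
	}
	l.out <- l.input[start:l.pos] // emit the word just scanned
	return lexSpace
}

func lexSpace(l *toyLexer) stateFn {
	for l.pos < len(l.input) && l.input[l.pos] == ' ' {
		l.pos++
	}
	if l.pos >= len(l.input) {
		return nil // no more input: stop the driver loop
	}
	return lexWord
}

func main() {
	l := &toyLexer{input: "foo bar baz", out: make(chan string)}
	go func() {
		for state := lexWord; state != nil; {
			state = state(l)
		}
		close(l.out)
	}()
	for w := range l.out {
		fmt.Println(w) // foo, bar, baz
	}
}
```

Running the states in a goroutine and emitting over a channel, exactly as this lexer does, lets the parser consume tokens as they are produced instead of waiting for the whole input to be scanned.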
 // Return the nth character without advancing.
 func (l *lexer) peekN(n int) (c rune) {
-    pos := l.pos
-    var width int
-    i := 0
-    for ; i <= n && pos < len(l.input); i++ {
-        c, width = utf8.DecodeRuneInString(l.input[pos:])
-        pos += width
-    }
+	pos := l.pos
+	var width int
+	i := 0
+	for ; i <= n && pos < len(l.input); i++ {
+		c, width = utf8.DecodeRuneInString(l.input[pos:])
+		pos += width
+	}

-    if i <= n {
-        return eof
-    }
+	if i <= n {
+		return eof
+	}

-    return
+	return
 }

-
 // Return the next character without advancing.
 func (l *lexer) peek() rune {
-    return l.peekN(0)
+	return l.peekN(0)
 }

-
 // Consume and return the next character in the lexer input.
 func (l *lexer) next() rune {
-    if l.pos >= len(l.input) {
-        return eof
-    }
-    c, width := utf8.DecodeRuneInString(l.input[l.pos:])
-    l.pos += width
+	if l.pos >= len(l.input) {
+		return eof
+	}
+	c, width := utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += width

-    if c == '\n' {
-        l.col = 0
-        l.line += 1
-        l.indented = true
-    } else {
-        l.col += 1
-        if strings.IndexRune(" \t", c) < 0 {
-            l.indented = false
-        }
-    }
+	if c == '\n' {
+		l.col = 0
+		l.line += 1
+		l.indented = true
+	} else {
+		l.col += 1
+		if strings.IndexRune(" \t", c) < 0 {
+			l.indented = false
+		}
+	}

-    return c
+	return c
 }

-
 // Skip the next character in the lexer input.
 func (l *lexer) skip() {
-    l.next()
-    l.start = l.pos
+	l.next()
+	l.start = l.pos
 }

-
 func (l *lexer) emit(typ tokenType) {
-    l.output <- token{typ, l.input[l.start:l.pos]}
-    l.start = l.pos
+	l.output <- token{typ, l.input[l.start:l.pos], l.line}
+	l.start = l.pos
 }

-
 // Consume the next rune if it is in the given string.
 func (l *lexer) accept(valid string) bool {
-    if strings.IndexRune(valid, l.peek()) >= 0 {
-        l.next()
-        return true
-    }
-    return false
+	if strings.IndexRune(valid, l.peek()) >= 0 {
+		l.next()
+		return true
+	}
+	return false
 }

-
 // Skip the next rune if it is in the valid string. Return true if it was
 // skipped.
 func (l *lexer) ignore(valid string) bool {
-    if strings.IndexRune(valid, l.peek()) >= 0 {
-        l.skip()
-        return true
-    }
-    return false
+	if strings.IndexRune(valid, l.peek()) >= 0 {
+		l.skip()
+		return true
+	}
+	return false
 }

-
 // Consume characters from the valid string until the next is not.
 func (l *lexer) acceptRun(valid string) int {
-    prevpos := l.pos
-    for strings.IndexRune(valid, l.peek()) >= 0 {
-        l.next()
-    }
-    return l.pos - prevpos
+	prevpos := l.pos
+	for strings.IndexRune(valid, l.peek()) >= 0 {
+		l.next()
+	}
+	return l.pos - prevpos
 }

-
 // Accept until something from the given string is encountered.
 func (l *lexer) acceptUntil(invalid string) {
-    for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
-        l.next()
-    }
+	for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
+		l.next()
+	}
 }

-
 // Skip characters from the valid string until the next is not.
-func (l* lexer) skipRun(valid string) int {
-    prevpos := l.pos
-    for strings.IndexRune(valid, l.peek()) >= 0 {
-        l.skip()
-    }
-    return l.pos - prevpos
+func (l *lexer) skipRun(valid string) int {
+	prevpos := l.pos
+	for strings.IndexRune(valid, l.peek()) >= 0 {
+		l.skip()
+	}
+	return l.pos - prevpos
 }

-
 // Skip until something from the given string is encountered.
 func (l *lexer) skipUntil(invalid string) {
-    for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
-        l.skip()
-    }
+	for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
+		l.skip()
+	}
 }

-
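These primitives come in two parallel families: the `accept*` functions keep what they scan as part of the pending token, while the `skip*` functions also advance `l.start`, discarding the scanned text. A hypothetical state function, not part of this patch, showing how they compose:

```go
// Hypothetical example: lex a run of digits, discarding leading blanks.
// skipRun moves the token start forward past whitespace; acceptRun grows
// the pending token; emit sends input[start:pos] and resets start.
func lexNumber(l *lexer) lexerStateFun {
	l.skipRun(" \t")
	if l.acceptRun("0123456789") > 0 {
		l.emit(tokenBareString) // reusing tokenBareString for the sketch
	}
	return lexTopLevel
}
```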
 // Start a new lexer to lex the given input.
 func lex(input string) (*lexer, chan token) {
-    l := &lexer{input: input, output: make(chan token)}
-    go l.run()
-    return l, l.output
+	l := &lexer{input: input, output: make(chan token), line: 1, indented: true}
+	go l.run()
+	return l, l.output
 }

-
 func (l *lexer) run() {
-    for state := lexTopLevel; state != nil; {
-        state = state(l)
-    }
-    close(l.output)
+	for state := lexTopLevel; state != nil; {
+		state = state(l)
+	}
+	close(l.output)
 }

-
 // What do we need?
 // A function that consumes non-newline whitespace.
 // A way of determining if the current line might be a recipe.
+func lexTopLevel(l *lexer) lexerStateFun {
-func lexTopLevel (l *lexer) lexerStateFun {
+	for {
+		l.skipRun(" \t\r")
+		// emit a newline token if we are ending a non-empty line.
+		if l.peek() == '\n' && !l.indented {
+			l.next()
+			l.emit(tokenNewline)
+		}
+		l.skipRun(" \t\r\n")

-    for {
-        l.skipRun(" \t\n\r")
-        if l.peek() == '\\' && l.peekN(1) == '\n' {
-            l.next()
-            l.next()
-            l.indented = false
-        } else {
-            break
-        }
-    }
+		if l.peek() == '\\' && l.peekN(1) == '\n' {
+			l.next()
+			l.next()
+			l.indented = false
+		} else {
+			break
+		}
+	}

-    if l.indented && l.col > 0 {
-        return lexRecipe
-    }
+	if l.indented && l.col > 0 {
+		return lexRecipe
+	}

-    c := l.peek()
-    switch c {
-    case eof: return nil
-    case '#': return lexComment
-    case '<': return lexInclude
-    case '"': return lexDoubleQuote
-    case '\'': return lexSingleQuote
-    case ':': return lexColon
-    case '=': return lexAssign
-    }
+	c := l.peek()
+	switch c {
+	case eof:
+		return nil
+	case '#':
+		return lexComment
+	case '<':
+		return lexInclude
+	case '"':
+		return lexDoubleQuote
+	case '\'':
+		return lexSingleQuote
+	case ':':
+		return lexColon
+	case '=':
+		return lexAssign
+	}

-    return lexBareString
+	return lexBareString
 }

-
-func lexColon (l* lexer) lexerStateFun {
-    l.next()
-    l.emit(tokenColon)
-    return lexTopLevel
+func lexColon(l *lexer) lexerStateFun {
+	l.next()
+	l.emit(tokenColon)
+	return lexTopLevel
 }

-
-func lexAssign (l* lexer) lexerStateFun {
-    l.next()
-    l.emit(tokenAssign)
-    return lexTopLevel
+func lexAssign(l *lexer) lexerStateFun {
+	l.next()
+	l.emit(tokenAssign)
+	return lexTopLevel
 }

-
-func lexComment (l* lexer) lexerStateFun {
-    l.skip() // '#'
-    l.skipUntil("\n")
-    return lexTopLevel
+func lexComment(l *lexer) lexerStateFun {
+	l.skip() // '#'
+	l.skipUntil("\n")
+	return lexTopLevel
 }

-
-func lexInclude (l* lexer) lexerStateFun {
-    l.skip() // '<'
-    var typ tokenType
-    if l.ignore("|") {
-        typ = tokenPipeInclude
-    } else {
-        typ = tokenRedirInclude
-    }
-
-    l.skipRun(" \t\n\r")
-    l.emit(typ)
-    return lexTopLevel
+func lexInclude(l *lexer) lexerStateFun {
+	l.next() // '<'
+	if l.accept("|") {
+		l.emit(tokenPipeInclude)
+	} else {
+		l.emit(tokenRedirInclude)
+	}
+	return lexTopLevel
 }

-
-func lexDoubleQuote (l *lexer) lexerStateFun {
-    l.skip() // '"'
-    for l.peek() != '"' {
-        l.acceptUntil("\\\"")
-        if l.accept("\\") {
-            l.accept("\"")
-        }
-    }
-    l.emit(tokenQuotedString)
-    l.skip() // skip '"'
-    return lexTopLevel
+func lexDoubleQuote(l *lexer) lexerStateFun {
+	l.skip() // '"'
+	// stop at eof so an unterminated string cannot loop forever
+	for l.peek() != '"' && l.peek() != eof {
+		l.acceptUntil("\\\"")
+		if l.accept("\\") {
+			l.accept("\"")
+		}
+	}
+	l.emit(tokenQuotedString)
+	l.skip() // skip '"'
+	return lexTopLevel
 }

-
-func lexSingleQuote (l *lexer) lexerStateFun {
-    l.skip() // '\''
-    l.acceptUntil("'")
-    l.emit(tokenQuotedString)
-    l.skip() // '\''
-    return lexTopLevel
+func lexSingleQuote(l *lexer) lexerStateFun {
+	l.skip() // '\''
+	l.acceptUntil("'")
+	l.emit(tokenQuotedString)
+	l.skip() // '\''
+	return lexTopLevel
 }
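With the whole lexer in view, it is worth tracing what a small mkfile turns into. A hypothetical helper, not part of this patch (lex.go itself no longer imports `fmt`, so it would have to live in another file of the package):

```go
// Print the token stream for an input. For
//
//	foo: bar
//		cc -o foo bar
//
// the stream should be roughly: BareString "foo", Colon, BareString "bar",
// Newline, then one Recipe token carrying the indented line -- recipes are
// detected by indentation alone, per the design notes above.
func dumpTokens(input string) {
	_, tokens := lex(input)
	for t := range tokens {
		fmt.Printf("%s %q\n", t.typ, t.val)
	}
}
```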

+func lexRecipe(l *lexer) lexerStateFun {
+	for {
+		l.acceptUntil("\n")
+		l.acceptRun(" \t\n\r")
+		if !l.indented || l.col == 0 {
+			break
+		}
+	}

-func lexRecipe (l *lexer) lexerStateFun {
-
-    for {
-        l.acceptUntil("\n")
-        l.acceptRun(" \t\n\r")
-        if !l.indented || l.col == 0 {
-            break
-        }
-    }
-
-    // TODO: don't emit if there is only whitespace in the recipe
-    l.emit(tokenRecipe)
-    return lexTopLevel
+	// TODO: don't emit if there is only whitespace in the recipe
+	l.emit(tokenRecipe)
+	return lexTopLevel
 }

-
-func lexBareString (l *lexer) lexerStateFun {
-    // TODO: allow escaping spaces and tabs?
-    // TODO: allow adjacent quoted string, e.g.: foo"bar"baz?
-    l.acceptUntil(" \t\n\r\\=:#'\"")
-    l.emit(tokenBareString)
-    return lexTopLevel
+func lexBareString(l *lexer) lexerStateFun {
+	// TODO: allow escaping spaces and tabs?
+	// TODO: allow adjacent quoted string, e.g.: foo"bar"baz?
+	l.acceptUntil(" \t\n\r\\=:#'\"")
+	l.emit(tokenBareString)
+	return lexTopLevel
 }
-
-
diff --git a/mk.go b/mk.go
index fab8418..3e27f29 100644
--- a/mk.go
+++ b/mk.go
@@ -1,10 +1,13 @@
-
 package main

 import (
+	"fmt"
+	"io/ioutil"
+	"os"
 )

 func main() {
+	input, _ := ioutil.ReadAll(os.Stdin)
+	rs := parse(string(input), "")
+	fmt.Println(rs)
 }
-
-
diff --git a/parse.go b/parse.go
index b835ecc..6fb737d 100644
--- a/parse.go
+++ b/parse.go
@@ -1,121 +1,308 @@
-
 package main

 import (
-    "fmt"
-    "os"
-    "os/exec"
+	"fmt"
+	"os"
 )

-/* Grammar, to the best of my knowledge:
-
-Should we deviate at all from mk?
-
-Yes! I want to simplify things by saying recipes have nonzero indentation and
-everything else has zero.
-
-rule ::= targets ':' attributes ':' prereqs NEWLINE RECIPE |
-         targets ':' prereqs NEWLINE RECIPE
-
-targets ::= string | string "," targets
-
-attributes ::= SCALAR | SCALAR attributes
-
-prereqs ::= string | string "," prereqs
-
-include ::= '<' string NEWLINE
-
-string ::= SCALAR | QSTRING
-
-assignment ::= SCALAR '=' string
-
-How do we handle escaping new lines?
-Is newline a token that's emitted?
-
-*/
-
-
-// The parser for mk files is terribly simple. There are only three sorts of
-// statements in mkfiles: variable assignments, rules (possibly with
-// accompanying recipes), and includes.
-
-
-
-//
-// Maybe this is the wrong way to organize things.
-// We should perhaps have a type for a parsed mkfile that includes every
-// assignment as well as every rule.
-//
-// Rule order should not matter.
-//
-// Includes are tricky. If they were straight up includes, they could be
-// evaluated in place, but they could contain shell script, etc.
-//
-// No...we still have to evaluate them in place. That means figuring out how to
-// spawn shells from go.
-// - - type parser struct { - l *lexer // underlying lexer - tokenbuf []token // tokens consumed on the current statement - rules *ruleSet // current ruleSet + l *lexer // underlying lexer + name string // name of the file being parsed + tokenbuf []token // tokens consumed on the current statement + rules *ruleSet // current ruleSet } +func (p *parser) parseError(context string, expected string, found token) { + fmt.Fprintf(os.Stderr, "%s:%d: syntax error: ", p.name, found.line) + fmt.Fprintf(os.Stderr, "while %s, expected %s but found \"%s\".\n", + context, expected, found.String()) + os.Exit(1) +} + +func (p *parser) basicErrorAtToken(what string, found token) { + p.basicErrorAtLine(what, found.line) +} + +func (p *parser) basicErrorAtLine(what string, line int) { + fmt.Fprintf(os.Stderr, "%s:%d: syntax error: %s\n", + p.name, line, what) + os.Exit(1) +} + +func (p *parser) push(t token) { + p.tokenbuf = append(p.tokenbuf, t) +} + +func (p *parser) clear() { + p.tokenbuf = p.tokenbuf[:0] +} // A parser state function takes a parser and the next token and returns a new // state function, or nil if there was a parse error. -type parserStateFun func (*parser, token) parserStateFun - +type parserStateFun func(*parser, token) parserStateFun // Parse a mkfile, returning a new ruleSet. -func parse(input string) *ruleSet { - rules := &ruleSet{} - parseInto(input, rules) - return rules +func parse(input string, name string) *ruleSet { + rules := &ruleSet{make(map[string][]string), make([]rule, 0)} + parseInto(input, name, rules) + return rules } - // Parse a mkfile inserting rules and variables into a given ruleSet. -func parseInto(input string, rules *ruleSet) { - l, tokens := lex(input) - p := &parser{l, []token{}, rules} - state := parseTopLevel - for t := range tokens { - if t.typ == tokenError { - // TODO: fancier error messages - fmt.Fprintf(os.Stderr, "Error: %s", l.errmsg) - break - } +func parseInto(input string, name string, rules *ruleSet) { + l, tokens := lex(input) + p := &parser{l, name, []token{}, rules} + state := parseTopLevel + for t := range tokens { + if t.typ == tokenError { + // TODO: fancier error messages + fmt.Fprintf(os.Stderr, "Error: %s", l.errmsg) + break + } - state = state(p, t) - } + state = state(p, t) + } - // TODO: Handle the case when state is not top level. + // insert a dummy newline to allow parsing of any assignments or recipeless + // rules to finish. + state = state(p, token{tokenNewline, "\n", l.line}) + + // TODO: Handle the case when state is not top level. } - func parseTopLevel(p *parser, t token) parserStateFun { - switch t.typ { - case tokenPipeInclude: return parsePipeInclude(p, t) - // TODO: all others - } + switch t.typ { + case tokenNewline: + return parseTopLevel + case tokenPipeInclude: + return parsePipeInclude + case tokenRedirInclude: + return parseRedirInclude + case tokenQuotedString: + return parseTargets(p, t) + case tokenBareString: + return parseAssignmentOrTarget(p, t) + default: + p.parseError("parsing mkfile", + "a rule, include, or assignment", t) + } - return parseTopLevel + return parseTopLevel } - func parsePipeInclude(p *parser, t token) parserStateFun { - // TODO: We need to split this up into arguments so we can feed it into - // executeRecipe. 
-    return parseTopLevel
-}
+	switch t.typ {
+	case tokenNewline:
+		if len(p.tokenbuf) == 0 {
+			p.basicErrorAtToken("empty pipe include", t)
+		}
+
+		args := make([]string, len(p.tokenbuf)-1)
+		for i := 1; i < len(p.tokenbuf); i++ {
+			args[i-1] = p.tokenbuf[i].val
+		}
+
+		output := executeRecipe("sh", args, "", false, false, true)
+		parseInto(output, fmt.Sprintf("%s:sh", p.name), p.rules)
+
+		p.clear()
+		return parseTopLevel
+
+	// Almost anything goes. Let the shell sort it out.
+	case tokenBareString:
+		fallthrough
+	case tokenPipeInclude:
+		fallthrough
+	case tokenRedirInclude:
+		fallthrough
+	case tokenColon:
+		fallthrough
+	case tokenAssign:
+		fallthrough
+	case tokenQuotedString:
+		p.tokenbuf = append(p.tokenbuf, t)
+
+	default:
+		// TODO: Complain about unexpected tokens.
+	}
+
+	return parsePipeInclude
+}

 func parseRedirInclude(p *parser, t token) parserStateFun {
-    // TODO: Open the file, read its context, call parseInto recursively.
-    return parseTopLevel
+	switch t.typ {
+	case tokenNewline:
+		// TODO:
+		// Open the file, read its contents, call parseInto recursively.
+		// Clear out p.tokenbuf
+
+	case tokenBareString:
+	case tokenQuotedString:
+
+	default:
+		// TODO: Complain about unexpected tokens.
+	}
+
+	return parseRedirInclude
 }

+// Encountered a bare string at the beginning of the line.
+func parseAssignmentOrTarget(p *parser, t token) parserStateFun {
+	fmt.Println("assignment or target")
+	p.push(t)
+	return parseEqualsOrTarget
+}
+
+// Consumed one bare string at the beginning of the line.
+func parseEqualsOrTarget(p *parser, t token) parserStateFun {
+	fmt.Println("equals or target")
+	switch t.typ {
+	case tokenAssign:
+		return parseAssignment
+
+	case tokenBareString:
+		fallthrough
+	case tokenQuotedString:
+		p.push(t)
+		return parseTargets
+
+	case tokenColon:
+		p.push(t)
+		return parseAttributesOrPrereqs
+
+	default:
+		p.parseError("reading a target or assignment",
+			"'=', ':', or another target", t)
+	}
+
+	return parseTopLevel // unreachable
+}
+
+// Consumed 'foo='. Everything else is a value being assigned to foo.
+func parseAssignment(p *parser, t token) parserStateFun {
+	switch t.typ {
+	case tokenNewline:
+		p.rules.executeAssignment(p.tokenbuf)
+		p.clear()
+		return parseTopLevel
+
+	default:
+		p.push(t)
+	}
+
+	return parseAssignment
+}
+
+// Everything up to ':' must be a target.
+func parseTargets(p *parser, t token) parserStateFun {
+	switch t.typ {
+	case tokenBareString:
+		fallthrough
+	case tokenQuotedString:
+		p.push(t)
+	case tokenColon:
+		p.push(t)
+		return parseAttributesOrPrereqs
+
+	default:
+		p.parseError("reading a rule's targets",
+			"filename or pattern", t)
+	}
+
+	return parseTargets
+}
+
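These states are where the grammar's main ambiguity gets resolved: a line that starts with a bare string is an assignment if the next token is `=`, and a rule as soon as a `:` shows up. A hypothetical spot-check, not part of the patch (parse.go already imports `fmt`):

```go
func exampleDisambiguation() {
	// '=' after the first word takes the parseAssignment path.
	rs := parse("FLAGS=-O2 -g\n", "a.mk")
	fmt.Println(rs.vars["FLAGS"]) // [-O2 -g]

	// ':' takes the target/prereq path; no recipe is required.
	rs = parse("all: foo bar\n", "b.mk")
	fmt.Println(rs.rules[0].prereqs) // [foo bar]
}
```

The parser's debug `Println` calls will interleave with this output until they are removed.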
+// Consumed one or more strings followed by a ':'.
+func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
+	fmt.Println("attributes or prereqs")
+	switch t.typ {
+	case tokenNewline:
+		return parseRecipe
+	case tokenColon:
+		p.push(t)
+		return parsePrereqs
+	case tokenBareString:
+		fallthrough
+	case tokenQuotedString:
+		p.push(t)
+	default:
+		p.parseError("reading a rule's attributes or prerequisites",
+			"an attribute, pattern, or filename", t)
+	}
+
+	return parseAttributesOrPrereqs
+}
+
+func parsePrereqs(p *parser, t token) parserStateFun {
+	fmt.Println("prereqs")
+	switch t.typ {
+	case tokenNewline:
+		return parseRecipe
+	case tokenBareString:
+		fallthrough
+	case tokenQuotedString:
+		p.push(t)
+
+	default:
+		p.parseError("reading a rule's prerequisites",
+			"filename or pattern", t)
+	}
+
+	return parsePrereqs
+}
+
+func parseRecipe(p *parser, t token) parserStateFun {
+	fmt.Println("recipe")
+
+	// Assemble the rule!
+	r := rule{}
+
+	// find one or two colons
+	i := 0
+	for ; i < len(p.tokenbuf) && p.tokenbuf[i].typ != tokenColon; i++ {
+	}
+	j := i + 1
+	for ; j < len(p.tokenbuf) && p.tokenbuf[j].typ != tokenColon; j++ {
+	}
+
+	// targets
+	r.targets = make([]string, i)
+	for k := 0; k < i; k++ {
+		r.targets[k] = p.tokenbuf[k].val
+	}
+
+	// rule has attributes
+	if j < len(p.tokenbuf) {
+		attribs := make([]string, j-i-1)
+		for k := i + 1; k < j; k++ {
+			attribs[k-i-1] = p.tokenbuf[k].val
+		}
+		err := r.parseAttribs(attribs)
+		if err != nil {
+			msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found)
+			p.basicErrorAtToken(msg, p.tokenbuf[i+1])
+		}
+	} else {
+		j = i
+	}
+
+	// prereqs
+	r.prereqs = make([]string, len(p.tokenbuf)-j-1)
+	for k := j + 1; k < len(p.tokenbuf); k++ {
+		r.prereqs[k-j-1] = p.tokenbuf[k].val
+	}
+
+	if t.typ == tokenRecipe {
+		r.recipe = t.val
+	}
+
+	p.rules.push(r)
+	p.clear()
+
+	// the current token doesn't belong to this rule
+	if t.typ != tokenRecipe {
+		return parseTopLevel(p, t)
+	}
+
+	return parseTopLevel
+}
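parseRecipe's index arithmetic is easiest to see on a concrete line. For `foo bar:QV: baz quux`, tokenbuf holds `[foo bar : QV : baz quux]`, the two scans land on `i == 2` and `j == 4`, and the slices fall out as targets, attributes, and prereqs. A hypothetical check, not part of the patch:

```go
func exampleRuleAssembly() {
	rs := parse("foo bar:QV: baz quux\n\tcc baz quux\n", "c.mk")
	r := rs.rules[0]
	fmt.Println(r.targets)                                // [foo bar]
	fmt.Println(r.prereqs)                                // [baz quux]
	fmt.Println(r.attributes.quiet, r.attributes.virtual) // true true
	fmt.Printf("%q\n", r.recipe)                          // "cc baz quux\n"
}
```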

diff --git a/recipe.go b/recipe.go
index 0ffea2a..bb8749e 100644
--- a/recipe.go
+++ b/recipe.go
@@ -1,57 +1,56 @@
-
 package main

-
 import (
-    "os/exec"
-    "os"
-    "io"
+	"io"
+	"log"
+	"os"
+	"os/exec"
 )

-
 // A monolithic function for executing recipes.
 func executeRecipe(program string,
-    args []string,
-    input string,
-    echo_out bool,
-    echo_err bool,
-    capture_out bool) string {
-    cmd := exec.Command(program, args...)
+	args []string,
+	input string,
+	echo_out bool,
+	echo_err bool,
+	capture_out bool) string {
+	cmd := exec.Command(program, args...)

-    if echo_out {
-        cmdout, err := cmd.StdoutPipe()
-        if err != nil {
-            go io.Copy(os.Stdout, cmdout)
-        }
-    }
+	if echo_out {
+		cmdout, err := cmd.StdoutPipe()
+		// copy only if the pipe was created successfully
+		if err == nil {
+			go io.Copy(os.Stdout, cmdout)
+		}
+	}

-    if echo_err {
-        cmderr, err := cmd.StdoutPipe()
-        if err != nil {
-            go io.Copy(os.Stderr, cmderr)
-        }
-    }
+	if echo_err {
+		cmderr, err := cmd.StderrPipe()
+		if err == nil {
+			go io.Copy(os.Stderr, cmderr)
+		}
+	}

-    if len(input) > 0 {
-        cmdin, err := cmd.StdinPipe()
-        go func () { cmdin.Write([]byte(input)) }()
-    }
+	if len(input) > 0 {
+		cmdin, err := cmd.StdinPipe()
+		if err == nil {
+			go func() { cmdin.Write([]byte(input)) }()
+		}
+	}

+	output := ""
+	var err error
+	if capture_out {
+		var outbytes []byte
+		outbytes, err = cmd.Output()
+		output = string(outbytes)
+	} else {
+		err = cmd.Run()
+	}

-    output := ""
-    var err error
-    if capture_out {
-        output, err = cmd.Output()
-    } else {
-        err = cmd.Run()
-    }
+	if err != nil {
+		// TODO: better error output
+		log.Fatal("Recipe failed")
+	}

-    if err != nil {
-        // TODO: better error output
-        log.Fatal("Recipe failed")
-    }
-
-    return output
+	return output
 }
-
-
diff --git a/ruleset.go b/ruleset.go
new file mode 100644
index 0000000..fadd5b2
--- /dev/null
+++ b/ruleset.go
@@ -0,0 +1,136 @@
+// Mkfiles are parsed into ruleSets, which, as the name suggests, are sets of
+// rules with accompanying recipes, as well as assigned variables which are
+// expanded when evaluating rules and recipes.
+
+package main
+
+import (
+	"unicode/utf8"
+)
+
+type attribSet struct {
+	delFailed       bool // delete targets when the recipe fails
+	nonstop         bool // don't stop if the recipe fails
+	forcedTimestamp bool // update timestamp whether the recipe does or not
+	nonvirtual      bool // a meta-rule that will only match files
+	quiet           bool // don't print the recipe
+	regex           bool // regular expression meta-rule
+	update          bool // treat the targets as if they were updated
+	virtual         bool // rule is virtual (does not match files)
+}
+
+// Error parsing an attribute
+type attribError struct {
+	found rune
+}
+
+type rule struct {
+	targets    []string  // non-empty array of targets
+	attributes attribSet // rule attributes
+	prereqs    []string  // possibly empty prerequisites
+	shell      []string  // command used to execute the recipe
+	recipe     string    // recipe source
+	command    []string  // command attribute
+}
+
+// Read attributes from an array of strings, updating the rule.
+func (r *rule) parseAttribs(inputs []string) *attribError {
+	for i := 0; i < len(inputs); i++ {
+		input := inputs[i]
+		pos := 0
+		for pos < len(input) {
+			c, w := utf8.DecodeRuneInString(input[pos:])
+			switch c {
+			case 'D':
+				r.attributes.delFailed = true
+			case 'E':
+				r.attributes.nonstop = true
+			case 'N':
+				r.attributes.forcedTimestamp = true
+			case 'n':
+				r.attributes.nonvirtual = true
+			case 'Q':
+				r.attributes.quiet = true
+			case 'R':
+				r.attributes.regex = true
+			case 'U':
+				r.attributes.update = true
+			case 'V':
+				r.attributes.virtual = true
+			case 'P':
+				if pos+w < len(input) {
+					r.command = append(r.command, input[pos+w:])
+				}
+				r.command = append(r.command, inputs[i+1:]...)
+				return nil
+
+			case 'S':
+				if pos+w < len(input) {
+					r.shell = append(r.shell, input[pos+w:])
+				}
+				r.shell = append(r.shell, inputs[i+1:]...)
+				return nil
+
+			default:
+				return &attribError{c}
+			}
+
+			pos += w
+		}
+	}
+
+	return nil
+}
+
+type ruleSet struct {
+	vars  map[string][]string
+	rules []rule
+}
+
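Attributes are single runes between the two colons of a rule, so `clean:QV:` would mark a rule quiet and virtual, while `S` and `P` consume the rest of the attribute string as the shell and command settings. A hypothetical spot-check, not part of the patch (it assumes `fmt` is imported, which ruleset.go currently does not do):

```go
func exampleAttribs() {
	r := rule{}
	if err := r.parseAttribs([]string{"QV"}); err != nil {
		fmt.Printf("unknown attribute '%c'\n", err.found)
		return
	}
	fmt.Println(r.attributes.quiet, r.attributes.virtual) // true true

	// 'S' takes everything after it as the shell used for recipes.
	r = rule{}
	r.parseAttribs([]string{"Sbash"})
	fmt.Println(r.shell) // [bash]
}
```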
+// Add a rule to the rule set.
+func (rs *ruleSet) push(r rule) {
+	rs.rules = append(rs.rules, r)
+}
+
+// Expand variables found in a string.
+func (rs *ruleSet) expand(t token) string {
+	// TODO: implement this
+	return t.val
+}
+
+func isValidVarName(v string) bool {
+	for i := 0; i < len(v); {
+		c, w := utf8.DecodeRuneInString(v[i:])
+		if i == 0 && !(isalpha(c) || c == '_') {
+			return false
+		} else if i > 0 && !(isalnum(c) || c == '_') {
+			return false
+		}
+		i += w
+	}
+	return true
+}
+
+func isalpha(c rune) bool {
+	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
+}
+
+func isalnum(c rune) bool {
+	return isalpha(c) || ('0' <= c && c <= '9')
+}
+
+// Parse and execute assignment operation.
+func (rs *ruleSet) executeAssignment(ts []token) {
+	assignee := ts[0].val
+	if !isValidVarName(assignee) {
+		// TODO: complain
+	}
+
+	// expanded values of the right-hand side
+	vals := make([]string, len(ts)-1)
+	for i := 0; i < len(vals); i++ {
+		vals[i] = rs.expand(ts[i+1])
+	}
+
+	rs.vars[assignee] = vals
+}
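A few spot-checks of the variable-name validation (hypothetical, not part of the patch): with the loop corrected as above, a valid name is a leading letter or underscore followed by letters, digits, or underscores.

```go
func exampleVarNames() {
	fmt.Println(isValidVarName("CFLAGS")) // true
	fmt.Println(isValidVarName("_obj"))   // true
	fmt.Println(isValidVarName("2fast"))  // false: leading digit
	fmt.Println(isValidVarName("a-b"))    // false: '-' is not allowed
}
```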