More parsing. Go fmt.
This commit is contained in:
parent
d129ff285c
commit
6da2555966
5 changed files with 671 additions and 355 deletions
365
parse.go
365
parse.go
|
|
@ -1,121 +1,308 @@
|
|||
|
||||
package main

import (
	"fmt"
	"os"
	"os/exec"
)
|
||||
|
||||
/* Grammar, to the best of my knowledge:

Should we deviate at all from mk?

Yes! I want to simplify things by saying recipes have nonzero indentation and
everything else has zero.

rule ::= targets ':' attributes ':' prereqs NEWLINE RECIPE |
         targets ':' prereqs NEWLINE RECIPE

targets ::= string | string "," targets

attributes ::= SCALAR | SCALAR attributes

prereqs ::= string | string "," prereqs

include ::= '<' string NEWLINE

string ::= SCALAR | QSTRING

assignment ::= SCALAR '=' string

How do we handle escaping new lines?
Is newline a token that's emitted?

*/

// The parser for mk files is terribly simple. There are only three sorts of
// statements in mkfiles: variable assignments, rules (possibly with
// accompanying recipes), and includes.
//
// Maybe this is the wrong way to organize things.
// We should perhaps have a type for a parsed mkfile that includes every
// assignment as well as every rule.
//
// Rule order should not matter.
//
// Includes are tricky. If they were straight-up includes, they could be
// evaluated in place, but they could contain shell script, etc.
//
// No...we still have to evaluate them in place. That means figuring out how to
// spawn shells from go.
//
||||
type parser struct {
|
||||
l *lexer // underlying lexer
|
||||
tokenbuf []token // tokens consumed on the current statement
|
||||
rules *ruleSet // current ruleSet
|
||||
l *lexer // underlying lexer
|
||||
name string // name of the file being parsed
|
||||
tokenbuf []token // tokens consumed on the current statement
|
||||
rules *ruleSet // current ruleSet
|
||||
}
|
||||
|
||||
func (p *parser) parseError(context string, expected string, found token) {
|
||||
fmt.Fprintf(os.Stderr, "%s:%d: syntax error: ", p.name, found.line)
|
||||
fmt.Fprintf(os.Stderr, "while %s, expected %s but found \"%s\".\n",
|
||||
context, expected, found.String())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
func (p *parser) basicErrorAtToken(what string, found token) {
|
||||
p.basicErrorAtLine(what, found.line)
|
||||
}
|
||||
|
||||
func (p *parser) basicErrorAtLine(what string, line int) {
|
||||
fmt.Fprintf(os.Stderr, "%s:%d: syntax error: %s\n",
|
||||
p.name, line, what)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
func (p *parser) push(t token) {
|
||||
p.tokenbuf = append(p.tokenbuf, t)
|
||||
}
|
||||
|
||||
func (p *parser) clear() {
|
||||
p.tokenbuf = p.tokenbuf[:0]
|
||||
}
|
||||
|
||||
// A parser state function takes a parser and the next token and returns a new
|
||||
// state function, or nil if there was a parse error.
|
||||
type parserStateFun func (*parser, token) parserStateFun
|
||||
|
||||
type parserStateFun func(*parser, token) parserStateFun
|
||||
|
||||
// Parse a mkfile, returning a new ruleSet.
|
||||
func parse(input string) *ruleSet {
|
||||
rules := &ruleSet{}
|
||||
parseInto(input, rules)
|
||||
return rules
|
||||
func parse(input string, name string) *ruleSet {
|
||||
rules := &ruleSet{make(map[string][]string), make([]rule, 0)}
|
||||
parseInto(input, name, rules)
|
||||
return rules
|
||||
}
|
||||
|
||||
|
||||
// Parse a mkfile inserting rules and variables into a given ruleSet.
|
||||
func parseInto(input string, rules *ruleSet) {
|
||||
l, tokens := lex(input)
|
||||
p := &parser{l, []token{}, rules}
|
||||
state := parseTopLevel
|
||||
for t := range tokens {
|
||||
if t.typ == tokenError {
|
||||
// TODO: fancier error messages
|
||||
fmt.Fprintf(os.Stderr, "Error: %s", l.errmsg)
|
||||
break
|
||||
}
|
||||
func parseInto(input string, name string, rules *ruleSet) {
|
||||
l, tokens := lex(input)
|
||||
p := &parser{l, name, []token{}, rules}
|
||||
state := parseTopLevel
|
||||
for t := range tokens {
|
||||
if t.typ == tokenError {
|
||||
// TODO: fancier error messages
|
||||
fmt.Fprintf(os.Stderr, "Error: %s", l.errmsg)
|
||||
break
|
||||
}
|
||||
|
||||
state = state(p, t)
|
||||
}
|
||||
state = state(p, t)
|
||||
}
|
||||
|
||||
// TODO: Handle the case when state is not top level.
|
||||
// insert a dummy newline to allow parsing of any assignments or recipeless
|
||||
// rules to finish.
|
||||
state = state(p, token{tokenNewline, "\n", l.line})
|
||||
|
||||
// TODO: Handle the case when state is not top level.
|
||||
}
|
||||
|
||||
|
||||
func parseTopLevel(p *parser, t token) parserStateFun {
|
||||
switch t.typ {
|
||||
case tokenPipeInclude: return parsePipeInclude(p, t)
|
||||
// TODO: all others
|
||||
}
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
return parseTopLevel
|
||||
case tokenPipeInclude:
|
||||
return parsePipeInclude
|
||||
case tokenRedirInclude:
|
||||
return parseRedirInclude
|
||||
case tokenQuotedString:
|
||||
return parseTargets(p, t)
|
||||
case tokenBareString:
|
||||
return parseAssignmentOrTarget(p, t)
|
||||
default:
|
||||
p.parseError("parsing mkfile",
|
||||
"a rule, include, or assignment", t)
|
||||
}
|
||||
|
||||
return parseTopLevel
|
||||
return parseTopLevel
|
||||
}
|
||||
|
||||
|
||||
func parsePipeInclude(p *parser, t token) parserStateFun {
|
||||
// TODO: We need to split this up into arguments so we can feed it into
|
||||
// executeRecipe.
|
||||
return parseTopLevel
|
||||
}
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
if len(p.tokenbuf) == 0 {
|
||||
p.basicErrorAtToken("empty pipe include", t)
|
||||
}
|
||||
|
||||
args := make([]string, len(p.tokenbuf)-1)
|
||||
for i := 1; i < len(p.tokenbuf); i++ {
|
||||
args[i-1] = p.tokenbuf[i].val
|
||||
}
|
||||
|
||||
output := executeRecipe("sh", args, "", false, false, true)
|
||||
parseInto(output, fmt.Sprintf("%s:sh", p.name), p.rules)
|
||||
|
||||
p.clear()
|
||||
return parseTopLevel
|
||||
|
||||
// Almost anything goes. Let the shell sort it out.
|
||||
case tokenBareString:
|
||||
fallthrough
|
||||
case tokenPipeInclude:
|
||||
fallthrough
|
||||
case tokenRedirInclude:
|
||||
fallthrough
|
||||
case tokenColon:
|
||||
fallthrough
|
||||
case tokenAssign:
|
||||
fallthrough
|
||||
case tokenQuotedString:
|
||||
p.tokenbuf = append(p.tokenbuf, t)
|
||||
|
||||
default:
|
||||
// TODO: Complain about unexpected tokens.
|
||||
}
|
||||
|
||||
return parsePipeInclude
|
||||
}
|
||||
|
||||
func parseRedirInclude(p *parser, t token) parserStateFun {
|
||||
// TODO: Open the file, read its context, call parseInto recursively.
|
||||
return parseTopLevel
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
// TODO:
|
||||
// Open the file, read its context, call parseInto recursively.
|
||||
// Clear out p.tokenbuf
|
||||
|
||||
case tokenBareString:
|
||||
case tokenQuotedString:
|
||||
|
||||
default:
|
||||
// TODO: Complain about unexpected tokens.
|
||||
}
|
||||
|
||||
return parseRedirInclude
|
||||
}
|
||||
|
||||
// Encountered a bare string at the beginning of the line.
|
||||
func parseAssignmentOrTarget(p *parser, t token) parserStateFun {
|
||||
fmt.Println("assignment or target")
|
||||
p.push(t)
|
||||
return parseEqualsOrTarget
|
||||
}
|
||||
|
||||
// Consumed one bare string ot the begging of the line.
|
||||
func parseEqualsOrTarget(p *parser, t token) parserStateFun {
|
||||
fmt.Println("equals or target")
|
||||
switch t.typ {
|
||||
case tokenAssign:
|
||||
return parseAssignment
|
||||
|
||||
case tokenBareString:
|
||||
fallthrough
|
||||
case tokenQuotedString:
|
||||
p.push(t)
|
||||
return parseTargets
|
||||
|
||||
case tokenColon:
|
||||
p.push(t)
|
||||
return parseAttributesOrPrereqs
|
||||
|
||||
default:
|
||||
p.parseError("reading a a target or assignment",
|
||||
"'=', ':', or another target", t)
|
||||
}
|
||||
|
||||
return parseTopLevel // unreachable
|
||||
}
|
||||
|
||||
// Consumed 'foo='. Everything else is a value being assigned to foo.
|
||||
func parseAssignment(p *parser, t token) parserStateFun {
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
p.rules.executeAssignment(p.tokenbuf)
|
||||
p.clear()
|
||||
return parseTopLevel
|
||||
|
||||
default:
|
||||
p.push(t)
|
||||
}
|
||||
|
||||
return parseAssignment
|
||||
}
|
||||
|
||||
// Everything up to : must be a target.
|
||||
func parseTargets(p *parser, t token) parserStateFun {
|
||||
switch t.typ {
|
||||
case tokenBareString:
|
||||
fallthrough
|
||||
case tokenQuotedString:
|
||||
p.push(t)
|
||||
case tokenColon:
|
||||
p.push(t)
|
||||
return parseAttributesOrPrereqs
|
||||
|
||||
default:
|
||||
p.parseError("reading a rule's targets",
|
||||
"filename or pattern", t)
|
||||
}
|
||||
|
||||
return parseTargets
|
||||
}
|
||||
|
||||
// Consumed one or more strings followed by a :.
|
||||
func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
|
||||
fmt.Println("attributes or prereqs")
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
return parseRecipe
|
||||
case tokenColon:
|
||||
p.push(t)
|
||||
return parsePrereqs
|
||||
case tokenBareString:
|
||||
fallthrough
|
||||
case tokenQuotedString:
|
||||
p.push(t)
|
||||
default:
|
||||
p.parseError("reading a rule's attributes or prerequisites",
|
||||
"an attribute, pattern, or filename", t)
|
||||
}
|
||||
|
||||
return parseAttributesOrPrereqs
|
||||
}
|
||||
|
||||
func parsePrereqs(p *parser, t token) parserStateFun {
|
||||
fmt.Println("prereqs")
|
||||
switch t.typ {
|
||||
case tokenNewline:
|
||||
return parseRecipe
|
||||
case tokenBareString:
|
||||
fallthrough
|
||||
case tokenQuotedString:
|
||||
p.push(t)
|
||||
|
||||
default:
|
||||
p.parseError("reading a rule's prerequisites",
|
||||
"filename or pattern", t)
|
||||
}
|
||||
|
||||
return parsePrereqs
|
||||
}
|
||||
|
||||
func parseRecipe(p *parser, t token) parserStateFun {
|
||||
fmt.Println("recipe")
|
||||
|
||||
// Assemble the rule!
|
||||
r := rule{}
|
||||
|
||||
// find one or two colons
|
||||
i := 0
|
||||
for ; i < len(p.tokenbuf) && p.tokenbuf[i].typ != tokenColon; i++ {
|
||||
}
|
||||
j := i + 1
|
||||
for ; j < len(p.tokenbuf) && p.tokenbuf[j].typ != tokenColon; j++ {
|
||||
}
|
||||
|
||||
// targets
|
||||
r.targets = make([]string, i)
|
||||
for k := 0; k < i; k++ {
|
||||
r.targets[k] = p.tokenbuf[k].val
|
||||
}
|
||||
|
||||
// rule has attributes
|
||||
if j < len(p.tokenbuf) {
|
||||
attribs := make([]string, j-i-1)
|
||||
for k := i + 1; k < j; k++ {
|
||||
attribs[k-i-1] = p.tokenbuf[k].val
|
||||
}
|
||||
err := r.parseAttribs(attribs)
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found)
|
||||
p.basicErrorAtToken(msg, p.tokenbuf[i+1])
|
||||
}
|
||||
} else {
|
||||
j = i
|
||||
}
|
||||
|
||||
// prereqs
|
||||
r.prereqs = make([]string, len(p.tokenbuf)-j-1)
|
||||
for k := j + 1; k < len(p.tokenbuf); k++ {
|
||||
r.prereqs[k-j-1] = p.tokenbuf[k].val
|
||||
}
|
||||
|
||||
if t.typ == tokenRecipe {
|
||||
r.recipe = t.val
|
||||
}
|
||||
|
||||
p.rules.push(r)
|
||||
p.clear()
|
||||
|
||||
// the current token doesn't belong to this rule
|
||||
if t.typ != tokenRecipe {
|
||||
return parseTopLevel(p, t)
|
||||
}
|
||||
|
||||
return parseTopLevel
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue