More parsing. Go fmt.

2013-02-26 11:33:07 -08:00 · 2013-02-26 11:33:07 -08:00 · 6da2555966
commit 6da2555966
parent d129ff285c
5 changed files with 671 additions and 355 deletions
--- a/lex.go
+++ b/lex.go
@ -1,10 +1,9 @@
 // TODO: Backquoted strings.
 // TODO: Comments
 package main
 import (
    "fmt"
 	"strings"
 	"unicode/utf8"
 )
@ -15,6 +14,7 @@ const eof rune = '\000'
 const (
 	tokenError tokenType = iota
 	tokenNewline
 	tokenBareString
 	tokenQuotedString
 	tokenPipeInclude
@ -24,37 +24,46 @@ const (
 	tokenRecipe
 )
 func (typ tokenType) String() string {
 	switch typ {
-    case tokenError:        return "[Error]"
+	case tokenError:
-    case tokenBareString:   return "[BareString]"
+		return "[Error]"
-    case tokenQuotedString: return "[QuotedString]"
+	case tokenNewline:
-    case tokenPipeInclude:  return "[PipeInclude]"
+		return "[Newline]"
-    case tokenRedirInclude: return "[RedirInclude]"
+	case tokenBareString:
-    case tokenColon:        return "[Colon]"
+		return "[BareString]"
-    case tokenAssign:       return "[Assign]"
+	case tokenQuotedString:
-    case tokenRecipe:       return "[Recipe]"
+		return "[QuotedString]"
 	case tokenPipeInclude:
 		return "[PipeInclude]"
 	case tokenRedirInclude:
 		return "[RedirInclude]"
 	case tokenColon:
 		return "[Colon]"
 	case tokenAssign:
 		return "[Assign]"
 	case tokenRecipe:
 		return "[Recipe]"
 	}
 	return "[MysteryToken]"
 }
 // A token is one lexical unit produced by the lexer. The lexer's emit builds
 // tokens with a positional literal, so the field order here must not change.
 type token struct {
 	typ  tokenType // token type
 	val  string    // token string
 	line int       // line where it was found
 }
 func (t *token) String() string {
 	if t.typ == tokenError {
 		return t.val
 	} else if t.typ == tokenNewline {
 		return "\\n"
 	}
-    return fmt.Sprintf("%s %q", t.typ, t.val)
+	return t.val
 }
 type lexer struct {
 	input    string     // input string to be lexed
 	output   chan token // channel on which tokens are sent
@ -66,18 +75,15 @@ type lexer struct {
 	indented bool       // true if the only whitespace so far on this line
 }
 // A lexerStateFun is simultaneously the state of the lexer and the next
 // action the lexer will perform.
 type lexerStateFun func(*lexer) lexerStateFun
 // Record what as the lexer's error message, then emit a tokenError token.
 // The message is stored before the token is emitted.
 func (l *lexer) lexerror(what string) {
 	l.errmsg = what
 	l.emit(tokenError)
 }
 // Return the nth character without advancing.
 func (l *lexer) peekN(n int) (c rune) {
 	pos := l.pos
@ -95,13 +101,11 @@ func (l *lexer) peekN(n int) (c rune) {
 	return
 }
 // Look at the upcoming character without advancing; shorthand for peekN
 // with an offset of zero.
 func (l *lexer) peek() rune {
 	const lookahead = 0
 	return l.peekN(lookahead)
 }
 // Consume and return the next character in the lexer input.
 func (l *lexer) next() rune {
 	if l.pos >= len(l.input) {
@ -124,20 +128,17 @@ func (l *lexer) next() rune {
 	return c
 }
 // Consume the next character and discard it. After advancing, the start of
 // the pending token is moved up to the current position, so the skipped
 // character is not part of the next emitted token. Nothing is returned.
 func (l *lexer) skip() {
 	l.next()
 	l.start = l.pos
 }
 func (l *lexer) emit(typ tokenType) {
-    l.output <- token{typ, l.input[l.start:l.pos]}
+	l.output <- token{typ, l.input[l.start:l.pos], l.line}
 	l.start = l.pos
 }
 // Consume the next rune if it is in the given string.
 func (l *lexer) accept(valid string) bool {
 	if strings.IndexRune(valid, l.peek()) >= 0 {
@ -147,7 +148,6 @@ func (l *lexer) accept(valid string) bool {
 	return false
 }
 // Skip the next rune if it is in the valid string. Return true if it was
 // skipped.
 func (l *lexer) ignore(valid string) bool {
@ -158,7 +158,6 @@ func (l *lexer) ignore(valid string) bool {
 	return false
 }
 // Consume characters from the valid string until the next is not.
 func (l *lexer) acceptRun(valid string) int {
 	prevpos := l.pos
@ -168,7 +167,6 @@ func (l *lexer) acceptRun(valid string) int {
 	return l.pos - prevpos
 }
 // Accept until something from the given string is encountered.
 func (l *lexer) acceptUntil(invalid string) {
 	for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
@ -176,7 +174,6 @@ func (l *lexer) acceptUntil(invalid string) {
 	}
 }
 // Skip characters from the valid string until the next is not.
 func (l *lexer) skipRun(valid string) int {
 	prevpos := l.pos
@ -186,7 +183,6 @@ func (l* lexer) skipRun(valid string) int {
 	return l.pos - prevpos
 }
 // Skip until something from the given string is encountered.
 func (l *lexer) skipUntil(invalid string) {
 	for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
@ -194,15 +190,13 @@ func (l *lexer) skipUntil(invalid string) {
 	}
 }
 // Start a new lexer to lex the given input.
 func lex(input string) (*lexer, chan token) {
-    l := &lexer{input: input, output: make(chan token)}
+	l := &lexer{input: input, output: make(chan token), line: 1, indented: true}
 	go l.run()
 	return l, l.output
 }
 func (l *lexer) run() {
 	for state := lexTopLevel; state != nil; {
 		state = state(l)
@ -210,16 +204,21 @@ func (l *lexer) run() {
 	close(l.output)
 }
 // What do we need?
 // A function that consumes non-newline whitespace.
 // A way of determining if the current line might be a recipe.
 func lexTopLevel(l *lexer) lexerStateFun {
 	for {
-        l.skipRun(" \t\n\r")
+		l.skipRun(" \t\r")
 		// emit a newline token if we are ending a non-empty line.
 		if l.peek() == '\n' && !l.indented {
 			l.next()
 			l.emit(tokenNewline)
 		}
 		l.skipRun(" \t\r\n")
 		if l.peek() == '\'' && l.peekN(1) == '\n' {
 			l.next()
 			l.next()
@ -235,55 +234,53 @@ func lexTopLevel (l *lexer) lexerStateFun {
 	c := l.peek()
 	switch c {
-    case eof: return nil
+	case eof:
-    case '#': return lexComment
+		return nil
-    case '<': return lexInclude
+	case '#':
-    case '"': return lexDoubleQuote
+		return lexComment
-    case '\'': return lexSingleQuote
+	case '<':
-    case ':': return lexColon
+		return lexInclude
-    case '=': return lexAssign
+	case '"':
 		return lexDoubleQuote
 	case '\'':
 		return lexSingleQuote
 	case ':':
 		return lexColon
 	case '=':
 		return lexAssign
 	}
 	return lexBareString
 }
 // Lexer state entered when lexTopLevel sees a ':'. Consume one character,
 // emit it as a tokenColon, and go back to the top-level state.
 func lexColon(l *lexer) lexerStateFun {
 	l.next()
 	l.emit(tokenColon)
 	return lexTopLevel
 }
 // Lexer state entered when lexTopLevel sees a '='. Consume one character,
 // emit it as a tokenAssign, and go back to the top-level state.
 func lexAssign(l *lexer) lexerStateFun {
 	l.next()
 	l.emit(tokenAssign)
 	return lexTopLevel
 }
 // Lexer state entered when lexTopLevel sees a '#'. Discard the '#' and
 // everything up to, but not including, the next newline; no token is
 // emitted. Lexing then resumes at the top level.
 func lexComment(l *lexer) lexerStateFun {
 	l.skip() // '#'
 	l.skipUntil("\n")
 	return lexTopLevel
 }
 func lexInclude(l *lexer) lexerStateFun {
-    l.skip() // '<'
+	l.next() // '<'
-    var typ tokenType
+	if l.accept("|") {
-    if l.ignore("|") {
+		l.emit(tokenPipeInclude)
        typ = tokenPipeInclude
 	} else {
-        typ = tokenRedirInclude
+		l.emit(tokenRedirInclude)
 	}
    l.skipRun(" \t\n\r")
    l.emit(typ)
 	return lexTopLevel
 }
 func lexDoubleQuote(l *lexer) lexerStateFun {
 	l.skip() // '"'
 	for l.peek() != '"' {
@ -297,7 +294,6 @@ func lexDoubleQuote (l *lexer) lexerStateFun {
 	return lexTopLevel
 }
 func lexSingleQuote(l *lexer) lexerStateFun {
 	l.skip() // '\''
 	l.acceptUntil("'")
@ -306,9 +302,7 @@ func lexSingleQuote (l *lexer) lexerStateFun {
 	return lexTopLevel
 }
 func lexRecipe(l *lexer) lexerStateFun {
 	for {
 		l.acceptUntil("\n")
 		l.acceptRun(" \t\n\r")
@ -322,7 +316,6 @@ func lexRecipe (l *lexer) lexerStateFun {
 	return lexTopLevel
 }
 func lexBareString(l *lexer) lexerStateFun {
 	// TODO: allow escaping spaces and tabs?
 	// TODO: allow adjacent quoted string, e.g.: foo"bar"baz?
@ -330,5 +323,3 @@ func lexBareString (l *lexer) lexerStateFun {
 	l.emit(tokenBareString)
 	return lexTopLevel
 }
--- a/mk.go
+++ b/mk.go
@ -1,10 +1,13 @@
 package main
 import (
 	"fmt"
 	"io/ioutil"
 	"os"
 )
 // Read a mkfile from standard input, parse it under the name "<stdin>", and
 // print the resulting rule set. If standard input cannot be read, report the
 // error on standard error and exit with status 1 instead of parsing whatever
 // partial input was returned.
 func main() {
 	input, err := ioutil.ReadAll(os.Stdin)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "mk: reading standard input: %v\n", err)
 		os.Exit(1)
 	}
 	rs := parse(string(input), "<stdin>")
 	fmt.Println(rs)
 }
--- a/parse.go
+++ b/parse.go
@ -1,85 +1,57 @@
 package main
 import (
 	"fmt"
 	"os"
    "os/exec"
 )
 /* Grammar, to the best of my knowledge:
 Should we deviate at all from mk?
 Yes! I want to simplify things by saying recipes have nonzero indentation and
 everything else has zero.
 rule ::= targets ':' attributes ':' prereqs NEWLINE RECIPE |
         targets ':' prereqs NEWLINE RECIPE
 targets ::= string | string "," targets
 attributes ::= SCALAR | SCALAR attributes
 prereqs ::= string | string "," prereqs
 include ::= '<' string NEWLINE
 string ::= SCALAR | QSTRING
 assignment ::= SCALAR '=' string
 How do we handle escaping new lines?
 Is newline a token that's emitted?
 */
 // The parser for mk files is terribly simple. There are only three sorts of
 // statements in mkfiles: variable assignments, rules (possibly with
 // accompanying recipes), and includes.
 //
 // Maybe this is the wrong way to organize things.
 // We should perhaps have a type for a parsed mkfile that includes every
 // assignment as well as every rule.
 //
 // Rule order should not matter.
 //
 // Includes are tricky. If they were straight up includes, they could be
 // evaluated in place, but they could contain shell script, etc.
 //
 // No...we still have to evaluate them in place. That means figuring out how to
 // spawn shells from go.
 //
 // A parser holds the state for parsing one mkfile. parseInto builds it with
 // a positional literal, so the field order here must not change.
 type parser struct {
 	l        *lexer   // underlying lexer
 	name     string   // name of the file being parsed
 	tokenbuf []token  // tokens consumed on the current statement
 	rules    *ruleSet // current ruleSet
 }
 // Report a syntax error of the form "while <context>, expected <expected>
 // but found <token>", prefixed with the file name and the token's line, on
 // standard error. The process then exits with status 1.
 func (p *parser) parseError(context string, expected string, found token) {
 	stderr := os.Stderr
 	fmt.Fprintf(stderr, "%s:%d: syntax error: ", p.name, found.line)
 	text := found.String()
 	fmt.Fprintf(stderr, "while %s, expected %s but found \"%s\".\n",
 		context, expected, text)
 	os.Exit(1)
 }
 // Report a syntax error located at the line on which the given token was
 // found. Delegates to basicErrorAtLine.
 func (p *parser) basicErrorAtToken(what string, found token) {
 	line := found.line
 	p.basicErrorAtLine(what, line)
 }
 // Print "<file>:<line>: syntax error: <what>" on standard error and exit
 // with status 1.
 func (p *parser) basicErrorAtLine(what string, line int) {
 	const format = "%s:%d: syntax error: %s\n"
 	fmt.Fprintf(os.Stderr, format, p.name, line, what)
 	os.Exit(1)
 }
 // Append a token to the buffer of tokens seen on the current statement.
 func (p *parser) push(t token) {
 	grown := append(p.tokenbuf, t)
 	p.tokenbuf = grown
 }
 // Empty the token buffer by truncating it to length zero; the slice is
 // resliced rather than replaced.
 func (p *parser) clear() {
 	p.tokenbuf = p.tokenbuf[0:0]
 }
 // A parser state function takes a parser and the next token and returns a new
 // state function, or nil if there was a parse error.
 type parserStateFun func(*parser, token) parserStateFun
 // Parse a mkfile, returning a new ruleSet.
-func parse(input string) *ruleSet {
+func parse(input string, name string) *ruleSet {
-    rules := &ruleSet{}
+	rules := &ruleSet{make(map[string][]string), make([]rule, 0)}
-    parseInto(input, rules)
+	parseInto(input, name, rules)
 	return rules
 }
 // Parse a mkfile inserting rules and variables into a given ruleSet.
-func parseInto(input string, rules *ruleSet) {
+func parseInto(input string, name string, rules *ruleSet) {
 	l, tokens := lex(input)
-    p := &parser{l, []token{}, rules}
+	p := &parser{l, name, []token{}, rules}
 	state := parseTopLevel
 	for t := range tokens {
 		if t.typ == tokenError {
@ -91,31 +63,246 @@ func parseInto(input string, rules *ruleSet) {
 		state = state(p, t)
 	}
 	// insert a dummy newline to allow parsing of any assignments or recipeless
 	// rules to finish.
 	state = state(p, token{tokenNewline, "\n", l.line})
 	// TODO: Handle the case when state is not top level.
 }
 func parseTopLevel(p *parser, t token) parserStateFun {
 	switch t.typ {
-        case tokenPipeInclude: return parsePipeInclude(p, t)
+	case tokenNewline:
-        // TODO: all others
+		return parseTopLevel
 	case tokenPipeInclude:
 		return parsePipeInclude
 	case tokenRedirInclude:
 		return parseRedirInclude
 	case tokenQuotedString:
 		return parseTargets(p, t)
 	case tokenBareString:
 		return parseAssignmentOrTarget(p, t)
 	default:
 		p.parseError("parsing mkfile",
 			"a rule, include, or assignment", t)
 	}
 	return parseTopLevel
 }
 func parsePipeInclude(p *parser, t token) parserStateFun {
-    // TODO: We need to split this up into arguments so we can feed it into
+	switch t.typ {
-    // executeRecipe.
+	case tokenNewline:
-    return parseTopLevel
+		if len(p.tokenbuf) == 0 {
 			p.basicErrorAtToken("empty pipe include", t)
 		}
 		args := make([]string, len(p.tokenbuf)-1)
 		for i := 1; i < len(p.tokenbuf); i++ {
 			args[i-1] = p.tokenbuf[i].val
 		}
 		output := executeRecipe("sh", args, "", false, false, true)
 		parseInto(output, fmt.Sprintf("%s:sh", p.name), p.rules)
 		p.clear()
 		return parseTopLevel
 	// Almost anything goes. Let the shell sort it out.
 	case tokenBareString:
 		fallthrough
 	case tokenPipeInclude:
 		fallthrough
 	case tokenRedirInclude:
 		fallthrough
 	case tokenColon:
 		fallthrough
 	case tokenAssign:
 		fallthrough
 	case tokenQuotedString:
 		p.tokenbuf = append(p.tokenbuf, t)
 	default:
 		// TODO: Complain about unexpected tokens.
 	}
 	return parsePipeInclude
 }
 func parseRedirInclude(p *parser, t token) parserStateFun {
-    // TODO: Open the file, read its context, call parseInto recursively.
+	switch t.typ {
-    return parseTopLevel
+	case tokenNewline:
 		// TODO:
 		// Open the file, read its context, call parseInto recursively.
 		// Clear out p.tokenbuf
 	case tokenBareString:
 	case tokenQuotedString:
 	default:
 		// TODO: Complain about unexpected tokens.
 	}
 	return parseRedirInclude
 }
 // A bare string was found at the start of a line: it is either the name
 // being assigned to or the first target of a rule. Buffer it and let the
 // next token decide.
 func parseAssignmentOrTarget(p *parser, t token) parserStateFun {
 	fmt.Println("assignment or target")
 	p.tokenbuf = append(p.tokenbuf, t)
 	return parseEqualsOrTarget
 }
 // Consumed one bare string at the beginning of the line. The next token
 // decides what the statement is:
 //   - '=' makes it an assignment (the '=' itself is not buffered),
 //   - another string makes it a rule with several targets,
 //   - ':' ends the target list of a rule.
 // Anything else is reported as a syntax error, which exits the program.
 func parseEqualsOrTarget(p *parser, t token) parserStateFun {
 	// Trace of the state transition, printed on standard output.
 	fmt.Println("equals or target")
 	switch t.typ {
 	case tokenAssign:
 		return parseAssignment
 	case tokenBareString, tokenQuotedString:
 		p.push(t)
 		return parseTargets
 	case tokenColon:
 		p.push(t)
 		return parseAttributesOrPrereqs
 	default:
 		p.parseError("reading a target or assignment",
 			"'=', ':', or another target", t)
 	}
 	return parseTopLevel // unreachable
 }
 // Inside an assignment, after the '='. Every token up to the end of the line
 // is buffered as part of the value; the newline triggers the assignment and
 // resets the buffer.
 func parseAssignment(p *parser, t token) parserStateFun {
 	if t.typ != tokenNewline {
 		p.push(t)
 		return parseAssignment
 	}
 	p.rules.executeAssignment(p.tokenbuf)
 	p.clear()
 	return parseTopLevel
 }
 // Reading the target list of a rule: strings are buffered as targets until a
 // ':' is seen, which is buffered too and ends the list. Any other token is a
 // syntax error.
 func parseTargets(p *parser, t token) parserStateFun {
 	if t.typ == tokenColon {
 		p.push(t)
 		return parseAttributesOrPrereqs
 	}
 	if t.typ == tokenBareString || t.typ == tokenQuotedString {
 		p.push(t)
 	} else {
 		p.parseError("reading a rule's targets",
 			"filename or pattern", t)
 	}
 	return parseTargets
 }
 // After the targets and their ':'. The strings that follow are attributes if
 // a second ':' shows up, and prerequisites if the line ends first.
 func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
 	fmt.Println("attributes or prereqs")
 	switch t.typ {
 	case tokenBareString, tokenQuotedString:
 		p.push(t)
 	case tokenColon:
 		// A second colon: what was buffered so far were attributes.
 		p.push(t)
 		return parsePrereqs
 	case tokenNewline:
 		return parseRecipe
 	default:
 		p.parseError("reading a rule's attributes or prerequisites",
 			"an attribute, pattern, or filename", t)
 	}
 	return parseAttributesOrPrereqs
 }
 // After the second ':' of a rule. Strings are buffered as prerequisites
 // until the end of the line; any other token is a syntax error.
 func parsePrereqs(p *parser, t token) parserStateFun {
 	fmt.Println("prereqs")
 	if t.typ == tokenNewline {
 		return parseRecipe
 	}
 	if t.typ == tokenBareString || t.typ == tokenQuotedString {
 		p.push(t)
 	} else {
 		p.parseError("reading a rule's prerequisites",
 			"filename or pattern", t)
 	}
 	return parsePrereqs
 }
 // Reached after the newline that ends a rule header. The token buffer holds
 // the header as: targets ':' prereqs, or targets ':' attributes ':' prereqs.
 // Build the rule from the buffer and add it to the rule set. If t is a
 // recipe token it becomes the rule's recipe; otherwise t belongs to the next
 // statement and is handed straight to parseTopLevel.
 func parseRecipe(p *parser, t token) parserStateFun {
 	fmt.Println("recipe")
 	buf := p.tokenbuf
 	n := len(buf)
 	// Locate the first colon, then the next one after it (if any).
 	first := 0
 	for first < n && buf[first].typ != tokenColon {
 		first++
 	}
 	second := first + 1
 	for second < n && buf[second].typ != tokenColon {
 		second++
 	}
 	// values copies the strings of buf[lo:hi] into a fresh slice.
 	values := func(lo, hi int) []string {
 		out := make([]string, hi-lo)
 		for k := range out {
 			out[k] = buf[lo+k].val
 		}
 		return out
 	}
 	var r rule
 	r.targets = values(0, first)
 	if second < n {
 		// Two colons: the tokens between them are attributes.
 		if err := r.parseAttribs(values(first+1, second)); err != nil {
 			msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found)
 			p.basicErrorAtToken(msg, buf[first+1])
 		}
 	} else {
 		// Only one colon: prerequisites start right after it.
 		second = first
 	}
 	r.prereqs = values(second+1, n)
 	isRecipe := t.typ == tokenRecipe
 	if isRecipe {
 		r.recipe = t.val
 	}
 	p.rules.push(r)
 	p.clear()
 	if isRecipe {
 		return parseTopLevel
 	}
 	// the current token doesn't belong to this rule
 	return parseTopLevel(p, t)
 }
--- a/recipe.go
+++ b/recipe.go
@ -1,14 +1,12 @@
 package main
 import (
    "os/exec"
    "os"
 	"io"
 	"log"
 	"os"
 	"os/exec"
 )
 // A monolithic function for executing recipes.
 func executeRecipe(program string,
 	args []string,
@ -34,14 +32,17 @@ func executeRecipe(program string,
 	if len(input) > 0 {
 		cmdin, err := cmd.StdinPipe()
 		if err != nil {
 			go func() { cmdin.Write([]byte(input)) }()
 		}
-
+	}
 	output := ""
 	var err error
 	if capture_out {
-        output, err = cmd.Output()
+		var outbytes []byte
 		outbytes, err = cmd.Output()
 		output = string(outbytes)
 	} else {
 		err = cmd.Run()
 	}
@ -53,5 +54,3 @@ func executeRecipe(program string,
 	return output
 }
--- a/ruleset.go
+++ b/ruleset.go
@ -0,0 +1,136 @@
 // Mkfiles are parsed into ruleSets, which as the name suggests, are sets of
 // rules with accompanying recipes, as well as assigned variables which are
 // expanded when evaluating rules and recipes.
 package main
 import (
 	"unicode/utf8"
 )
 // The boolean attributes a rule can carry. Each flag is switched on by a
 // single attribute letter in rule.parseAttribs.
 type attribSet struct {
 	delFailed       bool // delete targets when the recipe fails
 	nonstop         bool // don't stop if the recipe fails
 	forcedTimestamp bool // update timestamp whether the recipe does or not
 	nonvirtual      bool // a meta-rule that will only match files
 	quiet           bool // don't print the recipe
 	regex           bool // regular expression meta-rule
 	update          bool // treat the targets as if they were updated
 	virtual         bool // rule is virtual (does not match files)
 }
 // Error parsing an attribute: found is the character that is not a known
 // attribute letter.
 type attribError struct {
 	found rune
 }
 // A single parsed rule: its targets, attributes, prerequisites and recipe.
 type rule struct {
 	targets    []string  // non-empty array of targets
 	attributes attribSet // rule attributes
 	prereqs    []string  // possibly empty prerequisites
 	shell      []string  // command used to execute the recipe
 	recipe     string    // recipe source
 	command    []string  // command attribute
 }
 // Apply the attribute letters found in inputs to the rule. Every character
 // of every string is one attribute, except that 'P' and 'S' end the scan:
 // the rest of the current string (if any) and all following strings are
 // appended to the rule's command ('P') or shell ('S'). Returns nil on
 // success, or an attribError naming the first unrecognized character.
 func (r *rule) parseAttribs(inputs []string) *attribError {
 	flags := &r.attributes
 	for idx, word := range inputs {
 		for off := 0; off < len(word); {
 			ch, size := utf8.DecodeRuneInString(word[off:])
 			off += size // off now points just past ch
 			switch ch {
 			case 'D':
 				flags.delFailed = true
 			case 'E':
 				flags.nonstop = true
 			case 'N':
 				flags.forcedTimestamp = true
 			case 'n':
 				flags.nonvirtual = true
 			case 'Q':
 				flags.quiet = true
 			case 'R':
 				flags.regex = true
 			case 'U':
 				flags.update = true
 			case 'V':
 				flags.virtual = true
 			case 'P':
 				if off < len(word) {
 					r.command = append(r.command, word[off:])
 				}
 				r.command = append(r.command, inputs[idx+1:]...)
 				return nil
 			case 'S':
 				if off < len(word) {
 					r.shell = append(r.shell, word[off:])
 				}
 				r.shell = append(r.shell, inputs[idx+1:]...)
 				return nil
 			default:
 				return &attribError{ch}
 			}
 		}
 	}
 	return nil
 }
 // A ruleSet is the result of parsing a mkfile: the variables assigned in it
 // and the rules it defines. parse builds it with a positional literal, so
 // the field order here must not change.
 type ruleSet struct {
 	vars  map[string][]string // variable name -> assigned values
 	rules []rule              // rules, in the order they were pushed
 }
 // Append r to the end of the rule set's list of rules.
 func (rs *ruleSet) push(r rule) {
 	extended := append(rs.rules, r)
 	rs.rules = extended
 }
 // Expand variables found in a token's string. Expansion is not implemented
 // yet, so the token's text is currently returned unchanged.
 func (rs *ruleSet) expand(t token) string {
 	// TODO: implement this
 	text := t.val
 	return text
 }
 // Report whether v is a valid variable name: the first character must be an
 // ASCII letter or '_', and every following character must be an ASCII
 // letter, digit, or '_'. The empty string has no offending character and is
 // accepted.
 func isValidVarName(v string) bool {
 	for i, c := range v {
 		switch {
 		case c == '_' || isalpha(c):
 			// letters and underscores are fine anywhere
 		case i > 0 && isalnum(c):
 			// digits are fine anywhere but the first position
 		default:
 			return false
 		}
 	}
 	return true
 }
 // Report whether c is an ASCII letter.
 func isalpha(c rune) bool {
 	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
 }
 // Report whether c is an ASCII letter or decimal digit.
 func isalnum(c rune) bool {
 	return isalpha(c) || ('0' <= c && c <= '9')
 }
 // Carry out an assignment. ts[0] names the variable and the remaining tokens
 // are its values; each value is run through expand and the resulting list
 // replaces whatever the variable held before.
 func (rs *ruleSet) executeAssignment(ts []token) {
 	name := ts[0].val
 	if !isValidVarName(name) {
 		// TODO: complain
 	}
 	// expanded variables
 	operands := ts[1:]
 	vals := make([]string, len(operands))
 	for i, operand := range operands {
 		vals[i] = rs.expand(operand)
 	}
 	rs.vars[name] = vals
 }