More parsing. Go fmt.

Daniel Jones 2013-02-26 11:33:07 -08:00
parent d129ff285c
commit 6da2555966
5 changed files with 671 additions and 355 deletions

lex.go (135 changes)

@ -1,10 +1,9 @@
// TODO: Backquoted strings.
// TODO: Comments
package main
import (
"fmt"
"strings"
"unicode/utf8"
)
@ -15,6 +14,7 @@ const eof rune = '\000'
const (
tokenError tokenType = iota
tokenNewline
tokenBareString
tokenQuotedString
tokenPipeInclude
@ -24,37 +24,46 @@ const (
tokenRecipe
)
func (typ tokenType) String() string {
switch typ {
case tokenError: return "[Error]"
case tokenBareString: return "[BareString]"
case tokenQuotedString: return "[QuotedString]"
case tokenPipeInclude: return "[PipeInclude]"
case tokenRedirInclude: return "[RedirInclude]"
case tokenColon: return "[Colon]"
case tokenAssign: return "[Assign]"
case tokenRecipe: return "[Recipe]"
case tokenError:
return "[Error]"
case tokenNewline:
return "[Newline]"
case tokenBareString:
return "[BareString]"
case tokenQuotedString:
return "[QuotedString]"
case tokenPipeInclude:
return "[PipeInclude]"
case tokenRedirInclude:
return "[RedirInclude]"
case tokenColon:
return "[Colon]"
case tokenAssign:
return "[Assign]"
case tokenRecipe:
return "[Recipe]"
}
return "[MysteryToken]"
}
type token struct {
typ tokenType // token type
val string // token string
line int // line where it was found
}
func (t *token) String() string {
if t.typ == tokenError {
return t.val
} else if t.typ == tokenNewline {
return "\\n"
}
return fmt.Sprintf("%s %q", t.typ, t.val)
return t.val
}
type lexer struct {
input string // input string to be lexed
output chan token // channel on which tokens are sent
@ -66,18 +75,15 @@ type lexer struct {
indented bool // true if only whitespace has been seen so far on this line
}
// A lexerStateFun is simultaneously the state of the lexer and the next
// action the lexer will perform.
type lexerStateFun func (*lexer) lexerStateFun
type lexerStateFun func(*lexer) lexerStateFun
func (l *lexer) lexerror(what string) {
l.errmsg = what
l.emit(tokenError)
}
// Return the nth character without advancing.
func (l *lexer) peekN(n int) (c rune) {
pos := l.pos
@ -95,13 +101,11 @@ func (l *lexer) peekN(n int) (c rune) {
return
}
// Return the next character without advancing.
func (l *lexer) peek() rune {
return l.peekN(0)
}
// Consume and return the next character in the lexer input.
func (l *lexer) next() rune {
if l.pos >= len(l.input) {
@ -124,20 +128,17 @@ func (l *lexer) next() rune {
return c
}
// Skip the next character in the lexer input.
func (l *lexer) skip() {
l.next()
l.start = l.pos
}
func (l *lexer) emit(typ tokenType) {
l.output <- token{typ, l.input[l.start:l.pos]}
l.output <- token{typ, l.input[l.start:l.pos], l.line}
l.start = l.pos
}
// Consume the next rune if it is in the given string.
func (l *lexer) accept(valid string) bool {
if strings.IndexRune(valid, l.peek()) >= 0 {
@ -147,7 +148,6 @@ func (l *lexer) accept(valid string) bool {
return false
}
// Skip the next rune if it is in the valid string. Return true if it was
// skipped.
func (l *lexer) ignore(valid string) bool {
@ -158,7 +158,6 @@ func (l *lexer) ignore(valid string) bool {
return false
}
// Consume characters from the valid string until the next is not.
func (l *lexer) acceptRun(valid string) int {
prevpos := l.pos
@ -168,7 +167,6 @@ func (l *lexer) acceptRun(valid string) int {
return l.pos - prevpos
}
// Accept until something from the given string is encountered.
func (l *lexer) acceptUntil(invalid string) {
for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
@ -176,9 +174,8 @@ func (l *lexer) acceptUntil(invalid string) {
}
}
// Skip characters from the valid string until the next is not.
func (l* lexer) skipRun(valid string) int {
func (l *lexer) skipRun(valid string) int {
prevpos := l.pos
for strings.IndexRune(valid, l.peek()) >= 0 {
l.skip()
@ -186,7 +183,6 @@ func (l* lexer) skipRun(valid string) int {
return l.pos - prevpos
}
// Skip until something from the given string is encountered.
func (l *lexer) skipUntil(invalid string) {
for l.pos < len(l.input) && strings.IndexRune(invalid, l.peek()) < 0 {
@ -194,15 +190,13 @@ func (l *lexer) skipUntil(invalid string) {
}
}
// Start a new lexer to lex the given input.
func lex(input string) (*lexer, chan token) {
l := &lexer{input: input, output: make(chan token)}
l := &lexer{input: input, output: make(chan token), line: 1, indented: true}
go l.run()
return l, l.output
}
func (l *lexer) run() {
for state := lexTopLevel; state != nil; {
state = state(l)
@ -210,16 +204,21 @@ func (l *lexer) run() {
close(l.output)
}
// What do we need?
// A function that consumes non-newline whitespace.
// A way of determining if the current line might be a recipe.
func lexTopLevel (l *lexer) lexerStateFun {
func lexTopLevel(l *lexer) lexerStateFun {
for {
l.skipRun(" \t\n\r")
l.skipRun(" \t\r")
// emit a newline token if we are ending a non-empty line.
if l.peek() == '\n' && !l.indented {
l.next()
l.emit(tokenNewline)
}
l.skipRun(" \t\r\n")
if l.peek() == '\\' && l.peekN(1) == '\n' {
l.next()
l.next()
@ -235,56 +234,54 @@ func lexTopLevel (l *lexer) lexerStateFun {
c := l.peek()
switch c {
case eof: return nil
case '#': return lexComment
case '<': return lexInclude
case '"': return lexDoubleQuote
case '\'': return lexSingleQuote
case ':': return lexColon
case '=': return lexAssign
case eof:
return nil
case '#':
return lexComment
case '<':
return lexInclude
case '"':
return lexDoubleQuote
case '\'':
return lexSingleQuote
case ':':
return lexColon
case '=':
return lexAssign
}
return lexBareString
}
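
In effect, tokenNewline is emitted only for lines that contained something: given a made-up input like "a = 1\n\n\nb = 2\n", the blank lines are swallowed by the second skipRun, so the parser sees exactly one [Newline] after each statement.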
func lexColon (l* lexer) lexerStateFun {
func lexColon(l *lexer) lexerStateFun {
l.next()
l.emit(tokenColon)
return lexTopLevel
}
func lexAssign (l* lexer) lexerStateFun {
func lexAssign(l *lexer) lexerStateFun {
l.next()
l.emit(tokenAssign)
return lexTopLevel
}
func lexComment (l* lexer) lexerStateFun {
func lexComment(l *lexer) lexerStateFun {
l.skip() // '#'
l.skipUntil("\n")
return lexTopLevel
}
func lexInclude (l* lexer) lexerStateFun {
l.skip() // '<'
var typ tokenType
if l.ignore("|") {
typ = tokenPipeInclude
func lexInclude(l *lexer) lexerStateFun {
l.next() // '<'
if l.accept("|") {
l.emit(tokenPipeInclude)
} else {
typ = tokenRedirInclude
l.emit(tokenRedirInclude)
}
l.skipRun(" \t\n\r")
l.emit(typ)
return lexTopLevel
}
func lexDoubleQuote (l *lexer) lexerStateFun {
func lexDoubleQuote(l *lexer) lexerStateFun {
l.skip() // '"'
for l.peek() != '"' {
l.acceptUntil("\\\"")
@ -297,8 +294,7 @@ func lexDoubleQuote (l *lexer) lexerStateFun {
return lexTopLevel
}
func lexSingleQuote (l *lexer) lexerStateFun {
func lexSingleQuote(l *lexer) lexerStateFun {
l.skip() // '\''
l.acceptUntil("'")
l.emit(tokenQuotedString)
@ -306,9 +302,7 @@ func lexSingleQuote (l *lexer) lexerStateFun {
return lexTopLevel
}
func lexRecipe (l *lexer) lexerStateFun {
func lexRecipe(l *lexer) lexerStateFun {
for {
l.acceptUntil("\n")
l.acceptRun(" \t\n\r")
@ -322,13 +316,10 @@ func lexRecipe (l *lexer) lexerStateFun {
return lexTopLevel
}
func lexBareString (l *lexer) lexerStateFun {
func lexBareString(l *lexer) lexerStateFun {
// TODO: allow escaping spaces and tabs?
// TODO: allow adjacent quoted string, e.g.: foo"bar"baz?
l.acceptUntil(" \t\n\r\\=:#'\"")
l.emit(tokenBareString)
return lexTopLevel
}
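
So for a made-up line like "foo.o: foo.c", the lexer emits [BareString] "foo.o", [Colon], [BareString] "foo.c", and finally [Newline]; the terminator set above is what stops a bare string at the colon.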

mk.go (9 changes)

@ -1,10 +1,13 @@
package main
import (
"fmt"
"io/ioutil"
"os"
)
func main() {
input, _ := ioutil.ReadAll(os.Stdin)
rs := parse(string(input), "<stdin>")
fmt.Println(rs)
}
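
At this stage the program is a bare driver: it reads an mkfile on stdin, parses it, and prints the resulting ruleSet. Something like go build && ./mk < Mkfile exercises the lexer and parser end to end (the binary and mkfile names here are assumptions, not part of the commit).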

parse.go (325 changes)

@ -1,85 +1,57 @@
package main
import (
"fmt"
"os"
"os/exec"
)
/* Grammar, to the best of my knowledge:
Should we deviate at all from mk?
Yes! I want to simplify things by saying recipes have nonzero indentation and
everything else has zero.
rule ::= targets ':' attributes ':' prereqs NEWLINE RECIPE |
targets ':' prereqs NEWLINE RECIPE
targets ::= string | string "," targets
attributes ::= SCALAR | SCALAR attributes
prereqs ::= string | string "," prereqs
include ::= '<' string NEWLINE
string ::= SCALAR | QSTRING
assignment ::= SCALAR '=' string
How do we handle escaping new lines?
Is newline a token that's emitted?
*/
// The parser for mk files is terribly simple. There are only three sorts of
// statements in mkfiles: variable assignments, rules (possibly with
// accompanying recipes), and includes.
//
// Maybe this is the wrong way to organize things.
// We should perhaps have a type for a parsed mkfile that includes every
// assignment as well as every rule.
//
// Rule order should not matter.
//
// Includes are tricky. If they were straight up includes, they could be
// evaluated in place, but they could contain shell script, etc.
//
// No...we still have to evaluate them in place. That means figuring out how to
// spawn shells from go.
//
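
Concretely, the three statement forms look like this in an mkfile (an illustrative sketch; the names and commands are made up):

CC = gcc
<config.mk
prog: main.o util.o
	gcc -o prog main.o util.o

The first line is an assignment, the second a redirect-style include, and the last two a rule whose indented line is its recipe.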
type parser struct {
l *lexer // underlying lexer
name string // name of the file being parsed
tokenbuf []token // tokens consumed on the current statement
rules *ruleSet // current ruleSet
}
func (p *parser) parseError(context string, expected string, found token) {
fmt.Fprintf(os.Stderr, "%s:%d: syntax error: ", p.name, found.line)
fmt.Fprintf(os.Stderr, "while %s, expected %s but found \"%s\".\n",
context, expected, found.String())
os.Exit(1)
}
func (p *parser) basicErrorAtToken(what string, found token) {
p.basicErrorAtLine(what, found.line)
}
func (p *parser) basicErrorAtLine(what string, line int) {
fmt.Fprintf(os.Stderr, "%s:%d: syntax error: %s\n",
p.name, line, what)
os.Exit(1)
}
func (p *parser) push(t token) {
p.tokenbuf = append(p.tokenbuf, t)
}
func (p *parser) clear() {
p.tokenbuf = p.tokenbuf[:0]
}
// A parser state function takes a parser and the next token and returns a new
// state function, or nil if there was a parse error.
type parserStateFun func (*parser, token) parserStateFun
type parserStateFun func(*parser, token) parserStateFun
// Parse a mkfile, returning a new ruleSet.
func parse(input string) *ruleSet {
rules := &ruleSet{}
parseInto(input, rules)
func parse(input string, name string) *ruleSet {
rules := &ruleSet{make(map[string][]string), make([]rule, 0)}
parseInto(input, name, rules)
return rules
}
// Parse a mkfile inserting rules and variables into a given ruleSet.
func parseInto(input string, rules *ruleSet) {
func parseInto(input string, name string, rules *ruleSet) {
l, tokens := lex(input)
p := &parser{l, []token{}, rules}
p := &parser{l, name, []token{}, rules}
state := parseTopLevel
for t := range tokens {
if t.typ == tokenError {
@ -91,31 +63,246 @@ func parseInto(input string, rules *ruleSet) {
state = state(p, t)
}
// insert a dummy newline to allow parsing of any assignments or recipeless
// rules to finish.
state = state(p, token{tokenNewline, "\n", l.line})
// TODO: Handle the case when state is not top level.
}
func parseTopLevel(p *parser, t token) parserStateFun {
switch t.typ {
case tokenPipeInclude: return parsePipeInclude(p, t)
// TODO: all others
case tokenNewline:
return parseTopLevel
case tokenPipeInclude:
return parsePipeInclude
case tokenRedirInclude:
return parseRedirInclude
case tokenQuotedString:
return parseTargets(p, t)
case tokenBareString:
return parseAssignmentOrTarget(p, t)
default:
p.parseError("parsing mkfile",
"a rule, include, or assignment", t)
}
return parseTopLevel
}
func parsePipeInclude(p *parser, t token) parserStateFun {
// TODO: We need to split this up into arguments so we can feed it into
// executeRecipe.
return parseTopLevel
}
switch t.typ {
case tokenNewline:
if len(p.tokenbuf) == 0 {
p.basicErrorAtToken("empty pipe include", t)
}
args := make([]string, len(p.tokenbuf)-1)
for i := 1; i < len(p.tokenbuf); i++ {
args[i-1] = p.tokenbuf[i].val
}
output := executeRecipe("sh", args, "", false, false, true)
parseInto(output, fmt.Sprintf("%s:sh", p.name), p.rules)
p.clear()
return parseTopLevel
// Almost anything goes. Let the shell sort it out.
case tokenBareString:
fallthrough
case tokenPipeInclude:
fallthrough
case tokenRedirInclude:
fallthrough
case tokenColon:
fallthrough
case tokenAssign:
fallthrough
case tokenQuotedString:
p.tokenbuf = append(p.tokenbuf, t)
default:
// TODO: Complain about unexpected tokens.
}
return parsePipeInclude
}
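
For example, a made-up line like <|cat extra.mk ends up here: the tokens after <| are buffered until the newline, handed to sh via executeRecipe with output capture on, and the captured stdout is parsed as more mkfile text under the synthetic name p.name + ":sh".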
func parseRedirInclude(p *parser, t token) parserStateFun {
// TODO: Open the file, read its contents, call parseInto recursively.
return parseTopLevel
switch t.typ {
case tokenNewline:
// TODO:
// Open the file, read its contents, call parseInto recursively.
// Clear out p.tokenbuf
case tokenBareString:
case tokenQuotedString:
default:
// TODO: Complain about unexpected tokens.
}
return parseRedirInclude
}
// Encountered a bare string at the beginning of the line.
func parseAssignmentOrTarget(p *parser, t token) parserStateFun {
fmt.Println("assignment or target")
p.push(t)
return parseEqualsOrTarget
}
// Consumed one bare string at the beginning of the line.
func parseEqualsOrTarget(p *parser, t token) parserStateFun {
fmt.Println("equals or target")
switch t.typ {
case tokenAssign:
return parseAssignment
case tokenBareString:
fallthrough
case tokenQuotedString:
p.push(t)
return parseTargets
case tokenColon:
p.push(t)
return parseAttributesOrPrereqs
default:
p.parseError("reading a a target or assignment",
"'=', ':', or another target", t)
}
return parseTopLevel // unreachable
}
// Consumed 'foo='. Everything else is a value being assigned to foo.
func parseAssignment(p *parser, t token) parserStateFun {
switch t.typ {
case tokenNewline:
p.rules.executeAssignment(p.tokenbuf)
p.clear()
return parseTopLevel
default:
p.push(t)
}
return parseAssignment
}
// Everything up to : must be a target.
func parseTargets(p *parser, t token) parserStateFun {
switch t.typ {
case tokenBareString:
fallthrough
case tokenQuotedString:
p.push(t)
case tokenColon:
p.push(t)
return parseAttributesOrPrereqs
default:
p.parseError("reading a rule's targets",
"filename or pattern", t)
}
return parseTargets
}
// Consumed one or more strings followed by a :.
func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
fmt.Println("attributes or prereqs")
switch t.typ {
case tokenNewline:
return parseRecipe
case tokenColon:
p.push(t)
return parsePrereqs
case tokenBareString:
fallthrough
case tokenQuotedString:
p.push(t)
default:
p.parseError("reading a rule's attributes or prerequisites",
"an attribute, pattern, or filename", t)
}
return parseAttributesOrPrereqs
}
func parsePrereqs(p *parser, t token) parserStateFun {
fmt.Println("prereqs")
switch t.typ {
case tokenNewline:
return parseRecipe
case tokenBareString:
fallthrough
case tokenQuotedString:
p.push(t)
default:
p.parseError("reading a rule's prerequisites",
"filename or pattern", t)
}
return parsePrereqs
}
func parseRecipe(p *parser, t token) parserStateFun {
fmt.Println("recipe")
// Assemble the rule!
r := rule{}
// find one or two colons
i := 0
for ; i < len(p.tokenbuf) && p.tokenbuf[i].typ != tokenColon; i++ {
}
j := i + 1
for ; j < len(p.tokenbuf) && p.tokenbuf[j].typ != tokenColon; j++ {
}
// targets
r.targets = make([]string, i)
for k := 0; k < i; k++ {
r.targets[k] = p.tokenbuf[k].val
}
// rule has attributes
if j < len(p.tokenbuf) {
attribs := make([]string, j-i-1)
for k := i + 1; k < j; k++ {
attribs[k-i-1] = p.tokenbuf[k].val
}
err := r.parseAttribs(attribs)
if err != nil {
msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found)
p.basicErrorAtToken(msg, p.tokenbuf[i+1])
}
} else {
j = i
}
// prereqs
r.prereqs = make([]string, len(p.tokenbuf)-j-1)
for k := j + 1; k < len(p.tokenbuf); k++ {
r.prereqs[k-j-1] = p.tokenbuf[k].val
}
if t.typ == tokenRecipe {
r.recipe = t.val
}
p.rules.push(r)
p.clear()
// the current token doesn't belong to this rule
if t.typ != tokenRecipe {
return parseTopLevel(p, t)
}
return parseTopLevel
}
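
A worked example of the colon scan (made-up statement): for a b : QV : c d, tokenbuf is [a b : QV : c d], the scans give i = 2 and j = 4, and the rule comes out with targets [a b], attribute string QV, and prereqs [c d]. When there is no second colon, j runs off the end, the attribute branch is skipped, j falls back to i, and everything past the first colon becomes a prereq.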

recipe.go

@ -1,14 +1,12 @@
package main
import (
"os/exec"
"os"
"io"
"log"
"os"
"os/exec"
)
// A monolithic function for executing recipes.
func executeRecipe(program string,
args []string,
@ -34,14 +32,17 @@ func executeRecipe(program string,
if len(input) > 0 {
cmdin, err := cmd.StdinPipe()
go func () { cmdin.Write([]byte(input)) }()
if err == nil {
go func() { cmdin.Write([]byte(input)) }()
}
}
output := ""
var err error
if capture_out {
output, err = cmd.Output()
var outbytes []byte
outbytes, err = cmd.Output()
output = string(outbytes)
} else {
err = cmd.Run()
}
@ -53,5 +54,3 @@ func executeRecipe(program string,
return output
}
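
For reference, the only caller so far is parsePipeInclude in parse.go, which invokes this as executeRecipe("sh", args, "", false, false, true); the final boolean is capture_out, which selects cmd.Output over cmd.Run above (the meaning of the other two flags is outside the hunks shown).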

ruleset.go (new file, 136 lines)

@ -0,0 +1,136 @@
// Mkfiles are parsed into ruleSets, which, as the name suggests, are sets of
// rules with accompanying recipes, as well as assigned variables which are
// expanded when evaluating rules and recipes.
package main
import (
"unicode/utf8"
)
type attribSet struct {
delFailed bool // delete targets when the recipe fails
nonstop bool // don't stop if the recipe fails
forcedTimestamp bool // update timestamp whether the recipe does or not
nonvirtual bool // a meta-rule that will only match files
quiet bool // don't print the recipe
regex bool // regular expression meta-rule
update bool // treat the targets as if they were updated
virtual bool // rule is virtual (does not match files)
}
// Error parsing an attribute
type attribError struct {
found rune
}
type rule struct {
targets []string // non-empty array of targets
attributes attribSet // rule attributes
prereqs []string // possibly empty prerequisites
shell []string // command used to execute the recipe
recipe string // recipe source
command []string // command attribute
}
// Read attributes from an array of strings, updating the rule.
func (r *rule) parseAttribs(inputs []string) *attribError {
for i := 0; i < len(inputs); i++ {
input := inputs[i]
pos := 0
for pos < len(input) {
c, w := utf8.DecodeRuneInString(input[pos:])
switch c {
case 'D':
r.attributes.delFailed = true
case 'E':
r.attributes.nonstop = true
case 'N':
r.attributes.forcedTimestamp = true
case 'n':
r.attributes.nonvirtual = true
case 'Q':
r.attributes.quiet = true
case 'R':
r.attributes.regex = true
case 'U':
r.attributes.update = true
case 'V':
r.attributes.virtual = true
case 'P':
if pos+w < len(input) {
r.command = append(r.command, input[pos+w:])
}
r.command = append(r.command, inputs[i+1:]...)
return nil
case 'S':
if pos+w < len(input) {
r.shell = append(r.shell, input[pos+w:])
}
r.shell = append(r.shell, inputs[i+1:]...)
return nil
default:
return &attribError{c}
}
pos += w
}
}
return nil
}
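
A quick sketch of how the flag characters accumulate, assuming the types in this file (the attribute string "QVD" is made up):

var r rule
if aerr := r.parseAttribs([]string{"QVD"}); aerr == nil {
	// quiet, virtual, and delFailed are now all true
	fmt.Println(r.attributes.quiet, r.attributes.virtual, r.attributes.delFailed)
}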
type ruleSet struct {
vars map[string][]string
rules []rule
}
// Add a rule to the rule set.
func (rs *ruleSet) push(r rule) {
rs.rules = append(rs.rules, r)
}
// Expand variables found in a string.
func (rs *ruleSet) expand(t token) string {
// TODO: implement this
return t.val
}
func isValidVarName(v string) bool {
for i := 0; i < len(v); {
c, w := utf8.DecodeRuneInString(v[i:])
if i == 0 && !(isalpha(c) || c == '_') {
return false
} else if !(isalnum(c) || c == '_') {
return false
}
i += w
}
return true
}
func isalpha(c rune) bool {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
}
func isalnum(c rune) bool {
return isalpha(c) || ('0' <= c && c <= '9')
}
// Parse and execute an assignment operation.
func (rs *ruleSet) executeAssignment(ts []token) {
assignee := ts[0].val
if !isValidVarName(assignee) {
// TODO: complain
}
// expand the values being assigned
vals := make([]string, len(ts)-1)
for i := 0; i < len(vals); i++ {
vals[i] = rs.expand(ts[i+1])
}
rs.vars[assignee] = vals
}
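
Tying the pieces together, an assignment flows from the lexer through parseAssignment into vars; a minimal sketch assuming the definitions above (the variable name and values are made up):

rs := parse("CFLAGS = -O2 -Wall\n", "<example>")
fmt.Println(rs.vars["CFLAGS"]) // [-O2 -Wall]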