diff --git a/lex.go b/lex.go index 14749da..7da84b0 100644 --- a/lex.go +++ b/lex.go @@ -15,8 +15,7 @@ const eof rune = '\000' const ( tokenError tokenType = iota tokenNewline - tokenBareString - tokenQuotedString + tokenWord tokenPipeInclude tokenRedirInclude tokenColon @@ -30,10 +29,8 @@ func (typ tokenType) String() string { return "[Error]" case tokenNewline: return "[Newline]" - case tokenBareString: - return "[BareString]" - case tokenQuotedString: - return "[QuotedString]" + case tokenWord: + return "[Word]" case tokenPipeInclude: return "[PipeInclude]" case tokenRedirInclude: @@ -209,7 +206,6 @@ func (l *lexer) run() { // A way of determining if the current line might be a recipe. func lexTopLevel(l *lexer) lexerStateFun { - for { l.skipRun(" \t\r") // emit a newline token if we are ending a non-empty line. @@ -219,7 +215,7 @@ func lexTopLevel(l *lexer) lexerStateFun { } l.skipRun(" \t\r\n") - if l.peek() == '\'' && l.peekN(1) == '\n' { + if l.peek() == '\\' && l.peekN(1) == '\n' { l.next() l.next() l.indented = false @@ -240,17 +236,21 @@ func lexTopLevel(l *lexer) lexerStateFun { return lexComment case '<': return lexInclude - case '"': - return lexDoubleQuote - case '\'': - return lexSingleQuote case ':': return lexColon case '=': return lexAssign + case '"': + return lexDoubleQuotedWord + case '\'': + return lexSingleQuotedWord + case '`': + return lexBackQuotedWord } - return lexBareString + // TODO: No! The lexer can get stuck in a loop this way. + // Check if the next character is a valid bare string character. If not, error. 
+ return lexBareWord } func lexColon(l *lexer) lexerStateFun { @@ -281,25 +281,30 @@ func lexInclude(l *lexer) lexerStateFun { return lexTopLevel } -func lexDoubleQuote(l *lexer) lexerStateFun { - l.skip() // '"' +func lexDoubleQuotedWord(l *lexer) lexerStateFun { + l.next() // '"' for l.peek() != '"' { l.acceptUntil("\\\"") if l.accept("\\") { l.accept("\"") } } - l.emit(tokenQuotedString) - l.skip() // skip '"' - return lexTopLevel + l.next() // '"' + return lexBareWord } -func lexSingleQuote(l *lexer) lexerStateFun { - l.skip() // '\'' +func lexBackQuotedWord(l *lexer) lexerStateFun { + l.next() // '`' + l.acceptUntil("`") + l.next() // '`' + return lexBareWord +} + +func lexSingleQuotedWord(l *lexer) lexerStateFun { + l.next() // '\'' l.acceptUntil("'") - l.emit(tokenQuotedString) - l.skip() // '\'' - return lexTopLevel + l.next() // '\'' + return lexBareWord } func lexRecipe(l *lexer) lexerStateFun { @@ -316,10 +321,19 @@ func lexRecipe(l *lexer) lexerStateFun { return lexTopLevel } -func lexBareString(l *lexer) lexerStateFun { - // TODO: allow escaping spaces and tabs? - // TODO: allow adjacent quoted string, e.g.: foo"bar"baz? 
+func lexBareWord(l *lexer) lexerStateFun { l.acceptUntil(" \t\n\r\\=:#'\"") - l.emit(tokenBareString) + if l.peek() == '"' { + return lexDoubleQuotedWord + } else if l.peek() == '\'' { + return lexSingleQuotedWord + } else if l.peek() == '`' { + return lexBackQuotedWord + } + + if l.start < l.pos { + l.emit(tokenWord) + } + return lexTopLevel } diff --git a/mk.go b/mk.go index 3e27f29..5f6cd4f 100644 --- a/mk.go +++ b/mk.go @@ -1,13 +1,25 @@ package main import ( - "fmt" - "io/ioutil" - "os" +//"fmt" +//"io/ioutil" +//"os" ) func main() { - input, _ := ioutil.ReadAll(os.Stdin) - rs := parse(string(input), "") - fmt.Println(rs) + //input, _ := ioutil.ReadAll(os.Stdin) + + // TEST LEXING + //_, tokens := lex(string(input)) + //for t := range tokens { + //fmt.Printf("%s %s\n", t.typ, t.val) + //} + + // TEST PARSING + //rs := parse(string(input), "") + //fmt.Println(rs) + + // TEST STRING EXPANSION + rules := &ruleSet{make(map[string][]string), make([]rule, 0)} + println(rules.expand("\"This is a quote: \\\"\"")) } diff --git a/parse.go b/parse.go index 6fb737d..e793bf1 100644 --- a/parse.go +++ b/parse.go @@ -1,3 +1,6 @@ +// This is a mkfile parser. It executes assignments and includes as it goes, and +// collects a set of rules, which are returned as a ruleSet object. + package main import ( @@ -12,6 +15,7 @@ type parser struct { rules *ruleSet // current ruleSet } +// Pretty errors. func (p *parser) parseError(context string, expected string, found token) { fmt.Fprintf(os.Stderr, "%s:%d: syntax error: ", p.name, found.line) fmt.Fprintf(os.Stderr, "while %s, expected %s but found \"%s\".\n", @@ -19,6 +23,7 @@ func (p *parser) parseError(context string, expected string, found token) { os.Exit(1) } +// More basic errors. 
func (p *parser) basicErrorAtToken(what string, found token) { p.basicErrorAtLine(what, found.line) } @@ -29,10 +34,12 @@ func (p *parser) basicErrorAtLine(what string, line int) { os.Exit(1) } +// Accept a token for use in the current statement being parsed. func (p *parser) push(t token) { p.tokenbuf = append(p.tokenbuf, t) } +// Clear all the accepted tokens. Called when a statement is finished. func (p *parser) clear() { p.tokenbuf = p.tokenbuf[:0] } @@ -67,9 +74,11 @@ func parseInto(input string, name string, rules *ruleSet) { // rules to finish. state = state(p, token{tokenNewline, "\n", l.line}) - // TODO: Handle the case when state is not top level. + // TODO: Error when state != parseTopLevel } +// We are at the top level of a mkfile, expecting rules, assignments, or +// includes. func parseTopLevel(p *parser, t token) parserStateFun { switch t.typ { case tokenNewline: @@ -78,9 +87,7 @@ func parseTopLevel(p *parser, t token) parserStateFun { return parsePipeInclude case tokenRedirInclude: return parseRedirInclude - case tokenQuotedString: - return parseTargets(p, t) - case tokenBareString: + case tokenWord: return parseAssignmentOrTarget(p, t) default: p.parseError("parsing mkfile", @@ -90,6 +97,7 @@ func parseTopLevel(p *parser, t token) parserStateFun { return parseTopLevel } +// Consumed a '<|' func parsePipeInclude(p *parser, t token) parserStateFun { switch t.typ { case tokenNewline: @@ -109,8 +117,6 @@ func parsePipeInclude(p *parser, t token) parserStateFun { return parseTopLevel // Almost anything goes. Let the shell sort it out. 
- case tokenBareString: - fallthrough case tokenPipeInclude: fallthrough case tokenRedirInclude: fallthrough case tokenColon: fallthrough case tokenAssign: fallthrough - case tokenQuotedString: + case tokenWord: p.tokenbuf = append(p.tokenbuf, t) default: @@ -129,6 +135,7 @@ func parsePipeInclude(p *parser, t token) parserStateFun { return parsePipeInclude } +// Consumed a '<' func parseRedirInclude(p *parser, t token) parserStateFun { switch t.typ { case tokenNewline: @@ -136,8 +143,8 @@ func parseRedirInclude(p *parser, t token) parserStateFun { // Open the file, read its context, call parseInto recursively. // Clear out p.tokenbuf - case tokenBareString: - case tokenQuotedString: + case tokenWord: + // TODO: default: // TODO: Complain about unexpected tokens. @@ -153,16 +160,14 @@ func parseAssignmentOrTarget(p *parser, t token) parserStateFun { return parseEqualsOrTarget } -// Consumed one bare string ot the begging of the line. +// Consumed one bare string at the beginning of the line. func parseEqualsOrTarget(p *parser, t token) parserStateFun { fmt.Println("equals or target") switch t.typ { case tokenAssign: return parseAssignment - case tokenBareString: - fallthrough - case tokenQuotedString: + case tokenWord: p.push(t) return parseTargets @@ -182,7 +187,10 @@ func parseEqualsOrTarget(p *parser, t token) parserStateFun { func parseAssignment(p *parser, t token) parserStateFun { switch t.typ { case tokenNewline: - p.rules.executeAssignment(p.tokenbuf) + err := p.rules.executeAssignment(p.tokenbuf) + if err != nil { + p.basicErrorAtToken(err.what, err.where) + } p.clear() return parseTopLevel @@ -193,12 +201,10 @@ func parseAssignment(p *parser, t token) parserStateFun { return parseAssignment } -// Everything up to : must be a target. +// Everything up to ':' must be a target. 
func parseTargets(p *parser, t token) parserStateFun { switch t.typ { - case tokenBareString: - fallthrough - case tokenQuotedString: + case tokenWord: p.push(t) case tokenColon: p.push(t) @@ -212,7 +218,7 @@ func parseTargets(p *parser, t token) parserStateFun { return parseTargets } -// Consumed one or more strings followed by a :. +// Consumed one or more strings followed by a first ':'. func parseAttributesOrPrereqs(p *parser, t token) parserStateFun { fmt.Println("attributes or prereqs") switch t.typ { @@ -221,9 +227,7 @@ func parseAttributesOrPrereqs(p *parser, t token) parserStateFun { case tokenColon: p.push(t) return parsePrereqs - case tokenBareString: - fallthrough - case tokenQuotedString: + case tokenWord: p.push(t) default: p.parseError("reading a rule's attributes or prerequisites", @@ -233,14 +237,13 @@ func parseAttributesOrPrereqs(p *parser, t token) parserStateFun { return parseAttributesOrPrereqs } +// Targets and attributes and the second ':' have been consumed. func parsePrereqs(p *parser, t token) parserStateFun { fmt.Println("prereqs") switch t.typ { case tokenNewline: return parseRecipe - case tokenBareString: - fallthrough - case tokenQuotedString: + case tokenWord: p.push(t) default: @@ -251,6 +254,7 @@ func parsePrereqs(p *parser, t token) parserStateFun { return parsePrereqs } +// An entire rule has been consumed. 
func parseRecipe(p *parser, t token) parserStateFun { fmt.Println("recipe") @@ -279,7 +283,7 @@ func parseRecipe(p *parser, t token) parserStateFun { } err := r.parseAttribs(attribs) if err != nil { - msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found) + msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found \"%c\".", err.found) p.basicErrorAtToken(msg, p.tokenbuf[i+1]) } } else { diff --git a/recipe.go b/recipe.go index bb8749e..ed32325 100644 --- a/recipe.go +++ b/recipe.go @@ -32,8 +32,8 @@ func executeRecipe(program string, if len(input) > 0 { cmdin, err := cmd.StdinPipe() - if err != nil { - go func() { cmdin.Write([]byte(input)) }() + if err == nil { + go func() { cmdin.Write([]byte(input)); cmdin.Close() }() } } @@ -43,6 +43,9 @@ func executeRecipe(program string, var outbytes []byte outbytes, err = cmd.Output() output = string(outbytes) + if output[len(output)-1] == '\n' { + output = output[:len(output)-1] + } } else { err = cmd.Run() } diff --git a/ruleset.go b/ruleset.go index fadd5b2..f7e1eeb 100644 --- a/ruleset.go +++ b/ruleset.go @@ -5,6 +5,8 @@ package main import ( + "fmt" + "strings" "unicode/utf8" ) @@ -92,10 +94,106 @@ func (rs *ruleSet) push(r rule) { rs.rules = append(rs.rules, r) } -// Expand variables found in a string. -func (rs *ruleSet) expand(t token) string { - // TODO: implement this - return t.val +// Expand a word. This includes substituting variables and handling quotes. +func (rs *ruleSet) expand(input string) string { + expanded := make([]byte, 0) + var i, j int + for i = 0; i < len(input); { + j = i + strings.IndexAny(input[i:], "\"'`$\\") + + if j < 0 { + expanded = append(expanded, []byte(input[i:])...) + break + } + + expanded = append(expanded, []byte(input[i:j])...) 
+ c, w := utf8.DecodeRuneInString(input[j:]) + i = j + w + + var off int + var out string + switch c { + case '\\': + out, off = rs.expandEscape(input[i:]) + + case '"': + out, off = rs.expandDoubleQuoted(input[i:]) + + case '\'': + out, off = rs.expandSingleQuoted(input[i:]) + + case '`': + out, off = rs.expandBackQuoted(input[i:]) + + case '$': + // TODO: recursive call: expandSigil + } + + expanded = append(expanded, []byte(out)...) + i += off + } + + return string(expanded) +} + +// Expand following a '\\' +func (rs *ruleSet) expandEscape(input string) (string, int) { + c, w := utf8.DecodeRuneInString(input) + return string(c), w +} + +// Expand a double quoted string starting after a '\"' +func (rs *ruleSet) expandDoubleQuoted(input string) (string, int) { + // find the first non-escaped " + j := 0 + for { + j = strings.IndexAny(input[j:], "\"\\") + if j < 0 { + break + } + + _, w := utf8.DecodeRuneInString(input[j:]) + j += w + + c, w := utf8.DecodeRuneInString(input[j:]) + j += w + + if c == '"' { + return rs.expand(input[:j]), (j + w) + } + + if c == '\\' { + if j+w < len(input) { + j += w + _, w := utf8.DecodeRuneInString(input[j:]) + j += w + } else { + break + } + } + } + + return input, len(input) +} + +// Expand a single quoted string starting after a '\'' +func (rs *ruleSet) expandSingleQuoted(input string) (string, int) { + j := strings.Index(input, "'") + if j < 0 { + return input, len(input) + } + return input[:j], (j + 1) +} + +// Expand a backtick quoted string, by executing the contents. 
+func (rs *ruleSet) expandBackQuoted(input string) (string, int) { + j := strings.Index(input, "`") + if j < 0 { + return input, len(input) + } + + output := executeRecipe("sh", nil, input[:j], false, false, true) + return output, (j + 1) } func isValidVarName(v string) bool { @@ -103,7 +201,7 @@ func isValidVarName(v string) bool { c, w := utf8.DecodeRuneInString(v[i:]) if i == 0 && !(isalpha(c) || c == '_') { return false - } else if !isalnum(c) || c == '_' { + } else if !(isalnum(c) || c == '_') { return false } i += w @@ -119,18 +217,26 @@ func isalnum(c rune) bool { return isalpha(c) || ('0' <= c && c <= '9') } +type assignmentError struct { + what string + where token +} + // Parse and execute assignment operation. -func (rs *ruleSet) executeAssignment(ts []token) { +func (rs *ruleSet) executeAssignment(ts []token) *assignmentError { assignee := ts[0].val if !isValidVarName(assignee) { - // TODO: complain + return &assignmentError{ + fmt.Sprintf("target of assignment is not a valid variable name: \"%s\"", assignee), + ts[0]} } // expanded variables vals := make([]string, len(ts)-1) for i := 0; i < len(vals); i++ { - vals[i] = rs.expand(ts[i+1]) + vals[i] = rs.expand(ts[i+1].val) } rs.vars[assignee] = vals + return nil }