Work on string expansion.

This commit is contained in:
Daniel Jones 2013-02-26 22:41:25 -08:00
parent 6da2555966
commit 084a45fc74
5 changed files with 208 additions and 69 deletions

68
lex.go
View file

@ -15,8 +15,7 @@ const eof rune = '\000'
const (
tokenError tokenType = iota
tokenNewline
tokenBareString
tokenQuotedString
tokenWord
tokenPipeInclude
tokenRedirInclude
tokenColon
@ -30,10 +29,8 @@ func (typ tokenType) String() string {
return "[Error]"
case tokenNewline:
return "[Newline]"
case tokenBareString:
return "[BareString]"
case tokenQuotedString:
return "[QuotedString]"
case tokenWord:
return "[Word]"
case tokenPipeInclude:
return "[PipeInclude]"
case tokenRedirInclude:
@ -209,7 +206,6 @@ func (l *lexer) run() {
// A way of determining if the current line might be a recipe.
func lexTopLevel(l *lexer) lexerStateFun {
for {
l.skipRun(" \t\r")
// emit a newline token if we are ending a non-empty line.
@ -219,7 +215,7 @@ func lexTopLevel(l *lexer) lexerStateFun {
}
l.skipRun(" \t\r\n")
if l.peek() == '\'' && l.peekN(1) == '\n' {
if l.peek() == '\\' && l.peekN(1) == '\n' {
l.next()
l.next()
l.indented = false
@ -240,17 +236,21 @@ func lexTopLevel(l *lexer) lexerStateFun {
return lexComment
case '<':
return lexInclude
case '"':
return lexDoubleQuote
case '\'':
return lexSingleQuote
case ':':
return lexColon
case '=':
return lexAssign
case '"':
return lexDoubleQuotedWord
case '\'':
return lexSingleQuotedWord
case '`':
return lexBackQuotedWord
}
return lexBareString
// TODO: No! The lexer can get stuck in a loop this way.
// Check if the next character is a valid bare string character. If not, error.
return lexBareWord
}
func lexColon(l *lexer) lexerStateFun {
@ -281,25 +281,30 @@ func lexInclude(l *lexer) lexerStateFun {
return lexTopLevel
}
func lexDoubleQuote(l *lexer) lexerStateFun {
l.skip() // '"'
func lexDoubleQuotedWord(l *lexer) lexerStateFun {
l.next() // '"'
for l.peek() != '"' {
l.acceptUntil("\\\"")
if l.accept("\\") {
l.accept("\"")
}
}
l.emit(tokenQuotedString)
l.skip() // skip '"'
return lexTopLevel
l.next() // '"'
return lexBareWord
}
func lexSingleQuote(l *lexer) lexerStateFun {
l.skip() // '\''
// lexBackQuotedWord lexes a back-quoted section of a word. It is returned by
// lexTopLevel and lexBareWord after they peek a '`', so the lexer is positioned
// on the opening back quote when this state runs.
//
// No token is emitted here: control passes to lexBareWord, which is the state
// that emits tokenWord, so the back-quoted text becomes part of the
// surrounding word rather than a token of its own.
//
// NOTE(review): what happens on an unterminated back quote depends on how
// acceptUntil and next behave at end of input — confirm against the lexer.
func lexBackQuotedWord(l *lexer) lexerStateFun {
l.next() // opening '`' (presumably kept in the pending token; the older quote states used skip() — confirm)
l.acceptUntil("`") // everything up to, but presumably not including, the closing '`'
l.next() // closing '`'
return lexBareWord
}
func lexSingleQuotedWord(l *lexer) lexerStateFun {
l.next() // '\''
l.acceptUntil("'")
l.emit(tokenQuotedString)
l.skip() // '\''
return lexTopLevel
l.next() // '\''
return lexBareWord
}
func lexRecipe(l *lexer) lexerStateFun {
@ -316,10 +321,19 @@ func lexRecipe(l *lexer) lexerStateFun {
return lexTopLevel
}
func lexBareString(l *lexer) lexerStateFun {
// TODO: allow escaping spaces and tabs?
// TODO: allow adjacent quoted string, e.g.: foo"bar"baz?
func lexBareWord(l *lexer) lexerStateFun {
l.acceptUntil(" \t\n\r\\=:#'\"")
l.emit(tokenBareString)
if l.peek() == '"' {
return lexDoubleQuotedWord
} else if l.peek() == '\'' {
return lexSingleQuotedWord
} else if l.peek() == '`' {
return lexBackQuotedWord
}
if l.start < l.pos {
l.emit(tokenWord)
}
return lexTopLevel
}

24
mk.go
View file

@ -1,13 +1,25 @@
package main
import (
"fmt"
"io/ioutil"
"os"
//"fmt"
//"io/ioutil"
//"os"
)
func main() {
input, _ := ioutil.ReadAll(os.Stdin)
rs := parse(string(input), "<stdin>")
fmt.Println(rs)
//input, _ := ioutil.ReadAll(os.Stdin)
// TEST LEXING
//_, tokens := lex(string(input))
//for t := range tokens {
//fmt.Printf("%s %s\n", t.typ, t.val)
//}
// TEST PARSING
//rs := parse(string(input), "<stdin>")
//fmt.Println(rs)
// TEST STRING EXPANSION
rules := &ruleSet{make(map[string][]string), make([]rule, 0)}
println(rules.expand("\"This is a quote: \\\"\""))
}

View file

@ -1,3 +1,6 @@
// This is a mkfile parser. It executes assignments and includes as it goes, and
// collects a set of rules, which are returned as a ruleSet object.
package main
import (
@ -12,6 +15,7 @@ type parser struct {
rules *ruleSet // current ruleSet
}
// Pretty errors.
func (p *parser) parseError(context string, expected string, found token) {
fmt.Fprintf(os.Stderr, "%s:%d: syntax error: ", p.name, found.line)
fmt.Fprintf(os.Stderr, "while %s, expected %s but found \"%s\".\n",
@ -19,6 +23,7 @@ func (p *parser) parseError(context string, expected string, found token) {
os.Exit(1)
}
// More basic errors.
func (p *parser) basicErrorAtToken(what string, found token) {
p.basicErrorAtLine(what, found.line)
}
@ -29,10 +34,12 @@ func (p *parser) basicErrorAtLine(what string, line int) {
os.Exit(1)
}
// Accept a token for use in the current statement being parsed.
func (p *parser) push(t token) {
p.tokenbuf = append(p.tokenbuf, t)
}
// Clear all the accepted tokens. Called when a statement is finished.
func (p *parser) clear() {
p.tokenbuf = p.tokenbuf[:0]
}
@ -67,9 +74,11 @@ func parseInto(input string, name string, rules *ruleSet) {
// rules to finish.
state = state(p, token{tokenNewline, "\n", l.line})
// TODO: Handle the case when state is not top level.
// TODO: Error when state != parseTopLevel
}
// We are at the top level of a mkfile, expecting rules, assignments, or
// includes.
func parseTopLevel(p *parser, t token) parserStateFun {
switch t.typ {
case tokenNewline:
@ -78,9 +87,7 @@ func parseTopLevel(p *parser, t token) parserStateFun {
return parsePipeInclude
case tokenRedirInclude:
return parseRedirInclude
case tokenQuotedString:
return parseTargets(p, t)
case tokenBareString:
case tokenWord:
return parseAssignmentOrTarget(p, t)
default:
p.parseError("parsing mkfile",
@ -90,6 +97,7 @@ func parseTopLevel(p *parser, t token) parserStateFun {
return parseTopLevel
}
// Consumed a '<|'
func parsePipeInclude(p *parser, t token) parserStateFun {
switch t.typ {
case tokenNewline:
@ -109,8 +117,6 @@ func parsePipeInclude(p *parser, t token) parserStateFun {
return parseTopLevel
// Almost anything goes. Let the shell sort it out.
case tokenBareString:
fallthrough
case tokenPipeInclude:
fallthrough
case tokenRedirInclude:
@ -119,7 +125,7 @@ func parsePipeInclude(p *parser, t token) parserStateFun {
fallthrough
case tokenAssign:
fallthrough
case tokenQuotedString:
case tokenWord:
p.tokenbuf = append(p.tokenbuf, t)
default:
@ -129,6 +135,7 @@ func parsePipeInclude(p *parser, t token) parserStateFun {
return parsePipeInclude
}
// Consumed a '<'
func parseRedirInclude(p *parser, t token) parserStateFun {
switch t.typ {
case tokenNewline:
@ -136,8 +143,8 @@ func parseRedirInclude(p *parser, t token) parserStateFun {
// Open the file, read its context, call parseInto recursively.
// Clear out p.tokenbuf
case tokenBareString:
case tokenQuotedString:
case tokenWord:
// TODO:
default:
// TODO: Complain about unexpected tokens.
@ -153,16 +160,14 @@ func parseAssignmentOrTarget(p *parser, t token) parserStateFun {
return parseEqualsOrTarget
}
// Consumed one bare string ot the begging of the line.
// Consumed one bare string at the beginning of the line.
func parseEqualsOrTarget(p *parser, t token) parserStateFun {
fmt.Println("equals or target")
switch t.typ {
case tokenAssign:
return parseAssignment
case tokenBareString:
fallthrough
case tokenQuotedString:
case tokenWord:
p.push(t)
return parseTargets
@ -182,7 +187,10 @@ func parseEqualsOrTarget(p *parser, t token) parserStateFun {
func parseAssignment(p *parser, t token) parserStateFun {
switch t.typ {
case tokenNewline:
p.rules.executeAssignment(p.tokenbuf)
err := p.rules.executeAssignment(p.tokenbuf)
if err != nil {
p.basicErrorAtToken(err.what, err.where)
}
p.clear()
return parseTopLevel
@ -193,12 +201,10 @@ func parseAssignment(p *parser, t token) parserStateFun {
return parseAssignment
}
// Everything up to : must be a target.
// Everything up to ':' must be a target.
func parseTargets(p *parser, t token) parserStateFun {
switch t.typ {
case tokenBareString:
fallthrough
case tokenQuotedString:
case tokenWord:
p.push(t)
case tokenColon:
p.push(t)
@ -212,7 +218,7 @@ func parseTargets(p *parser, t token) parserStateFun {
return parseTargets
}
// Consumed one or more strings followed by a :.
// Consumed one or more strings followed by a first ':'.
func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
fmt.Println("attributes or prereqs")
switch t.typ {
@ -221,9 +227,7 @@ func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
case tokenColon:
p.push(t)
return parsePrereqs
case tokenBareString:
fallthrough
case tokenQuotedString:
case tokenWord:
p.push(t)
default:
p.parseError("reading a rule's attributes or prerequisites",
@ -233,14 +237,13 @@ func parseAttributesOrPrereqs(p *parser, t token) parserStateFun {
return parseAttributesOrPrereqs
}
// Targets and attributes and the second ':' have been consumed.
func parsePrereqs(p *parser, t token) parserStateFun {
fmt.Println("prereqs")
switch t.typ {
case tokenNewline:
return parseRecipe
case tokenBareString:
fallthrough
case tokenQuotedString:
case tokenWord:
p.push(t)
default:
@ -251,6 +254,7 @@ func parsePrereqs(p *parser, t token) parserStateFun {
return parsePrereqs
}
// An entire rule has been consumed.
func parseRecipe(p *parser, t token) parserStateFun {
fmt.Println("recipe")
@ -279,7 +283,7 @@ func parseRecipe(p *parser, t token) parserStateFun {
}
err := r.parseAttribs(attribs)
if err != nil {
msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found '%c'.", err.found)
msg := fmt.Sprintf("while reading a rule's attributes expected an attribute but found \"%c\".", err.found)
p.basicErrorAtToken(msg, p.tokenbuf[i+1])
}
} else {

View file

@ -32,8 +32,8 @@ func executeRecipe(program string,
if len(input) > 0 {
cmdin, err := cmd.StdinPipe()
if err != nil {
go func() { cmdin.Write([]byte(input)) }()
if err == nil {
go func() { cmdin.Write([]byte(input)); cmdin.Close() }()
}
}
@ -43,6 +43,9 @@ func executeRecipe(program string,
var outbytes []byte
outbytes, err = cmd.Output()
output = string(outbytes)
if output[len(output)-1] == '\n' {
output = output[:len(output)-1]
}
} else {
err = cmd.Run()
}

View file

@ -5,6 +5,8 @@
package main
import (
"fmt"
"strings"
"unicode/utf8"
)
@ -92,10 +94,106 @@ func (rs *ruleSet) push(r rule) {
rs.rules = append(rs.rules, r)
}
// Expand variables found in a string.
func (rs *ruleSet) expand(t token) string {
// TODO: implement this
return t.val
// expand expands a single word. Text outside any quoting is copied through
// unchanged; each of the special characters \ " ' ` $ hands the remainder of
// the input to the matching helper (expandEscape, expandDoubleQuoted,
// expandSingleQuoted, expandBackQuoted), which returns the replacement text
// and the number of input bytes it consumed.
//
// Sigil ('$') expansion is not implemented yet: the '$' itself is consumed and
// contributes nothing to the result.
func (rs *ruleSet) expand(input string) string {
	expanded := make([]byte, 0, len(input))
	for i := 0; i < len(input); {
		// IndexAny is relative to input[i:], so test it for "not found"
		// BEFORE turning it into an absolute index. Adding i first hides the
		// -1 whenever i > 0 and leads to an out-of-range slice.
		rel := strings.IndexAny(input[i:], "\"'`$\\")
		if rel < 0 {
			expanded = append(expanded, input[i:]...)
			break
		}
		j := i + rel
		expanded = append(expanded, input[i:j]...)

		// Step over the special character; the helpers start right after it.
		c, w := utf8.DecodeRuneInString(input[j:])
		i = j + w

		var out string
		var consumed int
		switch c {
		case '\\':
			out, consumed = rs.expandEscape(input[i:])
		case '"':
			out, consumed = rs.expandDoubleQuoted(input[i:])
		case '\'':
			out, consumed = rs.expandSingleQuoted(input[i:])
		case '`':
			out, consumed = rs.expandBackQuoted(input[i:])
		case '$':
			// TODO: recursive call: expandSigil
		}
		expanded = append(expanded, out...)
		i += consumed
	}
	return string(expanded)
}
// expandEscape handles the text following a '\\'. The next character is taken
// literally: it returns that character and its width in bytes.
//
// The character is returned as the original bytes of input, so a byte that is
// not valid UTF-8 passes through untouched instead of being rewritten to
// U+FFFD. If nothing follows the backslash (it was the last character of the
// word), the backslash itself is kept and zero bytes are consumed.
func (rs *ruleSet) expandEscape(input string) (string, int) {
	if len(input) == 0 {
		return "\\", 0
	}
	// Only the width is needed; for invalid UTF-8 it is 1, i.e. the raw byte.
	_, w := utf8.DecodeRuneInString(input)
	return input[:w], w
}
// expandDoubleQuoted expands a double quoted string. input starts just after
// the opening '"'.
//
// It scans for the first '"' that is not preceded by a backslash escape,
// passes everything before it through rs.expand (so escapes and other
// expansions inside the quotes are processed), and returns the result together
// with the number of bytes consumed, closing quote included.
//
// If there is no closing quote, input is returned unexpanded and fully
// consumed.
func (rs *ruleSet) expandDoubleQuoted(input string) (string, int) {
	for j := 0; j < len(input); {
		// IndexAny is relative to input[j:]; convert to an absolute index.
		rel := strings.IndexAny(input[j:], "\"\\")
		if rel < 0 {
			break
		}
		j += rel
		if input[j] == '"' {
			return rs.expand(input[:j]), j + 1
		}
		// A backslash: step over it and over the character it escapes, so an
		// escaped quote is not mistaken for the terminator.
		j++
		_, w := utf8.DecodeRuneInString(input[j:])
		j += w
	}
	return input, len(input)
}
// expandSingleQuoted handles a single quoted string. input starts just after
// the opening '\''. Nothing inside single quotes is interpreted: the text up
// to the closing quote is returned verbatim, along with the number of bytes
// consumed (closing quote included). Without a closing quote, all of input is
// returned and consumed.
func (rs *ruleSet) expandSingleQuoted(input string) (string, int) {
	if end := strings.IndexRune(input, '\''); end >= 0 {
		return input[:end], end + 1
	}
	return input, len(input)
}
// expandBackQuoted handles a backtick quoted string. input starts just after
// the opening '`'. The text up to the closing backtick is handed to
// executeRecipe with "sh" as the program, and whatever executeRecipe returns
// becomes the expansion; the consumed count includes the closing backtick.
// Without a closing backtick nothing is executed and input is returned as is,
// fully consumed.
func (rs *ruleSet) expandBackQuoted(input string) (string, int) {
	end := strings.Index(input, "`")
	if end >= 0 {
		return executeRecipe("sh", nil, input[:end], false, false, true), end + 1
	}
	return input, len(input)
}
func isValidVarName(v string) bool {
@ -103,7 +201,7 @@ func isValidVarName(v string) bool {
c, w := utf8.DecodeRuneInString(v[i:])
if i == 0 && !(isalpha(c) || c == '_') {
return false
} else if !isalnum(c) || c == '_' {
} else if !(isalnum(c) || c == '_') {
return false
}
i += w
@ -119,18 +217,26 @@ func isalnum(c rune) bool {
return isalpha(c) || ('0' <= c && c <= '9')
}
// assignmentError describes why an assignment could not be executed. It is
// returned (as a pointer, nil on success) by executeAssignment; the parser
// forwards both fields to basicErrorAtToken.
type assignmentError struct {
// what is the human readable description of the problem.
what string
// where is the token the problem is attributed to.
where token
}
// Parse and execute assignment operation.
func (rs *ruleSet) executeAssignment(ts []token) {
func (rs *ruleSet) executeAssignment(ts []token) *assignmentError {
assignee := ts[0].val
if !isValidVarName(assignee) {
// TODO: complain
return &assignmentError{
fmt.Sprintf("target of assignment is not a valid variable name: \"%s\"", assignee),
ts[0]}
}
// expanded variables
vals := make([]string, len(ts)-1)
for i := 0; i < len(vals); i++ {
vals[i] = rs.expand(ts[i+1])
vals[i] = rs.expand(ts[i+1].val)
}
rs.vars[assignee] = vals
return nil
}