// Package parse implements the elvish parser. // // The parser builds a hybrid of AST (abstract syntax tree) and parse tree // (a.k.a. concrete syntax tree). The AST part only includes parts that are // semantically significant -- i.e. skipping whitespaces and symbols that do not // alter the semantics, and is embodied in the fields of each *Node type. The // parse tree part corresponds to all the text in the original source text, and // is embodied in the children of each *Node type.
package parse //go:generate stringer -type=PrimaryType,RedirMode,ExprCtx -output=string.go import ( ) // Tree represents a parsed tree. type Tree struct { Root *Chunk Source Source } // Config keeps configuration options when parsing. type Config struct { // Destination of warnings. If nil, warnings are suppressed. WarningWriter io.Writer } // Parse parses the given source. The returned error always has type *Error // if it is not nil. func ( Source, Config) (Tree, error) { := Tree{&Chunk{}, } := ParseAs(, .Root, ) return , } // ParseAs parses the given source as a node, depending on the dynamic type of // n. If the error is not nil, it always has type *Error. func ( Source, Node, Config) error { := &parser{srcName: .Name, src: .Code, warn: .WarningWriter} .parse() .done() return .assembleError() } // Errors. var ( errShouldBeForm = newError("", "form") errBadLHS = errors.New("bad assignment LHS") errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'") errShouldBeFD = newError("", "a composite term representing fd") errShouldBeFilename = newError("", "a composite term representing filename") errShouldBeArray = newError("", "spaced") errStringUnterminated = newError("string not terminated") errChainedAssignment = newError("chained assignment not yet supported") errInvalidEscape = newError("invalid escape sequence") errInvalidEscapeOct = newError("invalid escape sequence", "octal digit") errInvalidEscapeHex = newError("invalid escape sequence", "hex digit") errInvalidEscapeControl = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F") errShouldBePrimary = newError("", "single-quoted string", "double-quoted string", "bareword") errShouldBeVariableName = newError("", "variable name") errShouldBeRBracket = newError("", "']'") errShouldBeRBrace = newError("", "'}'") errShouldBeBraceSepOrRBracket = newError("", "','", "'}'") errShouldBeRParen = newError("", "')'") errShouldBeCompound = newError("", "compound") errShouldBeEqual = newError("", "'='") errBothElementsAndPairs = newError("cannot contain both list elements and map pairs") errShouldBeNewline = newError("", "newline") ) // Chunk = { PipelineSep | Space } { Pipeline { PipelineSep | Space } } type Chunk struct { node Pipelines []*Pipeline } func ( *Chunk) ( *parser) { .parseSeps() for startsPipeline(.peek()) { .parse(&Pipeline{}).addTo(&.Pipelines, ) if .parseSeps() == 0 { break } } } func ( rune) bool { return == '\r' || == '\n' || == ';' } // parseSeps parses pipeline separators along with whitespaces. It returns the // number of pipeline separators parsed. func ( *Chunk) ( *parser) int { := 0 for { := .peek() if isPipelineSep() { // parse as a Sep parseSep(, , ) ++ } else if IsInlineWhitespace() || == '#' { // parse a run of spaces as a Sep parseSpaces(, ) } else { break } } return } // Pipeline = Form { '|' Form } type Pipeline struct { node Forms []*Form Background bool } func ( *Pipeline) ( *parser) { .parse(&Form{}).addTo(&.Forms, ) for parseSep(, , '|') { parseSpacesAndNewlines(, ) if !startsForm(.peek()) { .error(errShouldBeForm) return } .parse(&Form{}).addTo(&.Forms, ) } parseSpaces(, ) if .peek() == '&' { .next() addSep(, ) .Background = true parseSpaces(, ) } } func ( rune) bool { return startsForm() } // Form = { Space } { { Assignment } { Space } } // { Compound } { Space } { ( Compound | MapPair | Redir ) { Space } } type Form struct { node Assignments []*Assignment Head *Compound Args []*Compound Opts []*MapPair Redirs []*Redir } func ( *Form) ( *parser) { parseSpaces(, ) for .tryAssignment() { parseSpaces(, ) } // Parse head. if !startsCompound(.peek(), CmdExpr) { if len(.Assignments) > 0 { // Assignment-only form. return } // Bad form. .error(fmt.Errorf("bad rune at form head: %q", .peek())) } .parse(&Compound{ExprCtx: CmdExpr}).addAs(&.Head, ) parseSpaces(, ) for { := .peek() switch { case == '&': .next() := startsCompound(.peek(), LHSExpr) .backup() if ! { // background indicator return } .parse(&MapPair{}).addTo(&.Opts, ) case startsCompound(, NormalExpr): := &Compound{} .parse() if isRedirSign(.peek()) { // Redir .parse(&Redir{Left: }).addTo(&.Redirs, ) } else { parsed{}.addTo(&.Args, ) } case isRedirSign(): .parse(&Redir{}).addTo(&.Redirs, ) default: return } parseSpaces(, ) } } // tryAssignment tries to parse an assignment. If succeeded, it adds the parsed // assignment to fn.Assignments and returns true. Otherwise it rewinds the // parser and returns false. func ( *Form) ( *parser) bool { if !startsIndexing(.peek(), LHSExpr) { return false } := .pos := .errors.Entries := .parse(&Assignment{}) // If errors were added, revert if len(.errors.Entries) > len() { .errors.Entries = .pos = return false } .addTo(&.Assignments, ) return true } func ( rune) bool { return IsInlineWhitespace() || startsCompound(, CmdExpr) } // Assignment = Indexing '=' Compound type Assignment struct { node Left *Indexing Right *Compound } func ( *Assignment) ( *parser) { .parse(&Indexing{ExprCtx: LHSExpr}).addAs(&.Left, ) := .Left.Head if !ValidLHSVariable(, true) { .errorp(, errShouldBeVariableName) } if !parseSep(, , '=') { .error(errShouldBeEqual) } .parse(&Compound{}).addAs(&.Right, ) } func ( *Primary, bool) bool { switch .Type { case Braced: // TODO(xiaq): check further inside braced expression return true case SingleQuoted, DoubleQuoted: // Quoted variable names may contain anything return true case Bareword: // Bareword variable names may only contain runes that are valid in raw // variable names if .Value == "" { return false } := .Value if && [0] == '@' { = [1:] } for , := range { if !allowedInVariableName() { return false } } return true default: return false } } // Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound ) type Redir struct { node Left *Compound Mode RedirMode RightIsFd bool Right *Compound } func ( *Redir) ( *parser) { // The parsing of the Left part is done in Form.parse. if .Left != nil { addChild(, .Left) .From = .Left.From } := .pos for isRedirSign(.peek()) { .next() } := .src[:.pos] switch { case "<": .Mode = Read case ">": .Mode = Write case ">>": .Mode = Append case "<>": .Mode = ReadWrite default: .error(errBadRedirSign) } addSep(, ) parseSpaces(, ) if parseSep(, , '&') { .RightIsFd = true } .parse(&Compound{}).addAs(&.Right, ) if len(.Right.Indexings) == 0 { if .RightIsFd { .error(errShouldBeFD) } else { .error(errShouldBeFilename) } return } } func ( rune) bool { return == '<' || == '>' } // RedirMode records the mode of an IO redirection. type RedirMode int // Possible values for RedirMode. const ( BadRedirMode RedirMode = iota Read Write ReadWrite Append ) // Filter is the Elvish filter DSL. It uses the same syntax as arguments and // options to a command. type Filter struct { node Args []*Compound Opts []*MapPair } func ( *Filter) ( *parser) { parseSpaces(, ) for { := .peek() switch { case == '&': .parse(&MapPair{}).addTo(&.Opts, ) case startsCompound(, NormalExpr): .parse(&Compound{}).addTo(&.Args, ) default: return } parseSpaces(, ) } } // Compound = { Indexing } type Compound struct { node ExprCtx ExprCtx Indexings []*Indexing } // ExprCtx represents special contexts of expression parsing. type ExprCtx int const ( // NormalExpr represents a normal expression, namely none of the special // ones below. It is the default value. NormalExpr ExprCtx = iota // CmdExpr represents an expression used as the command in a form. In this // context, unquoted <>*^ are treated as bareword characters. CmdExpr // LHSExpr represents an expression used as the left-hand-side in either // assignments or map pairs. In this context, an unquoted = serves as an // expression terminator and is thus not treated as a bareword character. LHSExpr // BracedElemExpr represents an expression used as an element in a braced // expression. In this context, an unquoted , serves as an expression // terminator and is thus not treated as a bareword character. BracedElemExpr // strictExpr is only meaningful to allowedInBareword. strictExpr ) func ( *Compound) ( *parser) { .tilde() for startsIndexing(.peek(), .ExprCtx) { .parse(&Indexing{ExprCtx: .ExprCtx}).addTo(&.Indexings, ) } } // tilde parses a tilde if there is one. It is implemented here instead of // within Primary since a tilde can only appear as the first part of a // Compound. Elsewhere tildes are barewords. func ( *Compound) ( *parser) { if .peek() == '~' { .next() := node{Ranging: diag.Ranging{From: .pos - 1, To: .pos}, sourceText: "~", parent: nil, children: nil} := &Primary{node: , Type: Tilde, Value: "~"} := &Indexing{node: } parsed{}.addAs(&.Head, ) parsed{}.addTo(&.Indexings, ) } } func ( rune, ExprCtx) bool { return startsIndexing(, ) } // Indexing = Primary { '[' Array ']' } type Indexing struct { node ExprCtx ExprCtx Head *Primary Indicies []*Array } func ( *Indexing) ( *parser) { .parse(&Primary{ExprCtx: .ExprCtx}).addAs(&.Head, ) for parseSep(, , '[') { if !startsArray(.peek()) { .error(errShouldBeArray) } .parse(&Array{}).addTo(&.Indicies, ) if !parseSep(, , ']') { .error(errShouldBeRBracket) return } } } func ( rune, ExprCtx) bool { return startsPrimary(, ) } // Array = { Space | '\n' } { Compound { Space | '\n' } } type Array struct { node Compounds []*Compound // When non-empty, records the occurrences of semicolons by the indices of // the compounds they appear before. For instance, [; ; a b; c d;] results // in Semicolons={0 0 2 4}. Semicolons []int } func ( *Array) ( *parser) { := func() { parseSpacesAndNewlines(, ) } () for startsCompound(.peek(), NormalExpr) { .parse(&Compound{}).addTo(&.Compounds, ) () } } func ( rune) bool { return IsWhitespace() || startsIndexing(, NormalExpr) } // Primary is the smallest expression unit. type Primary struct { node ExprCtx ExprCtx Type PrimaryType // The unquoted string value. Valid for Bareword, SingleQuoted, // DoubleQuoted, Variable, Wildcard and Tilde. Value string Elements []*Compound // Valid for List and Lambda Chunk *Chunk // Valid for OutputCapture, ExitusCapture and Lambda MapPairs []*MapPair // Valid for Map and Lambda Braced []*Compound // Valid for Braced } // PrimaryType is the type of a Primary. type PrimaryType int // Possible values for PrimaryType. const ( BadPrimary PrimaryType = iota Bareword SingleQuoted DoubleQuoted Variable Wildcard Tilde ExceptionCapture OutputCapture List Lambda Map Braced ) func ( *Primary) ( *parser) { := .peek() if !startsPrimary(, .ExprCtx) { .error(errShouldBePrimary) return } // Try bareword early, since it has precedence over wildcard on * // when ctx = commandExpr. if allowedInBareword(, .ExprCtx) { .bareword() return } switch { case '\'': .singleQuoted() case '"': .doubleQuoted() case '$': .variable() case '*': .starWildcard() case '?': if .hasPrefix("?(") { .exitusCapture() } else { .questionWildcard() } case '(': .outputCapture() case '[': .lbracket() case '{': .lbrace() default: // Parse an empty bareword. .Type = Bareword } } func ( *Primary) ( *parser) { .Type = SingleQuoted .next() .singleQuotedInner() } // Parses a single-quoted string after the opening quote. Sets pn.Value but not // pn.Type. func ( *Primary) ( *parser) { var bytes.Buffer defer func() { .Value = .String() }() for { switch := .next(); { case eof: .error(errStringUnterminated) return case '\'': if .peek() == '\'' { // Two consecutive single quotes .next() .WriteByte('\'') } else { // End of string return } default: .WriteRune() } } } func ( *Primary) ( *parser) { .Type = DoubleQuoted .next() .doubleQuotedInner() } // Parses a double-quoted string after the opening quote. Sets pn.Value but not // pn.Type. func ( *Primary) ( *parser) { var bytes.Buffer defer func() { .Value = .String() }() for { switch := .next(); { case eof: .error(errStringUnterminated) return case '"': return case '\\': switch := .next(); { case 'c', '^': // control sequence := .next() if < 0x3F || > 0x5F { .backup() .error(errInvalidEscapeControl) .next() } if byte() == '?' { // special-case: \c? => del .WriteByte(byte(0x7F)) } else { .WriteByte(byte( - 0x40)) } case 'x', 'u', 'U': // two, four, or eight hex digits var int switch { case 'x': = 2 case 'u': = 4 case 'U': = 8 } var rune for := 0; < ; ++ { , := hexToDigit(.next()) if ! { .backup() .error(errInvalidEscapeHex) break } = *16 + } .WriteRune() case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits := - '0' for := 0; < 2; ++ { := .next() if < '0' || > '7' { .backup() .error(errInvalidEscapeOct) break } = *8 + ( - '0') } .WriteRune() default: if , := doubleEscape[]; { .WriteRune() } else { .backup() .error(errInvalidEscape) .next() } } default: .WriteRune() } } } // a table for the simple double-quote escape sequences. var doubleEscape = map[rune]rune{ // same as golang 'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\v', '\\': '\\', '"': '"', // additional 'e': '\033', } var doubleUnescape = map[rune]rune{} func () { for , := range doubleEscape { doubleUnescape[] = } } func ( rune) (rune, bool) { switch { case '0' <= && <= '9': return - '0', true case 'a' <= && <= 'f': return - 'a' + 10, true case 'A' <= && <= 'F': return - 'A' + 10, true default: return -1, false } } func ( *Primary) ( *parser) { .Type = Variable .next() switch := .next(); { case eof: .backup() .error(errShouldBeVariableName) .next() case '\'': .singleQuotedInner() case '"': .doubleQuotedInner() default: defer func() { .Value = .src[.From+1 : .pos] }() if !allowedInVariableName() && != '@' { .backup() .error(errShouldBeVariableName) } for allowedInVariableName(.peek()) { .next() } } } // The following are allowed in variable names: // * Anything beyond ASCII that is printable // * Letters and numbers // * The symbols "-_:~" func ( rune) bool { return ( >= 0x80 && unicode.IsPrint()) || ('0' <= && <= '9') || ('a' <= && <= 'z') || ('A' <= && <= 'Z') || == '-' || == '_' || == ':' || == '~' } func ( *Primary) ( *parser) { .Type = Wildcard for .peek() == '*' { .next() } .Value = .src[.From:.pos] } func ( *Primary) ( *parser) { .Type = Wildcard if .peek() == '?' { .next() } .Value = .src[.From:.pos] } func ( *Primary) ( *parser) { .next() .next() addSep(, ) .Type = ExceptionCapture .parse(&Chunk{}).addAs(&.Chunk, ) if !parseSep(, , ')') { .error(errShouldBeRParen) } } func ( *Primary) ( *parser) { .Type = OutputCapture parseSep(, , '(') .parse(&Chunk{}).addAs(&.Chunk, ) if !parseSep(, , ')') { .error(errShouldBeRParen) } } // List = '[' { Space } { Compound } ']' // = '[' { Space } { MapPair { Space } } ']' // Map = '[' { Space } '&' { Space } ']' // Lambda = '[' { Space } { (Compound | MapPair) { Space } } ']' '{' Chunk '}' func ( *Primary) ( *parser) { parseSep(, , '[') parseSpacesAndNewlines(, ) := false : for { := .peek() switch { case == '&': .next() := startsCompound(.peek(), LHSExpr) if ! { = true addSep(, ) parseSpacesAndNewlines(, ) break } .backup() .parse(&MapPair{}).addTo(&.MapPairs, ) case startsCompound(, NormalExpr): .parse(&Compound{}).addTo(&.Elements, ) default: break } parseSpacesAndNewlines(, ) } if !parseSep(, , ']') { .error(errShouldBeRBracket) } if parseSep(, , '{') { .lambda() } else { if || len(.MapPairs) > 0 { if len(.Elements) > 0 { // TODO(xiaq): Add correct position information. .error(errBothElementsAndPairs) } .Type = Map } else { .Type = List } } } // lambda parses a lambda expression. The opening brace has been seen. func ( *Primary) ( *parser) { .Type = Lambda .parse(&Chunk{}).addAs(&.Chunk, ) if !parseSep(, , '}') { .error(errShouldBeRBrace) } } // Braced = '{' Compound { BracedSep Compounds } '}' // BracedSep = { Space | '\n' } [ ',' ] { Space | '\n' } func ( *Primary) ( *parser) { parseSep(, , '{') if := .peek(); == ';' || == '\r' || == '\n' || IsInlineWhitespace() { .lambda() return } .Type = Braced // TODO(xiaq): The compound can be empty, which allows us to parse {,foo}. // Allowing compounds to be empty can be fragile in other cases. .parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&.Braced, ) for isBracedSep(.peek()) { parseSpacesAndNewlines(, ) // optional, so ignore the return value parseSep(, , ',') parseSpacesAndNewlines(, ) .parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&.Braced, ) } if !parseSep(, , '}') { .error(errShouldBeBraceSepOrRBracket) } } func ( rune) bool { return == ',' || IsWhitespace() } func ( *Primary) ( *parser) { .Type = Bareword defer func() { .Value = .src[.From:.pos] }() for allowedInBareword(.peek(), .ExprCtx) { .next() } } // allowedInBareword returns where a rune is allowed in barewords in the given // expression context. The special strictExpr context queries whether the rune // is allowed in all contexts. // // The following are allowed in barewords: // // * Anything allowed in variable names // * The symbols "./\@%+!" // * The symbol "=", if ctx != lhsExpr && ctx != strictExpr // * The symbol ",", if ctx != bracedExpr && ctx != strictExpr // * The symbols "<>*^", if ctx = commandExpr // // The seemingly weird inclusion of \ is for easier path manipulation in // Windows. func ( rune, ExprCtx) bool { return allowedInVariableName() || == '.' || == '/' || == '\\' || == '@' || == '%' || == '+' || == '!' || ( != LHSExpr && != strictExpr && == '=') || ( != BracedElemExpr && != strictExpr && == ',') || ( == CmdExpr && ( == '<' || == '>' || == '*' || == '^')) } func ( rune, ExprCtx) bool { return == '\'' || == '"' || == '$' || allowedInBareword(, ) || == '?' || == '*' || == '(' || == '[' || == '{' } // MapPair = '&' { Space } Compound { Space } Compound type MapPair struct { node Key, Value *Compound } func ( *MapPair) ( *parser) { parseSep(, , '&') .parse(&Compound{ExprCtx: LHSExpr}).addAs(&.Key, ) if len(.Key.Indexings) == 0 { .error(errShouldBeCompound) } if parseSep(, , '=') { parseSpacesAndNewlines(, ) // Parse value part. It can be empty. .parse(&Compound{}).addAs(&.Value, ) } } // Sep is the catch-all node type for leaf nodes that lack internal structures // and semantics, and serve solely for syntactic purposes. The parsing of // separators depend on the Parent node; as such it lacks a genuine parse // method. type Sep struct { node } // NewSep makes a new Sep. func ( string, , int) *Sep { return &Sep{node: node{diag.Ranging{From: , To: }, [:], nil, nil}} } func (*Sep) (*parser) { // A no-op, only to satisfy the Node interface. } func ( Node, *parser) { var int := Children() if len() > 0 { = [len()-1].Range().To } else { = .Range().From } if < .pos { addChild(, NewSep(.src, , .pos)) } } func ( Node, *parser, rune) bool { if .peek() == { .next() addSep(, ) return true } return false } func ( Node, *parser) { parseSpacesInner(, , false) } func ( Node, *parser) { parseSpacesInner(, , true) } func ( Node, *parser, bool) { : for { := .peek() switch { case IsInlineWhitespace(): .next() case && IsWhitespace(): .next() case == '#': // Comment is like inline whitespace as long as we don't include the // trailing newline. .next() for { := .peek() if == eof || == '\r' || == '\n' { break } .next() } case == '^': // Line continuation is like inline whitespace. .next() switch .peek() { case '\r': .next() if .peek() == '\n' { .next() } case '\n': .next() case eof: .error(errShouldBeNewline) default: .backup() break } default: break } } addSep(, ) } // IsInlineWhitespace reports whether r is an inline whitespace character. // Currently this includes space (Unicode 0x20) and tab (Unicode 0x9). func ( rune) bool { return == ' ' || == '\t' } // IsWhitespace reports whether r is a whitespace. Currently this includes // inline whitespace characters and newline (Unicode 0xa). func ( rune) bool { return IsInlineWhitespace() || == '\r' || == '\n' } func ( Node, Node) { .n().addChild() .n().parent = }