package parse
import (
"bytes"
"errors"
"fmt"
"io"
"unicode"
"src.elv.sh/pkg/diag"
)
// Tree pairs the root Chunk of a parsed syntax tree with the Source it was
// parsed from.
type Tree struct {
// Root is the top-level node of the tree.
Root *Chunk
// Source is the source that was parsed to build Root.
Source Source
}
// Config holds the options accepted by Parse and ParseAs.
type Config struct {
// WarningWriter is handed to the parser as its warning sink.
// NOTE(review): whether a nil writer is tolerated depends on the parser
// implementation, which is not visible here.
WarningWriter io .Writer
}
// Parse parses src into a Tree rooted at a fresh Chunk. The Tree is returned
// alongside the error reported by ParseAs, whether or not that error is nil.
func Parse(src Source, cfg Config) (Tree, error) {
	root := &Chunk{}
	err := ParseAs(src, root, cfg)
	return Tree{Root: root, Source: src}, err
}
// ParseAs parses src into the caller-supplied node n. It creates a parser
// over src.Code, runs it on n, finalises it with done, and returns whatever
// assembleError produces.
func ParseAs(src Source, n Node, cfg Config) error {
	ps := &parser{
		srcName: src.Name,
		src:     src.Code,
		warn:    cfg.WarningWriter,
	}
	ps.parse(n)
	ps.done()
	return ps.assembleError()
}
// Errors reported while parsing. Apart from errBadLHS, which is a plain
// errors.New value, all of them are built with newError (defined elsewhere).
// NOTE(review): newError's first argument looks like the message and the
// remaining ones like descriptions of what was expected instead; confirm
// against its definition.
var (
errShouldBeForm = newError ("" , "form" )
errBadLHS = errors .New ("bad assignment LHS" )
errBadRedirSign = newError ("bad redir sign" , "'<'" , "'>'" , "'>>'" , "'<>'" )
errShouldBeFD = newError ("" , "a composite term representing fd" )
errShouldBeFilename = newError ("" , "a composite term representing filename" )
errShouldBeArray = newError ("" , "spaced" )
errStringUnterminated = newError ("string not terminated" )
errChainedAssignment = newError ("chained assignment not yet supported" )
errInvalidEscape = newError ("invalid escape sequence" )
errInvalidEscapeOct = newError ("invalid escape sequence" , "octal digit" )
errInvalidEscapeHex = newError ("invalid escape sequence" , "hex digit" )
errInvalidEscapeControl = newError ("invalid control sequence" , "a codepoint between 0x3F and 0x5F" )
errShouldBePrimary = newError ("" , "single-quoted string" , "double-quoted string" , "bareword" )
errShouldBeVariableName = newError ("" , "variable name" )
errShouldBeRBracket = newError ("" , "']'" )
errShouldBeRBrace = newError ("" , "'}'" )
// NOTE(review): the name says RBracket, but the alternatives listed are ','
// and '}' (a closing brace).
errShouldBeBraceSepOrRBracket = newError ("" , "','" , "'}'" )
errShouldBeRParen = newError ("" , "')'" )
errShouldBeCompound = newError ("" , "compound" )
errShouldBeEqual = newError ("" , "'='" )
errBothElementsAndPairs = newError ("cannot contain both list elements and map pairs" )
errShouldBeNewline = newError ("" , "newline" )
)
// Chunk is a node holding a sequence of pipelines.
type Chunk struct {
node
// Pipelines are the pipelines parsed into this chunk, in source order.
Pipelines []*Pipeline
}
// parse reads leading separators and then pipelines for as long as the next
// rune can start one. It stops as soon as a pipeline is not followed by at
// least one pipeline separator.
func (bn *Chunk) parse(ps *parser) {
	bn.parseSeps(ps)
	for {
		if !startsPipeline(ps.peek()) {
			return
		}
		ps.parse(&Pipeline{}).addTo(&bn.Pipelines, bn)
		if bn.parseSeps(ps) == 0 {
			return
		}
	}
}
// isPipelineSep reports whether r separates pipelines: a carriage return, a
// newline or a semicolon.
func isPipelineSep(r rune) bool {
	switch r {
	case '\r', '\n', ';':
		return true
	}
	return false
}
// parseSeps consumes pipeline separators together with any inline whitespace
// and comments between them. It returns how many pipeline separators were
// consumed; runs of spaces and comments are not counted.
func (bn *Chunk) parseSeps(ps *parser) int {
	count := 0
	for {
		r := ps.peek()
		switch {
		case isPipelineSep(r):
			// Each separator rune becomes its own Sep child.
			parseSep(bn, ps, r)
			count++
		case IsInlineWhitespace(r) || r == '#':
			parseSpaces(bn, ps)
		default:
			return count
		}
	}
}
// Pipeline is a node holding one or more forms joined by '|'.
type Pipeline struct {
node
// Forms are the forms of the pipeline, in source order.
Forms []*Form
// Background is set when the pipeline is followed by '&'.
Background bool
}
// parse reads a first form, then one more form after every '|'. Whitespace
// and newlines are allowed after a '|'. A trailing '&' marks the pipeline as
// a background pipeline.
func (pn *Pipeline) parse(ps *parser) {
	ps.parse(&Form{}).addTo(&pn.Forms, pn)
	for {
		if !parseSep(pn, ps, '|') {
			break
		}
		parseSpacesAndNewlines(pn, ps)
		if !startsForm(ps.peek()) {
			// A '|' must be followed by a form; give up on this pipeline.
			ps.error(errShouldBeForm)
			return
		}
		ps.parse(&Form{}).addTo(&pn.Forms, pn)
	}

	parseSpaces(pn, ps)
	if ps.peek() != '&' {
		return
	}
	ps.next()
	addSep(pn, ps)
	pn.Background = true
	parseSpaces(pn, ps)
}
// startsPipeline reports whether r can begin a pipeline, which is the case
// exactly when it can begin a form.
func startsPipeline (r rune ) bool {
return startsForm (r )
}
// Form is a node holding optional leading assignments, a head, and the
// arguments, options and redirections that follow the head.
type Form struct {
node
// Assignments are the assignments parsed before the head.
Assignments []*Assignment
// Head is the compound parsed in command context; it is left nil for a form
// that consists of assignments only.
Head *Compound
// Args are the compounds after the head that are not part of a redirection.
Args []*Compound
// Opts are the '&'-prefixed map pairs after the head.
Opts []*MapPair
// Redirs are the redirections after the head.
Redirs []*Redir
}
// parse parses a form: leading assignments, then a head compound, then any
// mix of arguments, '&'-prefixed options and redirections.
func (fn *Form ) parse (ps *parser ) {
parseSpaces (fn , ps )
// Consume as many leading assignments as parse without error.
for fn .tryAssignment (ps ) {
parseSpaces (fn , ps )
}
if !startsCompound (ps .peek (), CmdExpr ) {
if len (fn .Assignments ) > 0 {
// The form consists of assignments only; there is no head.
return
}
// The error is recorded, but parsing still continues with the head below.
ps .error (fmt .Errorf ("bad rune at form head: %q" , ps .peek ()))
}
ps .parse (&Compound {ExprCtx : CmdExpr }).addAs (&fn .Head , fn )
parseSpaces (fn , ps )
for {
r := ps .peek ()
switch {
case r == '&' :
// Look one rune past the '&' to tell an option from a lone '&', then
// step back so that the '&' itself is still unconsumed.
ps .next ()
hasMapPair := startsCompound (ps .peek (), LHSExpr )
ps .backup ()
if !hasMapPair {
// A lone '&' is left for the caller (Pipeline.parse checks for it).
return
}
ps .parse (&MapPair {}).addTo (&fn .Opts , fn )
case startsCompound (r , NormalExpr ):
// A compound directly followed by a redirection sign is the left-hand
// side of that redirection; otherwise it is an ordinary argument.
cn := &Compound {}
ps .parse (cn )
if isRedirSign (ps .peek ()) {
ps .parse (&Redir {Left : cn }).addTo (&fn .Redirs , fn )
} else {
parsed {cn }.addTo (&fn .Args , fn )
}
case isRedirSign (r ):
// A redirection without an explicit left-hand side.
ps .parse (&Redir {}).addTo (&fn .Redirs , fn )
default :
return
}
parseSpaces (fn , ps )
}
}
// tryAssignment attempts to parse one assignment at the current position. On
// success the assignment is added to fn.Assignments and true is returned. If
// the attempt added any error entries, those entries are dropped, the parser
// position is restored, and false is returned.
func (fn *Form) tryAssignment(ps *parser) bool {
	if !startsIndexing(ps.peek(), LHSExpr) {
		return false
	}

	// Remember where we were so a failed attempt can be undone.
	savedPos, savedEntries := ps.pos, ps.errors.Entries
	assignment := ps.parse(&Assignment{})
	if len(ps.errors.Entries) <= len(savedEntries) {
		assignment.addTo(&fn.Assignments, fn)
		return true
	}

	ps.errors.Entries = savedEntries
	ps.pos = savedPos
	return false
}
// startsForm reports whether r can begin a form: inline whitespace, or any
// rune that can begin a compound in command context.
func startsForm (r rune ) bool {
return IsInlineWhitespace (r ) || startsCompound (r , CmdExpr )
}
// Assignment is a node of the shape Left '=' Right.
type Assignment struct {
node
// Left is the indexing on the left of '=', parsed in LHSExpr context.
Left *Indexing
// Right is the compound on the right of '='.
Right *Compound
}
// parse reads the left-hand indexing, checks that its head is a valid
// variable name (a leading '@' is allowed), expects '=', and then reads the
// right-hand compound. Errors are recorded without stopping the parse.
func (an *Assignment) parse(ps *parser) {
	ps.parse(&Indexing{ExprCtx: LHSExpr}).addAs(&an.Left, an)
	if lhsHead := an.Left.Head; !ValidLHSVariable(lhsHead, true) {
		ps.errorp(lhsHead, errShouldBeVariableName)
	}

	if sawEqual := parseSep(an, ps, '='); !sawEqual {
		ps.error(errShouldBeEqual)
	}
	ps.parse(&Compound{}).addAs(&an.Right, an)
}
// ValidLHSVariable reports whether p may appear as a variable on the
// left-hand side of an assignment.
//
// Braced and quoted primaries are always accepted. A bareword is accepted
// when it is non-empty and every rune is allowed in a variable name; if
// allowSigil is true, one leading '@' is skipped before that check. Every
// other primary type is rejected.
func ValidLHSVariable(p *Primary, allowSigil bool) bool {
	switch p.Type {
	case Braced, SingleQuoted, DoubleQuoted:
		return true
	case Bareword:
		// Checked rune by rune below.
	default:
		return false
	}

	name := p.Value
	if name == "" {
		return false
	}
	if allowSigil && name[0] == '@' {
		name = name[1:]
	}
	for _, r := range name {
		if !allowedInVariableName(r) {
			return false
		}
	}
	return true
}
// Redir is a node representing a redirection.
type Redir struct {
node
// Left is the optional compound written directly before the redirection
// sign. Form.parse parses it and passes it in.
Left *Compound
// Mode is derived from the redirection sign.
Mode RedirMode
// RightIsFd is set when the right-hand side is introduced by '&'.
RightIsFd bool
// Right is the compound after the redirection sign.
Right *Compound
}
// parse reads the redirection sign, an optional '&' marking the right-hand
// side as a file descriptor, and the right-hand compound. A Left compound
// that is already set is adopted as the first child, and the node's start is
// moved to the start of that compound.
func (rn *Redir) parse(ps *parser) {
	if left := rn.Left; left != nil {
		addChild(rn, left)
		rn.From = left.From
	}

	// Take the whole run of '<' and '>' runes and interpret it as one sign.
	start := ps.pos
	for isRedirSign(ps.peek()) {
		ps.next()
	}
	switch ps.src[start:ps.pos] {
	case "<":
		rn.Mode = Read
	case ">":
		rn.Mode = Write
	case ">>":
		rn.Mode = Append
	case "<>":
		rn.Mode = ReadWrite
	default:
		ps.error(errBadRedirSign)
	}
	addSep(rn, ps)
	parseSpaces(rn, ps)

	if parseSep(rn, ps, '&') {
		rn.RightIsFd = true
	}
	ps.parse(&Compound{}).addAs(&rn.Right, rn)
	if len(rn.Right.Indexings) > 0 {
		return
	}
	// An empty right-hand side is an error; which one depends on the '&'.
	if rn.RightIsFd {
		ps.error(errShouldBeFD)
	} else {
		ps.error(errShouldBeFilename)
	}
}
// isRedirSign reports whether r is one of the runes that make up a
// redirection sign, namely '<' or '>'.
func isRedirSign(r rune) bool {
	switch r {
	case '<', '>':
		return true
	}
	return false
}
// RedirMode records the mode of a redirection.
type RedirMode int
// Possible values of RedirMode. Redir.parse maps "<" to Read, ">" to Write,
// ">>" to Append and "<>" to ReadWrite; BadRedirMode is the zero value and is
// what remains when the sign is not one of those.
const (
BadRedirMode RedirMode = iota
Read
Write
ReadWrite
Append
)
// Filter is a node holding a sequence of arguments and '&'-prefixed options.
type Filter struct {
node
// Args are the compounds parsed in normal context.
Args []*Compound
// Opts are the map pairs introduced by '&'.
Opts []*MapPair
}
// parse reads a space-separated mix of '&'-prefixed options and compounds
// until the next rune starts neither.
func (qn *Filter) parse(ps *parser) {
	parseSpaces(qn, ps)
	for {
		switch r := ps.peek(); {
		case r == '&':
			ps.parse(&MapPair{}).addTo(&qn.Opts, qn)
		case startsCompound(r, NormalExpr):
			ps.parse(&Compound{}).addTo(&qn.Args, qn)
		default:
			return
		}
		parseSpaces(qn, ps)
	}
}
// Compound is a node holding a run of adjacent indexings.
type Compound struct {
node
// ExprCtx is the context passed on to every indexing parsed into the
// compound.
ExprCtx ExprCtx
// Indexings are the parts of the compound, in source order. A leading '~'
// is stored as the first indexing.
Indexings []*Indexing
}
// ExprCtx is the context an expression is parsed in. It changes which runes
// allowedInBareword accepts as part of a bareword.
type ExprCtx int
// Possible values of ExprCtx. As used by allowedInBareword:
//   - NormalExpr is the zero value.
//   - CmdExpr additionally allows '<', '>', '*' and '^'.
//   - LHSExpr disallows '='.
//   - BracedElemExpr disallows ','.
//   - strictExpr disallows both '=' and ','.
const (
NormalExpr ExprCtx = iota
CmdExpr
LHSExpr
BracedElemExpr
strictExpr
)
// parse handles an optional leading '~' and then reads indexings for as long
// as the next rune can start one in the compound's context.
func (cn *Compound) parse(ps *parser) {
	cn.tilde(ps)
	for {
		if !startsIndexing(ps.peek(), cn.ExprCtx) {
			return
		}
		ps.parse(&Indexing{ExprCtx: cn.ExprCtx}).addTo(&cn.Indexings, cn)
	}
}
// tilde consumes a '~' at the current position, if there is one, and adds it
// to the compound as an Indexing whose head is a Primary of type Tilde. Both
// nodes cover exactly the one-rune range of the '~'.
func (cn *Compound) tilde(ps *parser) {
	if ps.peek() != '~' {
		return
	}
	ps.next()

	end := ps.pos
	base := node{
		Ranging:    diag.Ranging{From: end - 1, To: end},
		sourceText: "~",
	}
	tildePrimary := &Primary{node: base, Type: Tilde, Value: "~"}
	tildeIndexing := &Indexing{node: base}
	parsed{tildePrimary}.addAs(&tildeIndexing.Head, tildeIndexing)
	parsed{tildeIndexing}.addTo(&cn.Indexings, cn)
}
// startsCompound reports whether r can begin a compound in context ctx, which
// is the case exactly when it can begin an indexing.
func startsCompound (r rune , ctx ExprCtx ) bool {
return startsIndexing (r , ctx )
}
// Indexing is a node holding a head primary followed by zero or more
// bracketed index arrays.
type Indexing struct {
node
// ExprCtx is the context passed on to the head primary.
ExprCtx ExprCtx
// Head is the primary being indexed.
Head *Primary
// Indicies are the arrays parsed between '[' and ']' after the head, in
// source order.
Indicies []*Array
}
// parse reads the head primary and then one array for every '[' that follows
// directly. A missing ']' is reported and ends the indexing.
func (in *Indexing) parse(ps *parser) {
	ps.parse(&Primary{ExprCtx: in.ExprCtx}).addAs(&in.Head, in)
	for {
		if !parseSep(in, ps, '[') {
			return
		}
		if !startsArray(ps.peek()) {
			// Reported, but the array is still parsed below.
			ps.error(errShouldBeArray)
		}
		ps.parse(&Array{}).addTo(&in.Indicies, in)
		if !parseSep(in, ps, ']') {
			ps.error(errShouldBeRBracket)
			return
		}
	}
}
// startsIndexing reports whether r can begin an indexing in context ctx,
// which is the case exactly when it can begin a primary.
func startsIndexing (r rune , ctx ExprCtx ) bool {
return startsPrimary (r , ctx )
}
// Array is a node holding compounds separated by whitespace or newlines.
type Array struct {
node
// Compounds are the elements of the array, in source order.
Compounds []*Compound
// Semicolons is not written by any code visible in this part of the file.
// NOTE(review): presumably positions of semicolons; confirm against its
// users.
Semicolons []int
}
// parse reads compounds separated by spaces and newlines. Leading and
// trailing spaces and newlines are consumed as well.
func (sn *Array) parse(ps *parser) {
	parseSpacesAndNewlines(sn, ps)
	for startsCompound(ps.peek(), NormalExpr) {
		ps.parse(&Compound{}).addTo(&sn.Compounds, sn)
		parseSpacesAndNewlines(sn, ps)
	}
}
// startsArray reports whether r can begin an array: any whitespace rune, or
// any rune that can begin an indexing in normal context.
func startsArray (r rune ) bool {
return IsWhitespace (r ) || startsIndexing (r , NormalExpr )
}
// Primary is a node for the smallest expression unit. Which of the fields
// below are populated depends on Type.
type Primary struct {
node
// ExprCtx is the context the primary is parsed in.
ExprCtx ExprCtx
// Type is set by the parse helper that recognised the primary.
Type PrimaryType
// Value is set for barewords, quoted strings, variables, wildcards and the
// tilde.
Value string
// Elements are the non-pair items parsed inside '[' ... ']'.
Elements []*Compound
// Chunk is the body of a capture or a lambda.
Chunk *Chunk
// MapPairs are the '&'-prefixed pairs parsed inside '[' ... ']'.
MapPairs []*MapPair
// Braced are the alternatives of a braced expression.
Braced []*Compound
}
// PrimaryType is the kind of a Primary node.
type PrimaryType int
// Possible values of PrimaryType. BadPrimary is the zero value and is what a
// Primary keeps when Primary.parse rejects its first rune. ExceptionCapture
// is used for the "?(" ... ")" form and OutputCapture for "(" ... ")".
const (
BadPrimary PrimaryType = iota
Bareword
SingleQuoted
DoubleQuoted
Variable
Wildcard
Tilde
ExceptionCapture
OutputCapture
List
Lambda
Map
Braced
)
// parse dispatches on the next rune to the helper for the matching kind of
// primary. Barewords are tried before the rune-specific cases, so a rune that
// the context allows in a bareword (such as '*' in command context) starts a
// bareword rather than a wildcard.
func (pn *Primary) parse(ps *parser) {
	r := ps.peek()
	switch {
	case !startsPrimary(r, pn.ExprCtx):
		ps.error(errShouldBePrimary)
	case allowedInBareword(r, pn.ExprCtx):
		pn.bareword(ps)
	case r == '\'':
		pn.singleQuoted(ps)
	case r == '"':
		pn.doubleQuoted(ps)
	case r == '$':
		pn.variable(ps)
	case r == '*':
		pn.starWildcard(ps)
	case r == '?' && ps.hasPrefix("?("):
		pn.exitusCapture(ps)
	case r == '?':
		pn.questionWildcard(ps)
	case r == '(':
		pn.outputCapture(ps)
	case r == '[':
		pn.lbracket(ps)
	case r == '{':
		pn.lbrace(ps)
	default:
		// Nothing is consumed; the primary becomes an empty bareword.
		pn.Type = Bareword
	}
}
// singleQuoted parses a single-quoted string. It consumes the opening quote
// (Primary.parse only calls it when the next rune is a single quote) and
// leaves the rest to singleQuotedInner.
func (pn *Primary ) singleQuoted (ps *parser ) {
pn .Type = SingleQuoted
ps .next ()
pn .singleQuotedInner (ps )
}
// singleQuotedInner reads the body of a single-quoted string up to and
// including the closing quote, and stores the decoded text in pn.Value. Two
// consecutive quotes stand for one literal quote. Hitting the end of input
// first is reported as an unterminated string.
func (pn *Primary) singleQuotedInner(ps *parser) {
	var text bytes.Buffer
	// Whatever has been collected is stored on every way out.
	defer func() { pn.Value = text.String() }()
	for {
		r := ps.next()
		if r == eof {
			ps.error(errStringUnterminated)
			return
		}
		if r != '\'' {
			text.WriteRune(r)
			continue
		}
		// A quote: it either closes the string or is doubled.
		if ps.peek() != '\'' {
			return
		}
		ps.next()
		text.WriteByte('\'')
	}
}
// doubleQuoted parses a double-quoted string. It consumes the opening quote
// (Primary.parse only calls it when the next rune is a double quote) and
// leaves the rest to doubleQuotedInner.
func (pn *Primary ) doubleQuoted (ps *parser ) {
pn .Type = DoubleQuoted
ps .next ()
pn .doubleQuotedInner (ps )
}
// doubleQuotedInner reads the body of a double-quoted string up to and
// including the closing quote, decoding backslash escapes, and stores the
// result in pn.Value. Hitting the end of input first is reported as an
// unterminated string.
func (pn *Primary ) doubleQuotedInner (ps *parser ) {
var buf bytes .Buffer
// Whatever has been decoded so far is stored on every return path.
defer func () { pn .Value = buf .String () }()
for {
switch r := ps .next (); r {
case eof :
ps .error (errStringUnterminated )
return
case '"' :
return
case '\\' :
switch r := ps .next (); r {
case 'c' , '^' :
// Control sequence: the following rune must lie in 0x3F..0x5F.
r := ps .next ()
if r < 0x3F || r > 0x5F {
// Step back so the error is reported before the offending rune, then
// consume it again.
ps .backup ()
ps .error (errInvalidEscapeControl )
ps .next ()
}
// NOTE(review): a byte is written even after the error above, so an
// out-of-range rune still contributes a truncated value.
if byte (r ) == '?' {
// '?' maps to 0x7F.
buf .WriteByte (byte (0x7F ))
} else {
buf .WriteByte (byte (r - 0x40 ))
}
case 'x' , 'u' , 'U' :
// Hex escape with two, four or eight digits respectively.
var n int
switch r {
case 'x' :
n = 2
case 'u' :
n = 4
case 'U' :
n = 8
}
var rr rune
for i := 0 ; i < n ; i ++ {
d , ok := hexToDigit (ps .next ())
if !ok {
ps .backup ()
ps .error (errInvalidEscapeHex )
break
}
rr = rr *16 + d
}
// The value accumulated so far is written as a rune, even when the
// loop stopped early on a bad digit.
buf .WriteRune (rr )
case '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' :
// Octal escape: the digit just read plus two more.
rr := r - '0'
for i := 0 ; i < 2 ; i ++ {
r := ps .next ()
if r < '0' || r > '7' {
ps .backup ()
ps .error (errInvalidEscapeOct )
break
}
rr = rr *8 + (r - '0' )
}
buf .WriteRune (rr )
default :
// Single-rune escapes are looked up in doubleEscape; anything else is
// an error and contributes nothing to the value.
if rr , ok := doubleEscape [r ]; ok {
buf .WriteRune (rr )
} else {
ps .backup ()
ps .error (errInvalidEscape )
ps .next ()
}
}
default :
buf .WriteRune (r )
}
}
}
// doubleEscape maps the rune following a backslash in a double-quoted string
// to the rune it stands for.
var doubleEscape = map [rune ]rune {
'a' : '\a' , 'b' : '\b' , 'f' : '\f' , 'n' : '\n' , 'r' : '\r' ,
't' : '\t' , 'v' : '\v' , '\\' : '\\' , '"' : '"' ,
'e' : '\033' ,
}
// doubleUnescape is the inverse of doubleEscape; it is filled in by init.
var doubleUnescape = map [rune ]rune {}
// init populates doubleUnescape by inverting doubleEscape.
func init () {
for k , v := range doubleEscape {
doubleUnescape [v ] = k
}
}
// hexToDigit converts a hexadecimal digit rune, in either case, to its
// numeric value. For any other rune it returns -1 and false.
func hexToDigit(r rune) (rune, bool) {
	if r >= '0' && r <= '9' {
		return r - '0', true
	}
	if r >= 'a' && r <= 'f' {
		return 10 + (r - 'a'), true
	}
	if r >= 'A' && r <= 'F' {
		return 10 + (r - 'A'), true
	}
	return -1, false
}
// variable parses a variable reference. After the '$', the name is either a
// quoted string (decoded by the quoted-string helpers) or a raw name, which
// may start with '@'. For a raw name, pn.Value is the source text after the
// '$'.
func (pn *Primary) variable(ps *parser) {
	pn.Type = Variable
	ps.next() // the '$' that made Primary.parse dispatch here

	first := ps.next()
	switch first {
	case eof:
		// Report the error before the end of input, then consume it again.
		ps.backup()
		ps.error(errShouldBeVariableName)
		ps.next()
		return
	case '\'':
		pn.singleQuotedInner(ps)
		return
	case '"':
		pn.doubleQuotedInner(ps)
		return
	}

	defer func() { pn.Value = ps.src[pn.From+1 : ps.pos] }()
	if first != '@' && !allowedInVariableName(first) {
		// Not a name rune: put it back and record the error.
		ps.backup()
		ps.error(errShouldBeVariableName)
	}
	for allowedInVariableName(ps.peek()) {
		ps.next()
	}
}
// allowedInVariableName reports whether r may appear in a raw variable name:
// ASCII letters and digits, '-', '_', ':' and '~', and any printable rune at
// or above 0x80.
func allowedInVariableName(r rune) bool {
	switch {
	case r >= 0x80:
		return unicode.IsPrint(r)
	case '0' <= r && r <= '9', 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	case r == '-', r == '_', r == ':', r == '~':
		return true
	}
	return false
}
// starWildcard parses a wildcard made of one or more consecutive '*' runes.
// Value is the source text from the start of the node to the current
// position.
func (pn *Primary ) starWildcard (ps *parser ) {
pn .Type = Wildcard
for ps .peek () == '*' {
ps .next ()
}
pn .Value = ps .src [pn .From :ps .pos ]
}
// questionWildcard parses a '?' wildcard. At most one '?' is consumed. Value
// is the source text from the start of the node to the current position.
func (pn *Primary ) questionWildcard (ps *parser ) {
pn .Type = Wildcard
if ps .peek () == '?' {
ps .next ()
}
pn .Value = ps .src [pn .From :ps .pos ]
}
// exitusCapture parses an exception capture of the form "?(" chunk ")".
// Primary.parse only calls it when the input has the prefix "?(".
func (pn *Primary ) exitusCapture (ps *parser ) {
// Consume '?' and '(' and record the pair as one separator.
ps .next ()
ps .next ()
addSep (pn , ps )
pn .Type = ExceptionCapture
ps .parse (&Chunk {}).addAs (&pn .Chunk , pn )
if !parseSep (pn , ps , ')' ) {
ps .error (errShouldBeRParen )
}
}
// outputCapture parses an output capture of the form "(" chunk ")". A
// missing ")" is reported as an error.
func (pn *Primary ) outputCapture (ps *parser ) {
pn .Type = OutputCapture
parseSep (pn , ps , '(' )
ps .parse (&Chunk {}).addAs (&pn .Chunk , pn )
if !parseSep (pn , ps , ')' ) {
ps .error (errShouldBeRParen )
}
}
// lbracket parses a primary that starts with '[': a list, a map, or a lambda
// with a bracketed part before its body. The type is decided only after the
// closing ']'.
func (pn *Primary ) lbracket (ps *parser ) {
parseSep (pn , ps , '[' )
parseSpacesAndNewlines (pn , ps )
// loneAmpersand records a '&' that is not followed by a key, as in "[&]".
loneAmpersand := false
items :
for {
r := ps .peek ()
switch {
case r == '&' :
// Look one rune past the '&' to tell a map pair from a lone '&'.
ps .next ()
hasMapPair := startsCompound (ps .peek (), LHSExpr )
if !hasMapPair {
// The '&' stays consumed and is recorded as a separator; no more
// items are read after it.
loneAmpersand = true
addSep (pn , ps )
parseSpacesAndNewlines (pn , ps )
break items
}
// Put the '&' back so that MapPair.parse can consume it itself.
ps .backup ()
ps .parse (&MapPair {}).addTo (&pn .MapPairs , pn )
case startsCompound (r , NormalExpr ):
ps .parse (&Compound {}).addTo (&pn .Elements , pn )
default :
break items
}
parseSpacesAndNewlines (pn , ps )
}
if !parseSep (pn , ps , ']' ) {
ps .error (errShouldBeRBracket )
}
if parseSep (pn , ps , '{' ) {
// A '{' directly after ']' makes this a lambda.
pn .lambda (ps )
} else {
if loneAmpersand || len (pn .MapPairs ) > 0 {
// Any '&' makes this a map; mixing in plain elements is an error.
if len (pn .Elements ) > 0 {
ps .error (errBothElementsAndPairs )
}
pn .Type = Map
} else {
pn .Type = List
}
}
}
// lambda parses the body of a lambda and its closing '}'. The opening '{' has
// already been consumed by the callers (lbracket and lbrace).
func (pn *Primary ) lambda (ps *parser ) {
pn .Type = Lambda
ps .parse (&Chunk {}).addAs (&pn .Chunk , pn )
if !parseSep (pn , ps , '}' ) {
ps .error (errShouldBeRBrace )
}
}
// lbrace parses a primary that starts with '{'. If the '{' is followed by
// ';', a newline rune or inline whitespace, it is a lambda; otherwise it is a
// braced expression whose alternatives are separated by ',' and/or
// whitespace.
func (pn *Primary ) lbrace (ps *parser ) {
parseSep (pn , ps , '{' )
if r := ps .peek (); r == ';' || r == '\r' || r == '\n' || IsInlineWhitespace (r ) {
pn .lambda (ps )
return
}
pn .Type = Braced
// The first alternative is parsed unconditionally, so it may be empty.
ps .parse (&Compound {ExprCtx : BracedElemExpr }).addTo (&pn .Braced , pn )
for isBracedSep (ps .peek ()) {
parseSpacesAndNewlines (pn , ps )
// The ',' is optional, so the result of parseSep is ignored.
parseSep (pn , ps , ',' )
parseSpacesAndNewlines (pn , ps )
ps .parse (&Compound {ExprCtx : BracedElemExpr }).addTo (&pn .Braced , pn )
}
if !parseSep (pn , ps , '}' ) {
// Despite the RBracket in its name, this error lists ',' and '}'.
ps .error (errShouldBeBraceSepOrRBracket )
}
}
// isBracedSep reports whether r can separate alternatives in a braced
// expression: a comma or any whitespace rune.
func isBracedSep (r rune ) bool {
return r == ',' || IsWhitespace (r )
}
// bareword parses a bareword: the longest run of runes allowed in a bareword
// in the primary's context. On return, Value is the source text from the
// start of the node to the current position.
func (pn *Primary ) bareword (ps *parser ) {
pn .Type = Bareword
defer func () { pn .Value = ps .src [pn .From :ps .pos ] }()
for allowedInBareword (ps .peek (), pn .ExprCtx ) {
ps .next ()
}
}
// allowedInBareword reports whether r may appear in a bareword in context
// ctx.
//
// Every rune allowed in a variable name is allowed, as are '.', '/', '\\',
// '@', '%', '+' and '!'. In addition:
//   - '=' is allowed except in LHSExpr and strictExpr;
//   - ',' is allowed except in BracedElemExpr and strictExpr;
//   - '<', '>', '*' and '^' are allowed only in CmdExpr.
func allowedInBareword(r rune, ctx ExprCtx) bool {
	if allowedInVariableName(r) {
		return true
	}
	switch r {
	case '.', '/', '\\', '@', '%', '+', '!':
		return true
	case '=':
		return ctx != LHSExpr && ctx != strictExpr
	case ',':
		return ctx != BracedElemExpr && ctx != strictExpr
	case '<', '>', '*', '^':
		return ctx == CmdExpr
	}
	return false
}
// startsPrimary reports whether r can begin a primary in context ctx: one of
// the runes that introduce a quoted string, variable, wildcard, capture, list
// or brace form, or any rune allowed in a bareword in that context.
func startsPrimary(r rune, ctx ExprCtx) bool {
	switch r {
	case '\'', '"', '$', '?', '*', '(', '[', '{':
		return true
	}
	return allowedInBareword(r, ctx)
}
// MapPair is a node of the shape '&' Key, optionally followed by '=' Value.
type MapPair struct {
node
// Key is parsed in LHSExpr context. Value is left nil when no '=' follows
// the key.
Key , Value *Compound
}
// parse reads '&', a key compound, and (if an '=' follows) optional spaces
// or newlines and a value compound. An empty key is reported as an error.
func (mpn *MapPair) parse(ps *parser) {
	parseSep(mpn, ps, '&')

	ps.parse(&Compound{ExprCtx: LHSExpr}).addAs(&mpn.Key, mpn)
	if len(mpn.Key.Indexings) == 0 {
		ps.error(errShouldBeCompound)
	}

	if !parseSep(mpn, ps, '=') {
		// No '=', so the pair has no value.
		return
	}
	parseSpacesAndNewlines(mpn, ps)
	ps.parse(&Compound{}).addAs(&mpn.Value, mpn)
}
// Sep is a node for the stretches of source text between a node's other
// children, such as delimiters and whitespace. It has no fields of its own.
type Sep struct {
node
}
// NewSep returns a Sep covering src[begin:end], with that slice of src as its
// source text and with no parent or children.
func NewSep(src string, begin, end int) *Sep {
	n := node{
		Ranging:    diag.Ranging{From: begin, To: end},
		sourceText: src[begin:end],
	}
	return &Sep{node: n}
}
// parse does nothing. Sep nodes are built directly with NewSep.
func (*Sep ) parse (*parser ) {
}
// addSep adds a Sep child to n covering the source text that has been
// consumed but is not yet covered by a child: from the end of n's last child
// (or from n's own start when it has no children) up to the parser's current
// position. Nothing is added when that span is empty.
func addSep(n Node, ps *parser) {
	var start int
	kids := Children(n)
	if last := len(kids) - 1; last >= 0 {
		start = kids[last].Range().To
	} else {
		start = n.Range().From
	}
	if start >= ps.pos {
		return
	}
	addChild(n, NewSep(ps.src, start, ps.pos))
}
// parseSep consumes sep if it is the next rune and records it on n via
// addSep. It reports whether sep was consumed.
func parseSep(n Node, ps *parser, sep rune) bool {
	if ps.peek() != sep {
		return false
	}
	ps.next()
	addSep(n, ps)
	return true
}
// parseSpaces consumes inline whitespace, comments and line continuations,
// but not bare newlines, and records them on n as a separator.
func parseSpaces (n Node , ps *parser ) {
parseSpacesInner (n , ps , false )
}
// parseSpacesAndNewlines is like parseSpaces but also consumes carriage
// returns and newlines.
func parseSpacesAndNewlines (n Node , ps *parser ) {
parseSpacesInner (n , ps , true )
}
// parseSpacesInner consumes a run of inline whitespace, comments and line
// continuations and records it on n via addSep. When newlines is true,
// carriage returns and newlines are consumed as well.
func parseSpacesInner (n Node , ps *parser , newlines bool ) {
spaces :
for {
r := ps .peek ()
switch {
case IsInlineWhitespace (r ):
ps .next ()
case newlines && IsWhitespace (r ):
ps .next ()
case r == '#' :
// A comment runs up to, but does not include, the end of the line.
ps .next ()
for {
r := ps .peek ()
if r == eof || r == '\r' || r == '\n' {
break
}
ps .next ()
}
case r == '^' :
// A '^' directly followed by a line ending ("\r", "\r\n" or "\n") is a
// line continuation and is consumed together with that line ending.
ps .next ()
switch ps .peek () {
case '\r' :
ps .next ()
if ps .peek () == '\n' {
ps .next ()
}
case '\n' :
ps .next ()
case eof :
// The '^' stays consumed; the error is recorded and the loop goes on.
ps .error (errShouldBeNewline )
default :
// Not a continuation: put the '^' back and stop.
ps .backup ()
break spaces
}
default :
break spaces
}
}
addSep (n , ps )
}
// IsInlineWhitespace reports whether r is a space or a tab.
func IsInlineWhitespace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// IsWhitespace reports whether r is inline whitespace, a carriage return or
// a newline.
func IsWhitespace(r rune) bool {
	if IsInlineWhitespace(r) {
		return true
	}
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
// addChild links ch under p: it registers ch with p's underlying node via
// that node's addChild method, then records p as ch's parent.
func addChild (p Node , ch Node ) {
p .n ().addChild (ch )
ch .n ().parent = p
}