Initial Commit

2021-10-02 15:12:57 -07:00
commit 1ff241a74e
22 changed files with 3000 additions and 0 deletions

parser/code.go

@@ -0,0 +1,85 @@
package parser
import (
"bytes"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
func (p *Parser) parseCode(tok scanner.Token, lit string) *ast.Code {
// Create new code
code := &ast.Code{}
// If token is not WORD or literal is not "=code"
if tok != scanner.WORD || lit != "=code" {
// Return nil as this code is invalid
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Unscan token
p.unscan()
// Return nil as this code is invalid
return nil
}
// Parse argument list
args := p.parseArgs()
// If 1 or more arguments provided
if len(args) >= 1 {
// Set language to first argument
code.Language = args[0]
}
// If 2 or more arguments provided
if len(args) >= 2 {
// Set style to second argument
code.Style = args[1]
}
// Create buffer for text
textBuf := &bytes.Buffer{}
for {
// Scan token
tok, lit = p.scan()
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return code
}
// If token is WORD and lit is "=end"
if tok == scanner.WORD && lit == "=end" {
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "["
if tok != scanner.PUNCT && lit != "[" {
// Return nil as this is not a valid code
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "]"
if tok != scanner.PUNCT && lit != "]" {
// Return nil as this is not a valid code
return nil
}
break
}
// Write literal to text buffer
textBuf.WriteString(lit)
}
// Set code text to contents of text buffer
code.Text = textBuf.String()
return code
}
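
parseCode implies a block syntax of the form "=code[language, style]" whose raw contents run until a closing "=end[]". Below is a minimal usage sketch (not part of this commit) showing how such a block surfaces through the exported Parse API; it assumes the package is importable as go.arsenm.dev/amu/parser and that the ast.Code fields are exactly the ones set above.

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "=code[go]\nfmt.Println(\"hello\")\n=end[]"
	// Parse routes the leading "=code" word to parseCode (see parser.go below)
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Code != nil {
			// Language comes from the bracketed argument list;
			// Text is everything scanned before "=end[]"
			fmt.Printf("%s: %q\n", entry.Code.Language, entry.Code.Text)
		}
	}
}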

parser/common.go

@@ -0,0 +1,55 @@
package parser
import (
"bytes"
"go.arsenm.dev/amu/scanner"
)
// Attempt to parse an argument list (comma-separated)
func (p *Parser) parseArgs() []string {
// Create buffer for arguments
argBuf := &bytes.Buffer{}
// Create new nil slice for arguments
var args []string
parseLoop:
for {
// Scan token
tok, lit := p.scan()
// If end of file
if tok == scanner.EOF {
// Return nil as this is an invalid argument list
return nil
}
switch tok {
case scanner.WORD:
// Write word to argument buffer
argBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to argument buffer
argBuf.WriteString(lit)
case scanner.PUNCT:
// If literal is "]"
if lit == "]" {
// If length of argument is greater than 0
if argBuf.Len() > 0 {
// Add current argument to slice
args = append(args, argBuf.String())
}
// Stop parsing
break parseLoop
} else if lit == "," {
// Add argument to slice
args = append(args, argBuf.String())
// Reset buffer
argBuf.Reset()
}
}
}
// Return parsed arguments
return args
}
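
Since parseArgs is unexported and expects the opening "[" to have been consumed already, the most direct way to exercise it is an internal test. A hypothetical sketch that could live in this package (file name and assertions are illustrative only, and assume the scanner splits words, punctuation, and whitespace as the code above expects):

package parser

import (
	"strings"
	"testing"
)

func TestParseArgs(t *testing.T) {
	// The scanner starts just past the opening "[" of an argument list
	p := New(strings.NewReader("go, monokai] trailing text"))
	args := p.parseArgs()
	if len(args) != 2 {
		t.Fatalf("expected 2 arguments, got %d", len(args))
	}
	// Whitespace between arguments is written to the buffer as scanned,
	// so the second argument keeps its leading space
	if args[0] != "go" || strings.TrimSpace(args[1]) != "monokai" {
		t.Fatalf("unexpected arguments: %q", args)
	}
}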

parser/heading.go

@@ -0,0 +1,31 @@
package parser
import (
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// Attempt to parse heading
func (p *Parser) parseHeading() *ast.Heading {
// Scan token
tok, lit := p.scan()
// Set level to length of HEADING token
level := len(lit)
// If token is not HEADING or level is greater than 6
if tok != scanner.HEADING || level > 6 {
// Return nil as this is not a valid heading
return nil
}
// Parse para until one newline is encountered
para := p.parsePara(1)
// If successful
if para != nil {
return &ast.Heading{Level: level, Content: para}
}
// Return nil as this is not a valid heading
return nil
}

parser/image.go

@@ -0,0 +1,86 @@
package parser
import (
"bytes"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parseImage attempts to parse an image
func (p *Parser) parseImage() *ast.Image {
// Create new image
img := &ast.Image{}
// Create buffers for image properties
altBuf := &bytes.Buffer{}
srcBuf := &bytes.Buffer{}
linkBuf := &bytes.Buffer{}
// Set current buffer to alt text buffer
currentBuf := altBuf
// Scan token
tok, lit := p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Return nil as this is not a valid image
return nil
}
// Declare variable for last literal
var lastLit string
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.WORD:
// Write word to current buffer
currentBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to current buffer
currentBuf.WriteString(lit)
case scanner.PUNCT:
// If last literal is "]" and current is "("
if lastLit == "]" && lit == "(" {
// Set current buffer to source buffer
currentBuf = srcBuf
// Continue to next token
continue
}
// If last literal is ")" and current is "{"
if lastLit == ")" && lit == "{" {
// Set current buffer to link buffer
currentBuf = linkBuf
// Continue to next token
continue
}
// If current literal is "}" and current buffer is link buffer
if lit == "}" && currentBuf == linkBuf {
// Stop parsing
break parseLoop
}
// If literal does not contain any of the restricted characters
if !strings.ContainsAny(lit, "()[]{}") {
// Write literal to current buffer
currentBuf.WriteString(lit)
}
case scanner.EOL, scanner.EOF:
// Return nil as this is not a valid image
return nil
}
// Set last literal
lastLit = lit
}
// Set image properties
img.Alternate = altBuf.String()
img.Source = srcBuf.String()
img.Link = linkBuf.String()
return img
}
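
Read together with the "!" case in Parse (parser.go below), parseImage implies an image syntax of "![alt text](source){link}". A hedged usage sketch under the same assumptions as the earlier example, relying on the ast.Image fields set above:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "![A diagram](diagram.png){https://example.com}"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Image != nil {
			// Alternate, Source, and Link come from the [], (), and {} sections
			fmt.Println(entry.Image.Alternate, "|", entry.Image.Source, "|", entry.Image.Link)
		}
	}
}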

parser/list.go

@@ -0,0 +1,143 @@
package parser
import (
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parseList attempts to parse a list
func (p *Parser) parseList(tok scanner.Token, lit string) *ast.List {
// Create new list
list := &ast.List{}
// If token is not WORD or literal is not "=list"
if tok != scanner.WORD || lit != "=list" {
// Return nil as this list is invalid
return nil
}
tok, lit = p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Return nil as this list is invalid
return nil
}
tok, lit = p.scan()
// If token is not WORD (the type of the list)
if tok != scanner.WORD {
// Return nil as this list is invalid
return nil
}
list.Type = lit
tok, lit = p.scan()
// If token is not PUNCT or literal is not "]"
if tok != scanner.PUNCT || lit != "]" {
// Return nil as this list is invalid
return nil
}
parseLoop:
for {
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// Create new list item at level 0
item := ast.ListItem{Level: 0}
for {
// Scan token
tok, lit = p.scan()
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// If token is not punctuation or literal is not "."
if tok != scanner.PUNCT || lit != "." {
// Break out of loop as this is the end of the level marker
break
}
// Increment item level
item.Level++
}
for tok != scanner.PUNCT && lit != "." {
// If token is WORD and literal is "=end"
if tok == scanner.WORD && lit == "=end" {
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "["
if tok != scanner.PUNCT && lit != "[" {
// Return nil as this is not a valid list
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "]"
if tok != scanner.PUNCT && lit != "]" {
// Return nil as this is not a valid list
return nil
}
// Add item to list
list.Items = append(list.Items, item)
// Stop parsing
break parseLoop
}
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// Unscan token as it will be required for para
p.unscan()
// Attempt to parse para until one newline encountered
para := p.parsePara(1)
if para == nil {
break
}
// Add para to item content
item.Content = append(item.Content, para)
// Scan token for next loop
tok, lit = p.scan()
// If end of line
if tok == scanner.EOL {
// Scan again
tok, lit = p.scan()
}
}
// If token is the start of the next item's level marker
if tok == scanner.PUNCT && lit == "." {
// Unscan for next level loop
p.unscan()
}
// If no content in item
if len(item.Content) == 0 {
// Continue to next item
continue
}
// Add item to list items
list.Items = append(list.Items, item)
}
// Return list
return list
}
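
parseList implies list markup of the form "=list[type]", followed by items whose nesting level is the number of leading dots, and terminated by "=end[]". A rough usage sketch, again assuming the ast.List and ast.ListItem fields used above:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "=list[unordered]\n. First item\n.. Nested item\n=end[]"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.List != nil {
			fmt.Println("list type:", entry.List.Type)
			for _, item := range entry.List.Items {
				// Level counts the leading dots before each item
				fmt.Println("level", item.Level, "with", len(item.Content), "paragraph(s)")
			}
		}
	}
}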

parser/para.go

@@ -0,0 +1,223 @@
package parser
import (
"bytes"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parsePara attempts to parse a paragraph until untilEOLAmt
// newlines are encountered
func (p *Parser) parsePara(untilEOLAmt int) *ast.Para {
// Create new empty para
para := &ast.Para{}
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.WS:
// Add whitespace to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Whitespace: &lit})
case scanner.PUNCT:
if lit == "[" {
// Attempt to parse link
link, _ := p.parseLink()
// If successful
if link != nil {
// Add link to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Link: link})
// Continue to next token
continue
}
}
// Add punctuation to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Punct: &lit})
case scanner.WORD:
if strings.HasPrefix(lit, "+") {
// Attempt to parse function
function := p.parseFunc(tok, lit)
// If successful
if function != nil {
// Add function to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Func: function})
// Continue to next token
continue
}
}
// Add word to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Word: &lit})
case scanner.FORMAT:
// Create new nil slice of ast.FormatType
var types []ast.FormatType
if strings.HasPrefix(lit, "_") {
// Remove leading and trailing "_"
lit = strings.Trim(lit, "_")
// Add italic format to slice
types = append(types, ast.FormatTypeItalic)
}
if strings.HasPrefix(lit, "*") {
// Remove leading and trailing "*"
lit = strings.Trim(lit, "*")
// Add bold format to slice
types = append(types, ast.FormatTypeBold)
}
if strings.HasPrefix(lit, "$") {
// Remove leading and trailing "$"
lit = strings.Trim(lit, "$")
// Add math format to slice
types = append(types, ast.FormatTypeMath)
}
if strings.HasPrefix(lit, "`") {
// Remove leading and trailing "`"
lit = strings.Trim(lit, "`")
// Add code format to slice
types = []ast.FormatType{ast.FormatTypeCode}
}
if strings.HasPrefix(lit, "~") {
// Remove leading and trailing "~"
lit = strings.Trim(lit, "~")
// Add strike format to slice
types = []ast.FormatType{ast.FormatTypeStrike}
}
// Add format to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Format: &ast.Format{
Types: types,
Text: lit,
}})
case scanner.EOL:
// If untilEOLAmt or more newlines encountered
if strings.Count(lit, "\n") >= untilEOLAmt {
// Stop parsing
break parseLoop
}
// Add EOL to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Whitespace: &lit})
case scanner.EOF:
// Stop parsing
break parseLoop
}
}
// If nothing in para
if len(para.Fragments) == 0 {
// Return nothing
return nil
}
return para
}
// parseFunc attempts to parse a function call
func (p *Parser) parseFunc(tok scanner.Token, lit string) *ast.Func {
// Create new function
function := &ast.Func{}
// If the token is not a word or does not have a prefix of "+"
if tok != scanner.WORD || !strings.HasPrefix(lit, "+") {
// Return nil as this is an invalid function call
return nil
}
// Set function name to literal, trimming "+" prefix
function.Name = strings.TrimPrefix(lit, "+")
// Scan token
tok, lit = p.scan()
// If token is not punctuation or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Unscan token
p.unscan()
// Return nil as this is an invalid function call
return nil
}
// Parse arguments
function.Args = p.parseArgs()
return function
}
// Attempt to parse link
func (p *Parser) parseLink() (*ast.Link, bool) {
// Create new link
link := &ast.Link{}
// Initialize buffers for link properties
textBuf := &bytes.Buffer{}
linkBuf := &bytes.Buffer{}
// Set current buffer to text buffer
currentBuf := textBuf
// Declare variable for last literal
var lastLit string
// Define variable for amount of scans performed
amtScans := 0
parseLoop:
for {
// Scan token
tok, lit := p.scan()
// Increment amtScans
amtScans++
switch tok {
case scanner.WORD:
// Write word to current buffer
currentBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to current buffer
currentBuf.WriteString(lit)
case scanner.PUNCT:
// If closing bracket found but no text stored
if lit == "]" && currentBuf.Len() == 0 {
// Unscan token
p.unscan()
// Return nil as this is an invalid link
return nil, false
}
// If last literal is "]" and current is "("
if lastLit == "]" && lit == "(" {
// Switch current buffer to link buffer
currentBuf = linkBuf
// Continue to next token
continue
}
// If literal is ")"
if lit == ")" {
// Stop parsing
break parseLoop
}
// If literal is not "]"
if lit != "]" {
// Write literal to current buffer
currentBuf.WriteString(lit)
}
case scanner.EOL, scanner.EOF:
// Unscan all performed scans
p.unscanMulti(amtScans)
// Return nil as this is an invalid link
return nil, false
}
// Set last literal
lastLit = lit
}
// If no text
if textBuf.Len() == 0 {
// Use link as text
textBuf.WriteString(linkBuf.String())
}
// Set properties
link.Text = textBuf.String()
link.Link = linkBuf.String()
return link, false
}
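
Between parsePara, parseFunc, and parseLink, a paragraph can mix plain words, whitespace, punctuation, inline formatting (the FORMAT delimiters trimmed above), "[text](url)" links, and "+name[arg, ...]" function calls. The sketch below walks the resulting fragments; exact tokenization (for example, whether "+include" arrives as a single WORD) depends on the scanner package, which is not shown here.

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "Read the _docs_ at [the site](https://example.com) or run +include[header]"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Para == nil {
			continue
		}
		// parsePara sets exactly one field per fragment (word, whitespace,
		// punctuation, format, link, or function call)
		for _, frag := range entry.Para.Fragments {
			switch {
			case frag.Word != nil:
				fmt.Print(*frag.Word)
			case frag.Whitespace != nil:
				fmt.Print(*frag.Whitespace)
			case frag.Punct != nil:
				fmt.Print(*frag.Punct)
			case frag.Format != nil:
				fmt.Printf("<%v>%s</>", frag.Format.Types, frag.Format.Text)
			case frag.Link != nil:
				fmt.Printf("[%s -> %s]", frag.Link.Text, frag.Link.Link)
			case frag.Func != nil:
				fmt.Printf("+%s%v", frag.Func.Name, frag.Func.Args)
			}
		}
		fmt.Println()
	}
}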

parser/parser.go

@@ -0,0 +1,197 @@
/*
AMU: Custom simple markup language
Copyright (C) 2021 Arsen Musayelyan
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Package parser provides a parser for AMU source code
package parser
import (
"io"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/internal/stack"
"go.arsenm.dev/amu/scanner"
)
// Parser parses tokens from scanner.Scanner into an AST
type Parser struct {
s *scanner.Scanner
buf *buffer
}
// New creates a new parser using a new scanner with the
// provided reader
func New(r io.Reader) *Parser {
return &Parser{s: scanner.New(r), buf: newBuffer()}
}
// NewWithScanner creates a new parser using the provided scanner
func NewWithScanner(s *scanner.Scanner) *Parser {
return &Parser{s: s, buf: newBuffer()}
}
// scan scans a single token from the underlying scanner.Scanner
func (p *Parser) scan() (scanner.Token, string) {
// If at least one unscan is pending
if p.buf.unscans > 0 {
// Get token and literal from buffer
tok, lit := p.buf.pop()
// Decrement unscans
p.buf.unscans--
// Return buffered token and literal
return tok, lit
}
// Scan token and literal from scanner
tok, lit := p.s.Scan()
// Add token and literal to buffer
p.buf.push(tok, lit)
// Return token and literal
return tok, lit
}
// unscan unscans a single token
func (p *Parser) unscan() {
// Increment unscans
p.buf.unscans++
}
// unscanMulti unscans multiple tokens
func (p *Parser) unscanMulti(amt int) {
// Increment unscans by amt
p.buf.unscans += amt
}
// buffer stores tokens and literals for unscans
type buffer struct {
tok *stack.Stack
lit *stack.Stack
unscans int
}
// newBuffer creates a new buffer, initializing stacks
func newBuffer() *buffer {
return &buffer{
tok: stack.New(),
lit: stack.New(),
}
}
// push adds a token and literal to the buffer
func (b *buffer) push(tok scanner.Token, lit string) {
// Add token to buffer
b.tok.Push(tok)
// Add literal to buffer
b.lit.Push(lit)
}
// pop removes a token from the buffer and returns it
func (b *buffer) pop() (scanner.Token, string) {
if b.tok.Size() > 0 {
tok := b.tok.Pop()
lit := b.lit.Pop()
return tok.(scanner.Token), lit.(string)
}
return scanner.EOF, ""
}
// peek returns the most recently scanned token and literal without removing them from the buffer
func (b *buffer) peek() (scanner.Token, string) {
if b.tok.Size() > 0 {
tok := b.tok.Peek()
lit := b.lit.Peek()
return tok.(scanner.Token), lit.(string)
}
return scanner.EOF, ""
}
// Parse parses the input into an AST
func (p *Parser) Parse() (*ast.AST, error) {
// Create new AST
AST := &ast.AST{}
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.HEADING:
p.unscan()
// Attempt to parse heading
heading := p.parseHeading()
// If successful
if heading != nil {
// Add heading to the AST
AST.Entries = append(AST.Entries, ast.Entry{Heading: heading})
}
case scanner.WS, scanner.WORD, scanner.FORMAT, scanner.PUNCT:
if tok == scanner.PUNCT && lit == "!" {
// Attempt to parse image
img := p.parseImage()
// If successful
if img != nil {
// Add image to AST
AST.Entries = append(AST.Entries, ast.Entry{Image: img})
// Continue to next token
continue
}
} else if tok == scanner.WORD && lit == "=list" {
// Attempt to parse list
list := p.parseList(tok, lit)
// If successful
if list != nil {
// Add list to AST
AST.Entries = append(AST.Entries, ast.Entry{List: list})
// Continue to next token
continue
}
} else if tok == scanner.WORD && lit == "=code" {
// Attempt to parse code
code := p.parseCode(tok, lit)
// If successful
if code != nil {
// Add code to AST
AST.Entries = append(AST.Entries, ast.Entry{Code: code})
// Continue to next token
continue
}
}
// Unscan token as it will be needed for parsing para
p.unscan()
// Attempt to parse paragraph until 2 newlines encountered
para := p.parsePara(2)
if para != nil {
AST.Entries = append(AST.Entries, ast.Entry{Para: para})
}
case scanner.EOL:
// If 2 or more newlines encountered
if strings.Count(lit, "\n") >= 2 {
// Add break to AST
AST.Entries = append(AST.Entries, ast.Entry{Break: &ast.Break{}})
}
case scanner.EOF:
// Stop parsing
break parseLoop
}
}
// Return filled AST
return AST, nil
}
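
Putting the commit together, Parse turns an AMU document into a flat list of entries where exactly one field of each ast.Entry is set. A final end-to-end sketch, with the usual caveat that the ast and scanner packages are only inferred here from how this parser uses them:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "Some _formatted_ text.\n\n=list[unordered]\n. An item\n=end[]\n"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for i, entry := range AST.Entries {
		// Only one of these fields is non-nil per entry
		switch {
		case entry.Heading != nil:
			fmt.Println(i, "heading, level", entry.Heading.Level)
		case entry.Para != nil:
			fmt.Println(i, "paragraph with", len(entry.Para.Fragments), "fragments")
		case entry.List != nil:
			fmt.Println(i, "list of type", entry.List.Type)
		case entry.Code != nil:
			fmt.Println(i, "code block in", entry.Code.Language)
		case entry.Image != nil:
			fmt.Println(i, "image:", entry.Image.Source)
		case entry.Break != nil:
			fmt.Println(i, "break")
		}
	}
}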