Initial Commit

2021-10-02 15:12:57 -07:00
commit 1ff241a74e
22 changed files with 3000 additions and 0 deletions

parser/code.go

@@ -0,0 +1,85 @@
package parser
import (
"bytes"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
func (p *Parser) parseCode(tok scanner.Token, lit string) *ast.Code {
// Create new code
code := &ast.Code{}
// If token is not WORD or literal is not "=code"
if tok != scanner.WORD || lit != "=code" {
// Return nil as this code is invalid
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Unscan token
p.unscan()
// Return nil as this code is invalid
return nil
}
// Parse argument list
args := p.parseArgs()
// If 1 or more arguments provided
if len(args) >= 1 {
// Set language to first argument
code.Language = args[0]
}
// If 2 or more arguments provided
if len(args) >= 2 {
// Set style to second argument
code.Style = args[1]
}
// Create buffer for text
textBuf := &bytes.Buffer{}
for {
// Scan token
tok, lit = p.scan()
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return code
}
// If token is WORD and lit is "=end"
if tok == scanner.WORD && lit == "=end" {
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "["
if tok != scanner.PUNCT && lit != "[" {
// Return nil as this is not a valid code
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "]"
if tok != scanner.PUNCT && lit != "]" {
// Return nil as this is not a valid code
return nil
}
break
}
// Write literal to text buffer
textBuf.WriteString(lit)
}
// Set code text to contents of text buffer
code.Text = textBuf.String()
return code
}
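
parseCode implies a block syntax of the form "=code[language, style]" whose raw contents run until a closing "=end[]". Below is a minimal usage sketch (not part of this commit) showing how such a block surfaces through the exported Parse API; it assumes the package is importable as go.arsenm.dev/amu/parser and that the ast.Code fields are exactly the ones set above.

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "=code[go]\nfmt.Println(\"hello\")\n=end[]"
	// Parse routes the leading "=code" word to parseCode (see parser.go below)
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Code != nil {
			// Language comes from the bracketed argument list;
			// Text is everything scanned before "=end[]"
			fmt.Printf("%s: %q\n", entry.Code.Language, entry.Code.Text)
		}
	}
}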

parser/common.go

@@ -0,0 +1,55 @@
package parser
import (
"bytes"
"go.arsenm.dev/amu/scanner"
)
// Attempt to parse an argument list (comma-separated)
func (p *Parser) parseArgs() []string {
// Create buffer for arguments
argBuf := &bytes.Buffer{}
// Create new nil slice for arguments
var args []string
parseLoop:
for {
// Scan token
tok, lit := p.scan()
// If end of file
if tok == scanner.EOF {
// Return nil as this is an invalid argument list
return nil
}
switch tok {
case scanner.WORD:
// Write word to argument buffer
argBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to argument buffer
argBuf.WriteString(lit)
case scanner.PUNCT:
// If literal is "]"
if lit == "]" {
// If length of argument is greater than 0
if argBuf.Len() > 0 {
// Add current argument to slice
args = append(args, argBuf.String())
}
// Stop parsing
break parseLoop
} else if lit == "," {
// Add argument to slice
args = append(args, argBuf.String())
// Reset buffer
argBuf.Reset()
}
}
}
// Return parsed arguments
return args
}
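
Since parseArgs is unexported and expects the opening "[" to have been consumed already, the most direct way to exercise it is an internal test. A hypothetical sketch that could live in this package (file name and assertions are illustrative only, and assume the scanner splits words, punctuation, and whitespace as the code above expects):

package parser

import (
	"strings"
	"testing"
)

func TestParseArgs(t *testing.T) {
	// The scanner starts just past the opening "[" of an argument list
	p := New(strings.NewReader("go, monokai] trailing text"))
	args := p.parseArgs()
	if len(args) != 2 {
		t.Fatalf("expected 2 arguments, got %d", len(args))
	}
	// Whitespace between arguments is written to the buffer as scanned,
	// so the second argument keeps its leading space
	if args[0] != "go" || strings.TrimSpace(args[1]) != "monokai" {
		t.Fatalf("unexpected arguments: %q", args)
	}
}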

parser/heading.go

@@ -0,0 +1,31 @@
package parser
import (
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// Attempt to parse heading
func (p *Parser) parseHeading() *ast.Heading {
// Scan token
tok, lit := p.scan()
// Set level to length of HEADING token
level := len(lit)
// If token is not HEADING or level is greater than 6
if tok != scanner.HEADING || level > 6 {
// Return nil as this is not a valid heading
return nil
}
// Parse para until one newline is encountered
para := p.parsePara(1)
// If successful
if para != nil {
return &ast.Heading{Level: level, Content: para}
}
// Return nil as this is not a valid heading
return nil
}

parser/image.go

@@ -0,0 +1,86 @@
package parser
import (
"bytes"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parseImage attempts to parse an image
func (p *Parser) parseImage() *ast.Image {
// Create new image
img := &ast.Image{}
// Create buffers for image properties
altBuf := &bytes.Buffer{}
srcBuf := &bytes.Buffer{}
linkBuf := &bytes.Buffer{}
// Set current buffer to alt text buffer
currentBuf := altBuf
// Scan token
tok, lit := p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Return nil as this is not a valid image
return nil
}
// Declare variable for last literal
var lastLit string
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.WORD:
// Write word to current buffer
currentBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to current buffer
currentBuf.WriteString(lit)
case scanner.PUNCT:
// If last literal is "]" and current is "("
if lastLit == "]" && lit == "(" {
// Set current buffer to source buffer
currentBuf = srcBuf
// Continue to next token
continue
}
// If last literal is ")" and current is "{"
if lastLit == ")" && lit == "{" {
// Set current buffer to link buffer
currentBuf = linkBuf
// Continue to next token
continue
}
// If current literal is "}" and current buffer is link buffer
if lit == "}" && currentBuf == linkBuf {
// Stop parsing
break parseLoop
}
// If literal does not contain any of the restricted characters
if !strings.ContainsAny(lit, "()[]{}") {
// Write literal to current buffer
currentBuf.WriteString(lit)
}
case scanner.EOL, scanner.EOF:
// Return nil as this is not a valid image
return nil
}
// Set last literal
lastLit = lit
}
// Set image properties
img.Alternate = altBuf.String()
img.Source = srcBuf.String()
img.Link = linkBuf.String()
return img
}
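
Read together with the "!" case in Parse (parser.go below), parseImage implies an image syntax of "![alt text](source){link}". A hedged usage sketch under the same assumptions as the earlier example, relying on the ast.Image fields set above:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "![A diagram](diagram.png){https://example.com}"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Image != nil {
			// Alternate, Source, and Link come from the [], (), and {} sections
			fmt.Println(entry.Image.Alternate, "|", entry.Image.Source, "|", entry.Image.Link)
		}
	}
}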

parser/list.go

@@ -0,0 +1,143 @@
package parser
import (
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parseList attempts to parse a list
func (p *Parser) parseList(tok scanner.Token, lit string) *ast.List {
// Create new list
list := &ast.List{}
// If token is not WORD or literal is not "=list"
if tok != scanner.WORD || lit != "=list" {
// Return nil as this list is invalid
return nil
}
tok, lit = p.scan()
// If token is not PUNCT or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Return nil as this list is invalid
return nil
}
tok, lit = p.scan()
// If token is not WORD (the type of the list)
if tok != scanner.WORD {
// Return nil as this list is invalid
return nil
}
list.Type = lit
tok, lit = p.scan()
// If token is not PUNCT or literal is not "]"
if tok != scanner.PUNCT || lit != "]" {
// Return nil as this list is invalid
return nil
}
parseLoop:
for {
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// Create new list item at level 0
item := ast.ListItem{Level: 0}
for {
// Scan token
tok, lit = p.scan()
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// If token is not punctuation or literal is not "."
if tok != scanner.PUNCT || lit != "." {
// Break out of loop as this is the end of the level marker
break
}
// Increment item level
item.Level++
}
for tok != scanner.PUNCT && lit != "." {
// If token is WORD and literal is "=end"
if tok == scanner.WORD && lit == "=end" {
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "["
if tok != scanner.PUNCT && lit != "[" {
// Return nil as this is not a valid list
return nil
}
// Scan token
tok, lit = p.scan()
// If token is not PUNCT and literal is not "]"
if tok != scanner.PUNCT && lit != "]" {
// Return nil as this is not a valid list
return nil
}
// Add item to list
list.Items = append(list.Items, item)
// Stop parsing
break parseLoop
}
// If end of file
if tok == scanner.EOF {
// Return whatever was parsed so far
return list
}
// Unscan token as it will be required for para
p.unscan()
// Attempt to parse para until one newline encountered
para := p.parsePara(1)
if para == nil {
break
}
// Add para to item content
item.Content = append(item.Content, para)
// Scan token for next loop
tok, lit = p.scan()
// If end of line
if tok == scanner.EOL {
// Scan again
tok, lit = p.scan()
}
}
// If token is the start of the next item's level marker
if tok == scanner.PUNCT && lit == "." {
// Unscan for next level loop
p.unscan()
}
// If no content in item
if len(item.Content) == 0 {
// Continue to next item
continue
}
// Add item to list items
list.Items = append(list.Items, item)
}
// Return list
return list
}
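
parseList implies list markup of the form "=list[type]", followed by items whose nesting level is the number of leading dots, and terminated by "=end[]". A rough usage sketch, again assuming the ast.List and ast.ListItem fields used above:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "=list[unordered]\n. First item\n.. Nested item\n=end[]"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.List != nil {
			fmt.Println("list type:", entry.List.Type)
			for _, item := range entry.List.Items {
				// Level counts the leading dots before each item
				fmt.Println("level", item.Level, "with", len(item.Content), "paragraph(s)")
			}
		}
	}
}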

parser/para.go

@@ -0,0 +1,223 @@
package parser
import (
"bytes"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/scanner"
)
// parsePara attempts to parse a paragraph until untilEOLAmt
// newlines are encountered
func (p *Parser) parsePara(untilEOLAmt int) *ast.Para {
// Create new empty para
para := &ast.Para{}
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.WS:
// Add whitespace to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Whitespace: &lit})
case scanner.PUNCT:
if lit == "[" {
// Attempt to parse link
link, _ := p.parseLink()
// If successful
if link != nil {
// Add link to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Link: link})
// Continue to next token
continue
}
}
// Add punctuation to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Punct: &lit})
case scanner.WORD:
if strings.HasPrefix(lit, "+") {
// Attempt to parse function
function := p.parseFunc(tok, lit)
// If successful
if function != nil {
// Add function to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Func: function})
// Continue to next token
continue
}
}
// Add word to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Word: &lit})
case scanner.FORMAT:
// Create new nil slice of ast.FormatType
var types []ast.FormatType
if strings.HasPrefix(lit, "_") {
// Remove leading and trailing "_"
lit = strings.Trim(lit, "_")
// Add italic format to slice
types = append(types, ast.FormatTypeItalic)
}
if strings.HasPrefix(lit, "*") {
// Remove leading and trailing "*"
lit = strings.Trim(lit, "*")
// Add bold format to slice
types = append(types, ast.FormatTypeBold)
}
if strings.HasPrefix(lit, "$") {
// Remove leading and trailing "$"
lit = strings.Trim(lit, "$")
// Add math format to slice
types = append(types, ast.FormatTypeMath)
}
if strings.HasPrefix(lit, "`") {
// Remove leading and trailing "`"
lit = strings.Trim(lit, "`")
// Add code format to slice
types = []ast.FormatType{ast.FormatTypeCode}
}
if strings.HasPrefix(lit, "~") {
// Remove leading and trailing "~"
lit = strings.Trim(lit, "~")
// Add strike format to slice
types = []ast.FormatType{ast.FormatTypeStrike}
}
// Add format to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Format: &ast.Format{
Types: types,
Text: lit,
}})
case scanner.EOL:
// If untilEOLAmt or more newlines encountered
if strings.Count(lit, "\n") >= untilEOLAmt {
// Stop parsing
break parseLoop
}
// Add EOL to para
para.Fragments = append(para.Fragments, ast.ParaFragment{Whitespace: &lit})
case scanner.EOF:
// Stop parsing
break parseLoop
}
}
// If nothing in para
if len(para.Fragments) == 0 {
// Return nothing
return nil
}
return para
}
// parseFunc attempts to parse a function call
func (p *Parser) parseFunc(tok scanner.Token, lit string) *ast.Func {
// Create new function
function := &ast.Func{}
// If the token is not a word or does not have a prefix of "+"
if tok != scanner.WORD || !strings.HasPrefix(lit, "+") {
// Return nil as this is an invalid function call
return nil
}
// Set function name to literal, trimming "+" prefix
function.Name = strings.TrimPrefix(lit, "+")
// Scan token
tok, lit = p.scan()
// If token is not punctuation or literal is not "["
if tok != scanner.PUNCT || lit != "[" {
// Unscan token
p.unscan()
// Return nil as this is an invalid function call
return nil
}
// Parse arguments
function.Args = p.parseArgs()
return function
}
// Attempt to parse link
func (p *Parser) parseLink() (*ast.Link, bool) {
// Create new link
link := &ast.Link{}
// Initialize buffers for link properties
textBuf := &bytes.Buffer{}
linkBuf := &bytes.Buffer{}
// Set current buffer to text buffer
currentBuf := textBuf
// Declare variable for last literal
var lastLit string
// Define variable for amount of scans performed
amtScans := 0
parseLoop:
for {
// Scan token
tok, lit := p.scan()
// Increment amtScans
amtScans++
switch tok {
case scanner.WORD:
// Write word to current buffer
currentBuf.WriteString(lit)
case scanner.WS:
// Write whitespace to current buffer
currentBuf.WriteString(lit)
case scanner.PUNCT:
// If closing bracket found but no text stored
if lit == "]" && currentBuf.Len() == 0 {
// Unscan token
p.unscan()
// Return nil as this is an invalid link
return nil, false
}
// If last literal is "]" and current is "("
if lastLit == "]" && lit == "(" {
// Switch current buffer to link buffer
currentBuf = linkBuf
// Continue to next token
continue
}
// If literal is ")"
if lit == ")" {
// Stop parsing
break parseLoop
}
// If literal is not "]"
if lit != "]" {
// Write literal to current buffer
currentBuf.WriteString(lit)
}
case scanner.EOL, scanner.EOF:
// Unscan all performed scans
p.unscanMulti(amtScans)
// Return nil as this is an invalid link
return nil, false
}
// Set last literal
lastLit = lit
}
// If no text
if textBuf.Len() == 0 {
// Use link as text
textBuf.WriteString(linkBuf.String())
}
// Set properties
link.Text = textBuf.String()
link.Link = linkBuf.String()
return link, false
}
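
Between parsePara, parseFunc, and parseLink, a paragraph can mix plain words, whitespace, punctuation, inline formatting (the FORMAT delimiters trimmed above), "[text](url)" links, and "+name[arg, ...]" function calls. The sketch below walks the resulting fragments; exact tokenization (for example, whether "+include" arrives as a single WORD) depends on the scanner package, which is not shown here.

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "Read the _docs_ at [the site](https://example.com) or run +include[header]"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for _, entry := range AST.Entries {
		if entry.Para == nil {
			continue
		}
		// parsePara sets exactly one field per fragment (word, whitespace,
		// punctuation, format, link, or function call)
		for _, frag := range entry.Para.Fragments {
			switch {
			case frag.Word != nil:
				fmt.Print(*frag.Word)
			case frag.Whitespace != nil:
				fmt.Print(*frag.Whitespace)
			case frag.Punct != nil:
				fmt.Print(*frag.Punct)
			case frag.Format != nil:
				fmt.Printf("<%v>%s</>", frag.Format.Types, frag.Format.Text)
			case frag.Link != nil:
				fmt.Printf("[%s -> %s]", frag.Link.Text, frag.Link.Link)
			case frag.Func != nil:
				fmt.Printf("+%s%v", frag.Func.Name, frag.Func.Args)
			}
		}
		fmt.Println()
	}
}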

parser/parser.go

@@ -0,0 +1,197 @@
/*
AMU: Custom simple markup language
Copyright (C) 2021 Arsen Musayelyan
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Package parser provides a parser for AMU source code
package parser
import (
"io"
"strings"
"go.arsenm.dev/amu/ast"
"go.arsenm.dev/amu/internal/stack"
"go.arsenm.dev/amu/scanner"
)
// Parser parses tokens from scanner.Scanner into an AST
type Parser struct {
s *scanner.Scanner
buf *buffer
}
// New creates a new parser using a new scanner with the
// provided reader
func New(r io.Reader) *Parser {
return &Parser{s: scanner.New(r), buf: newBuffer()}
}
// NewWithScanner creates a new parser using the provided scanner
func NewWithScanner(s *scanner.Scanner) *Parser {
return &Parser{s: s, buf: newBuffer()}
}
// scan scans a single token from the underlying scanner.Scanner
func (p *Parser) scan() (scanner.Token, string) {
// If at least one unscan is pending
if p.buf.unscans > 0 {
// Get token and literal from buffer
tok, lit := p.buf.pop()
// Decrement unscans
p.buf.unscans--
// Return buffered token and literal
return tok, lit
}
// Scan token and literal from scanner
tok, lit := p.s.Scan()
// Add token and literal to buffer
p.buf.push(tok, lit)
// Return token and literal
return tok, lit
}
// unscan unscans a single token
func (p *Parser) unscan() {
// Increment unscans
p.buf.unscans++
}
// unscanMulti unscans multiple tokens
func (p *Parser) unscanMulti(amt int) {
// Increment unscans by amt
p.buf.unscans += amt
}
// buffer stores tokens and literals for unscans
type buffer struct {
tok *stack.Stack
lit *stack.Stack
unscans int
}
// newBuffer creates a new buffer, initializing stacks
func newBuffer() *buffer {
return &buffer{
tok: stack.New(),
lit: stack.New(),
}
}
// push adds a token and literal to the buffer
func (b *buffer) push(tok scanner.Token, lit string) {
// Add token to buffer
b.tok.Push(tok)
// Add literal to buffer
b.lit.Push(lit)
}
// pop removes a token from the buffer and returns it
func (b *buffer) pop() (scanner.Token, string) {
if b.tok.Size() > 0 {
tok := b.tok.Pop()
lit := b.lit.Pop()
return tok.(scanner.Token), lit.(string)
}
return scanner.EOF, ""
}
// peek returns the most recently scanned token and literal without removing them from the buffer
func (b *buffer) peek() (scanner.Token, string) {
if b.tok.Size() > 0 {
tok := b.tok.Peek()
lit := b.lit.Peek()
return tok.(scanner.Token), lit.(string)
}
return scanner.EOF, ""
}
// Parse parses the input into an AST
func (p *Parser) Parse() (*ast.AST, error) {
// Create new AST
AST := &ast.AST{}
parseLoop:
for {
// Scan token
tok, lit := p.scan()
switch tok {
case scanner.HEADING:
p.unscan()
// Attempt to parse heading
heading := p.parseHeading()
// If successful
if heading != nil {
// Add heading to the AST
AST.Entries = append(AST.Entries, ast.Entry{Heading: heading})
}
case scanner.WS, scanner.WORD, scanner.FORMAT, scanner.PUNCT:
if tok == scanner.PUNCT && lit == "!" {
// Attempt to parse image
img := p.parseImage()
// If successful
if img != nil {
// Add image to AST
AST.Entries = append(AST.Entries, ast.Entry{Image: img})
// Continue to next token
continue
}
} else if tok == scanner.WORD && lit == "=list" {
// Attempt to parse list
list := p.parseList(tok, lit)
// If successful
if list != nil {
// Add list to AST
AST.Entries = append(AST.Entries, ast.Entry{List: list})
// Continue to next token
continue
}
} else if tok == scanner.WORD && lit == "=code" {
// Attempt to parse code
code := p.parseCode(tok, lit)
// If successful
if code != nil {
// Add code to AST
AST.Entries = append(AST.Entries, ast.Entry{Code: code})
// Continue to next token
continue
}
}
// Unscan token as it will be needed for parsing para
p.unscan()
// Attempt to parse paragraph until 2 newlines encountered
para := p.parsePara(2)
if para != nil {
AST.Entries = append(AST.Entries, ast.Entry{Para: para})
}
case scanner.EOL:
// If 2 or more newlines encountered
if strings.Count(lit, "\n") >= 2 {
// Add break to AST
AST.Entries = append(AST.Entries, ast.Entry{Break: &ast.Break{}})
}
case scanner.EOF:
// Stop parsing
break parseLoop
}
}
// Return filled AST
return AST, nil
}
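
Putting the commit together, Parse turns an AMU document into a flat list of entries where exactly one field of each ast.Entry is set. A final end-to-end sketch, with the usual caveat that the ast and scanner packages are only inferred here from how this parser uses them:

package main

import (
	"fmt"
	"strings"

	"go.arsenm.dev/amu/parser"
)

func main() {
	src := "Some _formatted_ text.\n\n=list[unordered]\n. An item\n=end[]\n"
	AST, err := parser.New(strings.NewReader(src)).Parse()
	if err != nil {
		panic(err)
	}
	for i, entry := range AST.Entries {
		// Only one of these fields is non-nil per entry
		switch {
		case entry.Heading != nil:
			fmt.Println(i, "heading, level", entry.Heading.Level)
		case entry.Para != nil:
			fmt.Println(i, "paragraph with", len(entry.Para.Fragments), "fragments")
		case entry.List != nil:
			fmt.Println(i, "list of type", entry.List.Type)
		case entry.Code != nil:
			fmt.Println(i, "code block in", entry.Code.Language)
		case entry.Image != nil:
			fmt.Println(i, "image:", entry.Image.Source)
		case entry.Break != nil:
			fmt.Println(i, "break")
		}
	}
}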