commit 93e9ee10f9aafcaca81f0705d58e1a9be8ed7c5b Author: Elara6331 Date: Sat Nov 9 23:12:29 2024 -0800 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a76d4e5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +/dist/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d0a1fa1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7e8bb0c --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# HissCL + +A [HashiCorp Config Language](https://github.com/hashicorp/hcl) parser for Python + +## Usage + +For most simple use-cases, you can use the `load*` convenience functions: + +`load_file()`: +```python +import hisscl +cfg = hisscl.load_file("config.hcl") +``` + +`loads()`: +```python +import hisscl +# Use the optional name argument to specify a filename for errors +cfg = hisscl.loads("x = 2 * 4", name='string.hcl') +``` + +`load()`: +```python +import hisscl +with open('test.hcl', 'r') as fl: + # Use the optional name argument to specify a filename for errors + cfg = hisscl.load(fl, name=fl.name) +``` + +Each `load*` function has an optional `vars: dict[str, Any]` parameter, whose elements are used as variables in your config file. For example, if you have `x = y + 1`, `y` must be defined in `vars`. + +For more advanced use-cases, `lexer`, `parser`, `ast`, and `interp` submodules are provided. + +## Output Format + +The interpreter outputs a Python dictionary containing field values and blocks. Blocks are stored in a list of `interp.Block` values. `interp.Block` is a subclass of `dict` with an extra `labels` attribute that can be used to get a list of block labels.
For example: + +```python +import hisscl +cfg = hisscl.loads('x "y" "z" { a = "b" }') +print(cfg['x'][0].labels) # ['y', 'z'] +print(cfg['x'][0]['a']) # b +``` + +## Features + +Currently, this parser supports all HCL features except: + +- [For Expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#for-expressions) +- [Templates](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#templates) +- [Function Calls](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#functions-and-function-calls) + +Support for these features is planned. \ No newline at end of file diff --git a/hisscl/__init__.py b/hisscl/__init__.py new file mode 100644 index 0000000..ee8f1bc --- /dev/null +++ b/hisscl/__init__.py @@ -0,0 +1,18 @@ +from . import interp +import io +import typing + +__all__ = ['load', 'loads', 'load_file'] + +def load(stream: typing.TextIO, name: str = "", vars: dict[str, typing.Any] = {}): + i = interp.Interp(stream, name) + i.update(vars) + return i.run() + +def loads(src: str, name: str = "", vars: dict[str, typing.Any] = {}): + with io.StringIO(src) as stream: + return load(stream, name, vars) + +def load_file(path: str, vars: dict[str, typing.Any] = {}): + with open(path, 'r') as fl: + return load(fl, path, vars) \ No newline at end of file diff --git a/hisscl/ast.py b/hisscl/ast.py new file mode 100644 index 0000000..ca1ab30 --- /dev/null +++ b/hisscl/ast.py @@ -0,0 +1,86 @@ +import dataclasses +import typing + +@dataclasses.dataclass +class Position: + name: str = "" + line: int = 1 + col: int = 0 + + def __str__(self) -> str: + return f'{self.name}:{self.line}:{self.col}' + +@dataclasses.dataclass +class Integer: + pos: Position + value: int + +@dataclasses.dataclass +class Float: + pos: Position + value: float + +@dataclasses.dataclass +class Bool: + pos: Position + value: bool + +@dataclasses.dataclass +class String: + pos: Position + value: str + +@dataclasses.dataclass +class VariableRef: + pos: Position + name: str + 
+Literal = Integer | Float | Bool | String | VariableRef + +@dataclasses.dataclass +class Tuple: + pos: Position + items: list['Value'] + +@dataclasses.dataclass +class Object: + pos: Position + items: list[tuple['Value', 'Value']] + +Collection = Tuple | Object + +@dataclasses.dataclass +class Operator: + pos: Position + value: str + +@dataclasses.dataclass +class BinaryExpression: + pos: Position + left: 'Value' + op: Operator + right: 'Value' + +@dataclasses.dataclass +class UnaryExpression: + pos: Position + op: Operator + value: 'Value' + +Expression = BinaryExpression | UnaryExpression +Value = Literal | Collection | Expression + +@dataclasses.dataclass +class Assignment: + pos: Position + name: str + value: Value + +@dataclasses.dataclass +class Block: + pos: Position + name: str + labels: list[str] + children: list[Assignment | typing.Self] + +AST = list[Assignment | Block] \ No newline at end of file diff --git a/hisscl/interp.py b/hisscl/interp.py new file mode 100644 index 0000000..f00333d --- /dev/null +++ b/hisscl/interp.py @@ -0,0 +1,168 @@ +from . import ast +from . 
import parser + +import typing +import io + +__all__ = ['TypeError', 'Block', 'Interp'] + +class TypeError(Exception): + def __init__(self, pos: ast.Position, action: str, issue: str, val: typing.Any): + super().__init__(f'{pos}: cannot perform {action} on {issue} operand ({type(val).__name__})') + +class Block(dict): + def __init__(self, labels: list[str]): + self.labels = labels + super().__init__() + +class Interp: + vars: dict[str, typing.Any] = {} + + def __init__(self, stream: typing.TextIO, name: str): + self.parser = parser.Parser(stream, name) + + def __setitem__(self, key, val): + self.vars[key] = val + + def __getitem__(self, key) -> typing.Any: + return self.vars[key] + + def __delitem__(self, key): + del self.vars[key] + + def update(self, vars: dict[str, typing.Any]): + self.vars.update(vars) + + def _convert_value(self, val: ast.Value) -> typing.Any: + if isinstance(val, ast.VariableRef): + if val.name not in self.vars: + raise KeyError(f'{val.pos}: no such variable: {repr(val.name)}') + return self.vars[val.name] + elif isinstance(val, ast.Literal): + return val.value + elif isinstance(val, ast.Tuple): + return [self._convert_value(item) for item in val.items] + elif isinstance(val, ast.Object): + return {self._convert_value(key): self._convert_value(value) for key, value in val.items} + elif isinstance(val, ast.BinaryExpression): + return self._eval_binary_expr(val) + elif isinstance(val, ast.UnaryExpression): + return self._eval_unary_expr(val) + + def _is_numerical(self, val: typing.Any) -> bool: + return isinstance(val, float | int) and type(val) is not bool + + def _is_comparable(self, val: typing.Any) -> bool: + return self._is_numerical(val) or isinstance(val, str) + + def _eval_unary_expr(self, expr: ast.UnaryExpression) -> float | int | bool: + val = self._convert_value(expr.value) + match expr.op.value: + case '!': + if type(val) is not bool: + raise TypeError(expr.value.pos, 'NOT operation', 'non-boolean', val) + return not val + case 
'-': + if not self._is_numerical(val): + raise TypeError(expr.value.pos, 'negation', 'non-numerical', val) + return -val + case _: + raise ValueError(f'{expr.op.pos}: unknown unary operation: {repr(expr.op.value)}') + + def _eval_binary_expr(self, expr: ast.BinaryExpression) -> float | int | bool: + left = self._convert_value(expr.left) + right = self._convert_value(expr.right) + + match expr.op.value: + case '==': + return left == right + case '!=': + return left != right + case '+': + if not self._is_numerical(left): + raise TypeError(expr.left.pos, 'addition operation', 'non-numerical', left) + elif not self._is_numerical(right): + raise TypeError(expr.right.pos, 'addition operation', 'non-numerical', right) + return left + right + case '-': + if not self._is_numerical(left): + raise TypeError(expr.left.pos, 'subtraction operation', 'non-numerical', left) + elif not self._is_numerical(right): + raise TypeError(expr.right.pos, 'subtraction operation', 'non-numerical', right) + return left - right + case '*': + if not self._is_numerical(left): + raise TypeError(expr.left.pos, 'multiplication operation', 'non-numerical', left) + elif not self._is_numerical(right): + raise TypeError(expr.right.pos, 'multiplication operation', 'non-numerical', right) + return left * right + case '/': + if not self._is_numerical(left): + raise TypeError(expr.left.pos, 'division operation', 'non-numerical', left) + elif not self._is_numerical(right): + raise TypeError(expr.right.pos, 'division operation', 'non-numerical', right) + return left / right + case '%': + if not self._is_numerical(left): + raise TypeError(expr.left.pos, 'modulo operation', 'non-numerical', left) + elif not self._is_numerical(right): + raise TypeError(expr.right.pos, 'modulo operation', 'non-numerical', right) + return left % right + case '>': + if not self._is_comparable(left): + raise TypeError(expr.left.pos, 'comparison', 'non-comparable', left) + elif not self._is_comparable(right): + raise 
TypeError(expr.right.pos, 'comparison', 'non-comparable', right) + return left > right + case '<': + if not self._is_comparable(left): + raise TypeError(expr.left.pos, 'comparison', 'non-comparable', left) + elif not self._is_comparable(right): + raise TypeError(expr.right.pos, 'comparison', 'non-comparable', right) + return left < right + case '<=': + if not self._is_comparable(left): + raise TypeError(expr.left.pos, 'comparison', 'non-comparable', left) + elif not self._is_comparable(right): + raise TypeError(expr.right.pos, 'comparison', 'non-comparable', right) + return left <= right + case '>=': + if not self._is_comparable(left): + raise TypeError(expr.left.pos, 'comparison', 'non-comparable', left) + elif not self._is_comparable(right): + raise TypeError(expr.right.pos, 'comparison', 'non-comparable', right) + return left >= right + case '||': + if type(left) is not bool: + raise TypeError(expr.left.pos, 'OR operation', 'non-boolean', left) + elif type(right) is not bool: + raise TypeError(expr.right.pos, 'OR operation', 'non-boolean', right) + return left or right + case '&&': + if type(left) is not bool: + raise TypeError(expr.left.pos, 'AND operation', 'non-boolean', left) + elif type(right) is not bool: + raise TypeError(expr.right.pos, 'AND operation', 'non-boolean', right) + return left and right + case _: + raise ValueError(f'{expr.op.pos}: unknown binary operation: {repr(expr.op.value)}') + + def _run(self, tree: ast.AST, cfg: dict[typing.Any, typing.Any]): + for stmt in tree: + if isinstance(stmt, ast.Assignment): + if stmt.name in cfg: + raise KeyError(f'{stmt.pos}: {repr(stmt.name)} is already defined') + cfg[stmt.name] = self._convert_value(stmt.value) + elif isinstance(stmt, ast.Block): + if stmt.name in cfg and (not isinstance(cfg[stmt.name], list) or type(cfg[stmt.name][0]) is not Block): + raise KeyError(f'{stmt.pos}: {repr(stmt.name)} is already defined') + elif stmt.name not in cfg: + cfg[stmt.name] = [] + block = Block(stmt.labels) + 
self._run(stmt.children, block) + cfg[stmt.name].append(block) + + def run(self) -> dict[typing.Any, typing.Any]: + cfg = {} + self._run(self.parser.parse(), cfg) + return cfg \ No newline at end of file diff --git a/hisscl/lexer.py b/hisscl/lexer.py new file mode 100644 index 0000000..a141234 --- /dev/null +++ b/hisscl/lexer.py @@ -0,0 +1,254 @@ +from . import ast + +import io +import enum +import typing +import dataclasses + +__all__ = ['Token', 'ExpectedError', 'Lexer', 'is_whitespace', 'is_operator', 'is_numeric', 'is_alpha', 'is_alphanum'] + +class Token(enum.Enum): + ILLEGAL = -1 + EOF = 0 + + COMMENT = 1 + IDENT = 2 + STRING = 3 + BOOL = 4 + INTEGER = 5 + FLOAT = 6 + HEREDOC = 7 + CURLY = 8 + SQUARE = 9 + PAREN = 10 + COMMA = 11 + COLON = 12 + OPERATOR = 13 + +class ExpectedError(Exception): + def __init__(self, pos: ast.Position, expected: str, got: str): + super().__init__(f'{pos}: expected {expected}, got {"EOF" if got == '' else repr(got)}') + self.pos = pos + self.got = got + self.expected = expected + +class Lexer: + pos = ast.Position() + prev_pos = ast.Position() + unread = '' + + def __init__(self, stream: typing.TextIO, name: str): + self.stream = stream + self.pos.name = name + + def _peek(self, n: int) -> str: + pos = self.stream.tell() + text = self.stream.read(n) + self.stream.seek(pos) + return text + + def _read(self) -> str: + char = self.unread + if self.unread != '': + self.unread = '' + + if char == '': + char = self.stream.read(1) + + self.prev_pos = dataclasses.replace(self.pos) + if char == '\n': + self.pos.line += 1 + self.pos.col = 1 + elif char != '': + self.pos.col += 1 + return char + + def _unread(self, char): + self.pos = self.prev_pos + self.unread = char + + def _scan_str(self) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + out.write('"') + escape = False + char = self._read() + while True: + if char == '"' and escape: + escape = False + out.write('\\"') + elif char == 
'\\' and escape: + escape = False + out.write('\\\\') + elif char == '\\': + escape = True + elif char == '"': + break + elif char == '' or char == '\r' or char == '\n': + raise ExpectedError(self.pos, repr('"'), char) + elif escape: + escape = False + out.write('\\' + char) + else: + out.write(char) + + char = self._read() + out.write('"') + return Token.STRING, pos, out.getvalue() + + def _scan_number(self, char: str) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + tok = Token.INTEGER + with io.StringIO() as out: + while True: + if is_numeric(char): + out.write(char) + elif char == '.': + if tok == Token.FLOAT: + raise ExpectedError(self.pos, "number", char) + tok = Token.FLOAT + out.write(char) + else: + self._unread(char) + return tok, pos, out.getvalue() + char = self._read() + + def _scan_ident(self, char: str) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + while is_alphanum(char) or char in ('-', '_'): + out.write(char) + char = self._read() + self._unread(char) + val = out.getvalue() + if val in ('true', 'false'): + return Token.BOOL, pos, val + else: + return Token.IDENT, pos, out.getvalue() + + def _scan_comment(self, char: str) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + while char != '\n' and char != '': + if char != '\r': + out.write(char) + char = self._read() + return Token.COMMENT, pos, out.getvalue() + + def _scan_inline_comment(self, char: str) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + while True: + if char == '*' and self._peek(1) == '/': + self._read() + break + out.write(char) + char = self._read() + return Token.COMMENT, pos, out.getvalue() + + def _scan_heredoc(self, char: str) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + char = self._read() + if char != '<': + raise 
ExpectedError(self.pos, repr('<'), char) + + char = self._read() + if not is_alpha(char): + raise ExpectedError(self.pos, 'heredoc name', char) + + _, _, heredoc_name = self._scan_ident(char) + name_len = len(heredoc_name) - 1 + + char = self._read() + while True: + if char == heredoc_name[0] and self._peek(name_len) == heredoc_name[1:]: + self.pos.col += name_len + self.stream.seek(self.stream.tell()+name_len) + break + else: + out.write(char) + char = self._read() + + return Token.HEREDOC, pos, out.getvalue() + + # TODO: scan multi-char operators like == + def _scan_operator(self, char) -> tuple[Token, ast.Position, str]: + pos = dataclasses.replace(self.pos) + with io.StringIO() as out: + while is_operator(char): + out.write(char) + char = self._read() + self._unread(char) + val = out.getvalue() + return Token.OPERATOR, pos, out.getvalue() + + def scan(self) -> tuple[Token, ast.Position, str]: + char = self._read() + while is_whitespace(char): + char = self._read() + + match char: + case '{' | '}': + return Token.CURLY, self.pos, char + case '[' | ']': + return Token.SQUARE, self.pos, char + case '(' | ')': + return Token.PAREN, self.pos, char + case ',': + return Token.COMMA, self.pos, char + case ':': + return Token.COLON, self.pos, char + case '"': + return self._scan_str() + case '<': + # If the next character is not another less than symbol, + # this is probably a less than operator. + if self._peek(1) != '<': + return Token.OPERATOR, self.pos, char + return self._scan_heredoc(char) + case '/': + next = self._peek(1) + if next == '/': + # Ignore comment and return next token + self._scan_comment(char) + return self.scan() + elif next == '*': + # Ignore inlinecomment and return next token + self._scan_inline_comment(char) + return self.scan() + else: + # If the next character is not another slash + # or an asterisk, this is probably a division + # operator. 
+ return Token.OPERATOR, self.pos, char + case '#': + # Ignore comments and return next token + self._scan_comment(char) + return self.scan() + case '': + return Token.EOF, self.pos, char + + if is_numeric(char): + return self._scan_number(char) + elif is_alpha(char): + return self._scan_ident(char) + elif is_operator(char): + return self._scan_operator(char) + + return Token.ILLEGAL, self.pos, char + +def is_whitespace(char: str) -> bool: + return char in (' ', '\t', '\r', '\n') + +def is_operator(char: str) -> bool: + return char in ('=', '+', '-', '*', '/', '%', '!', '>', '<', '|', '&') + +def is_numeric(char: str) -> bool: + return char >= '0' and char <= '9' + +def is_alpha(char: str) -> bool: + return (char >= 'a' and char <= 'z') or (char >= 'A' and char <= 'Z') + +def is_alphanum(char: str) -> bool: + return is_numeric(char) or is_alpha(char) diff --git a/hisscl/parser.py b/hisscl/parser.py new file mode 100644 index 0000000..d7c11fe --- /dev/null +++ b/hisscl/parser.py @@ -0,0 +1,147 @@ +from . import ast +from . 
import lexer + +from typing import TextIO + +import ast as pyast + +__all__ = ['ExpectedError', 'Parser'] + +class ExpectedError(Exception): + def __init__(self, pos: ast.Position, expected: str, got: str): + super().__init__(f'{pos}: expected {expected}; got {"EOF" if got == '' else repr(got)}') + self.pos = pos + self.got = got + self.expected = expected + +class Parser: + _prev: tuple[lexer.Token, ast.Position, str] | None = None + + def __init__(self, stream: TextIO, name: str): + self.lexer = lexer.Lexer(stream, name) + + def _scan(self) -> tuple[lexer.Token, ast.Position, str]: + if self._prev is not None: + prev = self._prev + self._prev = None + return prev + return self.lexer.scan() + + def _unscan(self, tok: lexer.Token, pos: ast.Position, lit: str): + self._prev = tok, pos, lit + + def _parse_expr(self) -> ast.Value: + left = self._parse_value() + tok, pos, lit = self._scan() + if tok != lexer.Token.OPERATOR: + self._unscan(tok, pos, lit) + return left + right = self._parse_expr() + return ast.BinaryExpression(pos=left.pos, op=ast.Operator(pos=pos, value=lit), left=left, right=right) + + def _parse_tuple(self, start_pos: ast.Position) -> ast.Tuple: + items: list[ast.Value] = [] + while True: + tok, pos, lit = self._scan() + if tok == lexer.Token.SQUARE and lit == ']': + break + self._unscan(tok, pos, lit) + items.append(self._parse_expr()) + + tok, pos, lit = self._scan() + if tok != lexer.Token.COMMA and (tok != lexer.Token.SQUARE or lit != ']'): + raise ExpectedError(pos, 'comma or closing square bracket', lit) + elif tok == lexer.Token.SQUARE and lit == ']': + break + return ast.Tuple(start_pos, items) + + def _parse_object(self, start_pos: ast.Position) -> ast.Object: + items: list[tuple[ast.Value, ast.Value]] = [] + while True: + tok, pos, lit = self._scan() + if tok == lexer.Token.CURLY and lit == '}': + break + self._unscan(tok, pos, lit) + key = self._parse_expr() + + tok, pos, lit = self._scan() + if tok != lexer.Token.COLON and (tok != 
lexer.Token.OPERATOR or lit != '='): + raise ExpectedError(pos, 'colon or equals sign', lit) + + val = self._parse_expr() + items.append((key, val)) + + tok, pos, lit = self._scan() + if tok != lexer.Token.COMMA: + self._unscan(tok, pos, lit) + + return ast.Object(start_pos, items) + + def _parse_value(self) -> ast.Value: + tok, pos, lit = self._scan() + match tok: + case lexer.Token.INTEGER: + return ast.Integer(pos=pos, value=int(lit)) + case lexer.Token.FLOAT: + return ast.Float(pos=pos, value=float(lit)) + case lexer.Token.BOOL: + return ast.Bool(pos=pos, value=(lit == 'true')) + case lexer.Token.STRING: + return ast.String(pos=pos, value=pyast.literal_eval(lit)) + case lexer.Token.IDENT: + return ast.VariableRef(pos=pos, name=lit) + case lexer.Token.HEREDOC: + return ast.String(pos=pos, value=lit) + case lexer.Token.OPERATOR: + return ast.UnaryExpression(pos=pos, op=ast.Operator(pos=pos, value=lit), value=self._parse_value()) + case lexer.Token.SQUARE: + if lit != '[': + raise ExpectedError(pos, repr('['), lit) + return self._parse_tuple(pos) + case lexer.Token.CURLY: + if lit != '{': + raise ExpectedError(pos, repr('{'), lit) + return self._parse_object(pos) + case lexer.Token.PAREN: + if lit != '(': + raise ExpectedError(pos, repr('('), lit) + expr = self._parse_expr() + tok, pos, lit = self._scan() + if tok != lexer.Token.PAREN or lit != ')': + raise ExpectedError(pos, repr(')'), lit) + return expr + + raise ExpectedError(pos, 'value', lit) + + def parse(self, until: tuple[lexer.Token, str] = (lexer.Token.EOF, '')) -> ast.AST: + tree = [] + while True: + id_tok, id_pos, id_lit = self._scan() + if id_tok == until[0] and id_lit == until[1]: + break + + if id_tok != lexer.Token.IDENT: + raise ExpectedError(id_pos, str(lexer.Token.IDENT), id_lit) + + tok, pos, lit = self._scan() + if tok == lexer.Token.OPERATOR and lit == '=': + tree.append(ast.Assignment(pos=id_pos, name=id_lit, value=self._parse_expr())) + elif tok == lexer.Token.CURLY and lit == '{': + 
tree.append(ast.Block(pos=id_pos, name=id_lit, labels=[], children=self.parse(until=(lexer.Token.CURLY, '}')))) + elif tok in (lexer.Token.STRING, lexer.Token.IDENT): + labels = [] + while tok in (lexer.Token.STRING, lexer.Token.IDENT): + if tok == lexer.Token.IDENT: + labels.append(lit) + else: + self._unscan(tok, pos, lit) + val = self._parse_value() + assert isinstance(val, ast.String) + labels.append(val.value) + tok, pos, lit = self._scan() + if tok != lexer.Token.CURLY and lit != '{': + raise ExpectedError(pos, repr('{'), lit) + tree.append(ast.Block(pos=id_pos, name=id_lit, labels=labels, children=self.parse(until=(lexer.Token.CURLY, '}')))) + else: + raise ExpectedError(pos, "equals sign, opening curly brace, or string", lit) + return tree \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1185d0f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[build-system] + requires = ['hatchling', 'hatch-vcs'] + build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] + include = ["*.py"] + +[tool.hatch.version] + source = "vcs" + fallback-version = "0.0.1" + +[project] + name = "hisscl" + description = "Python HCL parser" + dynamic = ["version"] + authors = [{ name = "Elara6331", email = "elara@elara.ws" }] + readme = "README.md" + license = "MPL-2.0" + keywords = ["hcl", "hashicorp", "parser", "config", "configuration"] + requires-python = ">=3.8" + classifiers = [ + "Development Status :: 4 - Beta", + + "Intended Audience :: Developers", + "Topic :: File Formats", + "Topic :: Software Development :: Interpreters", + + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)" + ] + +[project.urls] + Repository = "https://gitea.elara.ws/Elara6331/hisscl" \ No newline at end of file