summary refs log tree commit diff
path: root/pc-thing/the_e_programming_language
diff options
context:
space:
mode:
Diffstat (limited to 'pc-thing/the_e_programming_language')
-rw-r--r--pc-thing/the_e_programming_language/ast.json79
-rw-r--r--pc-thing/the_e_programming_language/ast.ts357
-rw-r--r--pc-thing/the_e_programming_language/code.txt11
-rw-r--r--pc-thing/the_e_programming_language/compiler.ts151
-rwxr-xr-xpc-thing/the_e_programming_language/lang.ts31
-rwxr-xr-xpc-thing/the_e_programming_language/test-2.e6
-rwxr-xr-xpc-thing/the_e_programming_language/test.e43
-rwxr-xr-xpc-thing/the_e_programming_language/test.e.txt43
-rwxr-xr-xpc-thing/the_e_programming_language/tokenizer.ts171
9 files changed, 892 insertions, 0 deletions
diff --git a/pc-thing/the_e_programming_language/ast.json b/pc-thing/the_e_programming_language/ast.json
new file mode 100644
index 0000000..faf9b67
--- /dev/null
+++ b/pc-thing/the_e_programming_language/ast.json
@@ -0,0 +1,79 @@
+[
+    {
+        "type": "VariableDeclaration",
+        "identifier": "buff",
+        "value": {
+            "type": "Number",
+            "value": 0
+        },
+        "vtype": "int",
+        "length": 4
+    },
+    {
+        "type": "VariableDeclaration",
+        "identifier": "counter",
+        "value": {
+            "type": "Number",
+            "value": 0
+        },
+        "vtype": "int",
+        "length": 1
+    },
+    {
+        "type": "FunctionDeclaration",
+        "name": "_start",
+        "body": [
+            {
+                "type": "While",
+                "condition": {
+                    "type": "BinaryExpression",
+                    "operator": "<",
+                    "left": {
+                        "type": "Identifier",
+                        "name": "counter"
+                    },
+                    "right": {
+                        "type": "Number",
+                        "value": 4
+                    }
+                },
+                "branch": [
+                    {
+                        "type": "Assignment",
+                        "identifier": {
+                            "type": "Identifier",
+                            "name": "buff",
+                            "offset": {
+                                "type": "Identifier",
+                                "name": "counter"
+                            }
+                        },
+                        "value": {
+                            "type": "Identifier",
+                            "name": "counter"
+                        }
+                    },
+                    {
+                        "type": "Assignment",
+                        "identifier": {
+                            "type": "Identifier",
+                            "name": "counter"
+                        },
+                        "value": {
+                            "type": "BinaryExpression",
+                            "operator": "+",
+                            "left": {
+                                "type": "Identifier",
+                                "name": "counter"
+                            },
+                            "right": {
+                                "type": "Number",
+                                "value": 1
+                            }
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+]
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/ast.ts b/pc-thing/the_e_programming_language/ast.ts
new file mode 100644
index 0000000..80210d8
--- /dev/null
+++ b/pc-thing/the_e_programming_language/ast.ts
@@ -0,0 +1,357 @@
+import { Token, TokenType } from "./tokenizer.ts";
+
+/** Base shape shared by every AST node; `type` is the discriminating tag. */
+export interface ASTNode {
+    type: string;
+}
+
+/**
+ * `vtype[length] identifier = value` declaration.
+ * The parser sets `length` to 1 when no `[N]` suffix follows the type name.
+ */
+export interface VariableDeclarationNode extends ASTNode {
+    type: "VariableDeclaration";
+    identifier: string;
+    value: ASTNode;
+    vtype: string;
+    length: number;
+}
+
+/** `fn name { ... }` declaration; `body` holds the statements inside the braces. */
+export interface FunctionDeclarationNode extends ASTNode {
+    type: "FunctionDeclaration";
+    name: string;
+    // params: string[];
+    body: ASTNode[];
+}
+
+/** `target = value`; the target may carry an index expression in `identifier.offset`. */
+export interface AssignmentNode extends ASTNode {
+    type: "Assignment";
+    identifier: IdentifierNode;
+    value: ASTNode;
+}
+
+/** `left operator right`, where `operator` is the text of a BINOP token. */
+export interface BinaryExpressionNode extends ASTNode {
+    type: "BinaryExpression";
+    operator: string;
+    left: ASTNode;
+    right: ASTNode;
+}
+
+/** Double-quoted string literal; `value` is the text between the quotes. */
+export interface LiteralNode extends ASTNode {
+    type: "Literal";
+    value: string;
+}
+
+/** Numeric literal; the parser converts the token text with `Number(...)`. */
+export interface NumberNode extends ASTNode {
+    type: "Number";
+    value: number;
+}
+
+/** Variable reference; `offset` is set only when the name is followed by `[expr]`. */
+export interface IdentifierNode extends ASTNode {
+    type: "Identifier";
+    name: string;
+    offset?: ASTNode;
+}
+
+/** `identifier()` call. Argument parsing is not implemented in the parser yet. */
+export interface FunctionCallNode extends ASTNode {
+    type: "FunctionCall";
+    identifier: string;
+    args: ASTNode[];
+}
+
+// export interface BranchFunctionCallNode extends ASTNode {
+//     type: "BranchFunctionCall";
+//     identifier: string;
+//     args: ASTNode[];
+//     branches: ASTNode[][];
+// }
+
+// export interface StartBlockNode extends ASTNode {
+//     type: "StartBlock";
+//     body: ASTNode[];
+// }
+
+/**
+ * `if (condition) { thenBranch } else { elseBranch }`.
+ * `elseBranch` is left undefined when the statement has no `else` block.
+ */
+export interface IfNode extends ASTNode {
+    type: "If";
+    condition: ASTNode;
+    thenBranch: ASTNode[];
+    elseBranch?: ASTNode[];
+}
+
+/** `while (condition) { branch }`; `branch` holds the loop body statements. */
+export interface WhileNode extends ASTNode {
+    type: "While";
+    condition: ASTNode;
+    branch: ASTNode[];
+}
+
+// export interface ForNode extends ASTNode {
+//     type: "For";
+//     times: ASTNode;
+//     varname: ASTNode;
+//     branch: ASTNode[];
+// }
+
+// export interface GreenFlagNode extends ASTNode {
+//     type: "GreenFlag";
+//     branch: ASTNode[];
+// }
+
+// use 1 or 0 for boolean
+// export interface BooleanNode extends ASTNode {
+//     type: "Boolean";
+//     value: boolean;
+// }
+
+// export interface IncludeNode extends ASTNode {
+//     type: "Include";
+//     itype: string;
+//     path: string;
+// }
+
+// export interface ListDeclarationNode extends ASTNode {
+//     type: "ListDeclaration";
+//     identifier: string;
+//     value: ASTNode[];
+//     vtype: 'list' | 'global'
+// }
+
+/**
+ * Recursive-descent parser: consumes the tokenizer's output and produces the
+ * list of top-level AST nodes.
+ *
+ * Supported statements: `type[len] name = value` declarations, `fn name { ... }`,
+ * `if (...) { ... } [else { ... }]`, `while (...) { ... }`, and plain
+ * assignments / expressions. Function parameters and call arguments are not
+ * parsed yet.
+ *
+ * `position` is public so a caller can report the token index at which
+ * parsing stopped when an error is thrown.
+ */
+export default class AST {
+    private tokens: Token[];
+    position: number = 0;
+
+    constructor(tokens: Token[]) {
+        this.tokens = tokens;
+    }
+
+    /** Returns the token `ahead` places past the cursor without consuming it. */
+    private peek(ahead = 0): Token {
+        return this.tokens[this.position + ahead];
+    }
+
+    /** Consumes and returns the token under the cursor. */
+    private advance(): Token {
+        return this.tokens[this.position++];
+    }
+
+    /** Consumes the current token if its type is one of `types`; reports whether it did. */
+    private match(...types: TokenType[]): boolean {
+        if (types.includes(this.peek().type)) {
+            this.advance();
+            return true;
+        }
+        return false;
+    }
+
+    /** Like `match`, but only tests `token` (default: the current one) without consuming. */
+    private matchTk(types: TokenType[], token = this.peek()): boolean {
+        return types.includes(token.type);
+    }
+
+    /**
+     * Consumes and returns the current token when it has the given type.
+     * @throws Error with `errorMessage` otherwise (after dumping the token list).
+     */
+    private expect(type: TokenType, errorMessage: string): Token {
+        if (this.peek().type === type) {
+            return this.advance();
+        }
+        console.error('trace: tokens', this.tokens, '\nIDX:', this.position);
+        throw new Error(errorMessage);
+    }
+
+    /** Parses statements until the EOF token and returns them in source order. */
+    parse(): ASTNode[] {
+        const nodes: ASTNode[] = [];
+        while (this.peek().type !== TokenType.EOF) {
+            nodes.push(this.parseStatement());
+        }
+        return nodes;
+    }
+
+    /** Parses one declaration, control-flow statement, or expression statement. */
+    private parseStatement(): ASTNode {
+        if (this.matchTk([TokenType.TYPE])) {
+            const type = this.advance().value
+            // Optional `[N]` suffix gives the element count; scalars have length 1.
+            let len = 1;
+            if (this.match(TokenType.LBRACKET)) {
+                len = Number(this.expect(TokenType.NUMBER, 'expected number after [').value);
+                this.expect(TokenType.RBRACKET, 'expected ] after length')
+            }
+            const identifier = this.expect(TokenType.IDENTIFIER, "expected var name after type (hint: functions dont have return types yet)").value;
+            this.expect(TokenType.ASSIGN, "expected = after var name");
+            // Initializers are restricted to number / string-literal primaries.
+            const value = this.parseAssignment(false);
+            return { type: "VariableDeclaration", identifier, value, vtype: type, length: len } as VariableDeclarationNode;
+        }
+
+        if (this.match(TokenType.FN_DECL)) {
+            const name = this.expect(TokenType.IDENTIFIER, "expected function name after fn").value;
+            // this.expect(TokenType.LPAREN, "Expected '(' after function name");
+            // const params: string[] = [];
+            // if (!this.match(TokenType.RPAREN)) {
+            //     do {
+            //         params.push(this.expect(TokenType.IDENTIFIER, "Expected parameter name").value);
+            //     } while (this.match(TokenType.COMMA));
+            //     this.expect(TokenType.RPAREN, "Expected ')' after parameters");
+            // }
+            this.expect(TokenType.LBRACE, "expected '{' before function body");
+            const body = this.parseBlock();
+            return { type: "FunctionDeclaration", name, body } as FunctionDeclarationNode;
+        }
+
+        if (this.match(TokenType.IF)) {
+            this.expect(TokenType.LPAREN, "Expected '(' after 'if'");
+            const condition = this.parseAssignment();
+            this.expect(TokenType.RPAREN, "Expected ')' after if condition");
+            this.expect(TokenType.LBRACE, "Expected '{' after if condition");
+            const thenBranch = this.parseBlock();
+            let elseBranch: ASTNode[] | undefined;
+            if (this.match(TokenType.ELSE)) {
+                this.expect(TokenType.LBRACE, "Expected '{' after 'else'");
+                elseBranch = this.parseBlock();
+            }
+            return { type: "If", condition, thenBranch, elseBranch } as IfNode;
+        }
+
+        if (this.match(TokenType.WHILE)) {
+            this.expect(TokenType.LPAREN, "Expected '(' after 'while'");
+            const condition = this.parseAssignment();
+            this.expect(TokenType.RPAREN, "Expected ')' after while condition");
+            this.expect(TokenType.LBRACE, "Expected '{' after while condition");
+            const branch = this.parseBlock();
+            return { type: "While", condition, branch } as WhileNode;
+        }
+
+        // if (this.match(TokenType.FOR)) {
+        //     this.expect(TokenType.LPAREN, "Expected '(' after 'for'");
+        //     const varname = this.parseAssignment();
+        //     const of = this.expect(TokenType.IDENTIFIER, 'expected of');
+        //     if (of.value !== 'of') throw new Error('expected of');
+        //     const times = this.parseAssignment();
+        //     this.expect(TokenType.RPAREN, "Expected ')' after for");
+        //     this.expect(TokenType.LBRACE, "Expected '{' after for");
+        //     const branch = this.parseBlock();
+
+        //     return { type: "For", varname, times, branch } as ForNode;
+        // }
+
+        // if (this.match(TokenType.GREENFLAG)) {
+        //     this.expect(TokenType.LBRACE, "Expected '{' after greenflag");
+        //     const branch = this.parseBlock();
+
+        //     return { type: "GreenFlag", branch } as GreenFlagNode;
+        // }
+
+        return this.parseAssignment();
+    }
+
+    /**
+     * Parses statements up to and including the closing `}`; the opening `{`
+     * must already have been consumed by the caller.
+     * @throws Error when the input ends before the block is closed.
+     */
+    private parseBlock(): ASTNode[] {
+        const nodes: ASTNode[] = [];
+
+        while (!this.match(TokenType.RBRACE)) {
+            // Report an unclosed block directly instead of a generic
+            // "Unexpected token: EOF" from deep inside the expression parser.
+            if (this.peek().type === TokenType.EOF) {
+                throw new Error("expected '}' to close block");
+            }
+            nodes.push(this.parseStatement());
+        }
+
+        return nodes;
+    }
+
+    /**
+     * Parses `expr` or `identifier = value`.
+     * @param allowStuff when false, only number / string-literal primaries are
+     *        accepted (used for declaration initializers).
+     * @throws Error when the left side of `=` is not an identifier.
+     */
+    private parseAssignment(allowStuff = true): ASTNode {
+
+        const expr = this.parseBinaryExpression(allowStuff);
+        if (this.match(TokenType.ASSIGN)) {
+            if (expr.type !== "Identifier")
+                throw new Error("invalid assignment target; expected an identifier");
+            const value = allowStuff ? this.parseAssignment() : this.parsePrimary(false);
+            // let offset = undefined;
+            // if (this.match(TokenType.LBRACKET)) {
+            //     offset = this.parseAssignment();
+            //     this.expect(TokenType.RBRACKET, 'expected ]')
+            // }
+            return { type: "Assignment", identifier: (expr as IdentifierNode), value } as AssignmentNode;
+        }
+        return expr;
+    }
+
+    /**
+     * Parses a left-associative chain of binary operators. All operators share
+     * one precedence level.
+     */
+    private parseBinaryExpression(allowStuff = false): ASTNode {
+        let left = this.parseCall(allowStuff);
+
+        while (this.peek().type === TokenType.BINOP) {
+            const operator = this.advance().value;
+            // The right operand follows the same restriction as the left one;
+            // previously it was always parsed in restricted mode, so `a + b`
+            // failed with "Unexpected token".
+            const right = this.parseCall(allowStuff);
+            left = { type: "BinaryExpression", operator, left, right } as BinaryExpressionNode;
+        }
+        return left;
+    }
+
+    /** Parses a primary followed by any number of `()` call suffixes. */
+    private parseCall(allowStuff = false): ASTNode {
+        let expr = this.parsePrimary(allowStuff);
+
+        while (this.peek().type === TokenType.LPAREN) {
+            expr = this.finishCall(expr);
+        }
+        return expr;
+    }
+
+    /**
+     * Consumes `()` after `callee` and builds the call node.
+     * @throws Error when the parentheses are missing or `callee` is not an identifier.
+     */
+    private finishCall(callee: ASTNode): ASTNode {
+        this.expect(TokenType.LPAREN, "Expected '(' after function name");
+        //TODO - arguments
+        // const args: ASTNode[] = [];
+        // if (this.peek().type !== TokenType.RPAREN) {
+        //     do {
+        //         args.push(this.parseAssignment());
+        //     } while (this.match(TokenType.COMMA));
+        // }
+        this.expect(TokenType.RPAREN, "Expected ')' after arguments");
+
+
+        // if (this.peek().type === TokenType.LBRACE) {
+        //     const branches: ASTNode[][] = [];
+        //     do {
+        //         this.expect(TokenType.LBRACE, "Expected '{' for branch block");
+        //         branches.push(this.parseBlock());
+        //     } while (this.peek().type === TokenType.LBRACE);
+
+        //     if (callee.type !== "Identifier")
+        //         throw new Error("Branch function call expects an identifier");
+        //     return {
+        //         type: "BranchFunctionCall",
+        //         identifier: (callee as IdentifierNode).name,
+        //         args,
+        //         branches,
+        //     } as BranchFunctionCallNode;
+        // }
+
+
+        if (callee.type !== "Identifier")
+            throw new Error("Function call expects an identifier");
+        const call: FunctionCallNode = {
+            type: "FunctionCall",
+            identifier: (callee as IdentifierNode).name,
+            // Arguments are not parsed yet; emit an empty list so the node
+            // actually satisfies FunctionCallNode.
+            args: [],
+        };
+        return call;
+    }
+
+    /**
+     * Parses a number, string literal, identifier (optionally indexed with
+     * `[expr]`), or parenthesized expression.
+     * @param allowOther when false, identifiers and parenthesized expressions
+     *        are rejected.
+     * @throws Error naming the offending token type when nothing matches.
+     */
+    private parsePrimary(allowOther = true): ASTNode {
+        const token = this.peek();
+
+        if (this.match(TokenType.NUMBER)) {
+            return { type: "Number", value: Number(token.value) } as NumberNode;
+        }
+
+        if (this.match(TokenType.LITERAL)) {
+            return { type: "Literal", value: token.value } as LiteralNode;
+        }
+
+        // Test allowOther first so a rejected token is not consumed and
+        // `position` still points at it when the error is reported.
+        if (allowOther && this.match(TokenType.IDENTIFIER)) {
+
+            // if (["True", "true", "False", "false"].includes(token.value)) {
+            //     return {
+            //         type: "Boolean",
+            //         value: token.value === "True" || token.value === "true"
+            //     } as BooleanNode;
+            // }
+            let offset = undefined;
+            if (this.match(TokenType.LBRACKET)) {
+                offset = this.parseAssignment();
+                this.expect(TokenType.RBRACKET, 'expected ]')
+            }
+            return { type: "Identifier", name: token.value, offset } as IdentifierNode;
+        }
+
+        if (allowOther && this.match(TokenType.LPAREN)) {
+            const expr = this.parseAssignment();
+            this.expect(TokenType.RPAREN, "Expected ')' after expression");
+            return expr;
+        }
+
+        throw new Error(`Unexpected token: ${token.type}`);
+    }
+
+}
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/code.txt b/pc-thing/the_e_programming_language/code.txt
new file mode 100644
index 0000000..f45a592
--- /dev/null
+++ b/pc-thing/the_e_programming_language/code.txt
@@ -0,0 +1,11 @@
+mov 97,0
+mov 98,0
+str 98,97
+mov 97,0
+mov 98,4
+str 98,97
+pop 97
+mov 98,0
+cmp 97,98
+mov 97,18
+jnz 97
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/compiler.ts b/pc-thing/the_e_programming_language/compiler.ts
new file mode 100644
index 0000000..37538f7
--- /dev/null
+++ b/pc-thing/the_e_programming_language/compiler.ts
@@ -0,0 +1,151 @@
+import { ASTNode, BinaryExpressionNode, FunctionDeclarationNode, NumberNode, VariableDeclarationNode, WhileNode } from "./ast.ts";
+// import { PC } from "../pc.ts";
+// const pc = new PC();
+
+/** Instruction mnemonics the compiler may emit (written verbatim to the output listing). */
+type Opcode = 
+    'mov'  |
+    'swp'  |
+    'ld'   |
+    'str'  |
+    'add'  |
+    'sub'  |
+    'mul'  |
+    'div'  |
+    'mod'  |
+    'shl'  |
+    'shr'  |
+    'cmp'  |
+    'cmr'  |
+    'and'  |
+    'or'   |
+    'xor'  |
+    'not'  |
+    'push' |
+    'pop'  |
+    'halt' |
+    'sys'  |
+    'jmp'  |
+    'jnz'  |
+    'jz'   |
+    'jmr'  |
+    'ret'  |
+    'end';
+/**
+ * Numeric register identifiers used as instruction operands.
+ * NOTE(review): 97-100 are the ASCII codes of 'a'-'d', presumably how the
+ * target machine names its registers - confirm against the VM.
+ */
+type Register = 97 | 98 | 99 | 100
+
+/** One emitted instruction: a mnemonic plus its register / immediate operands. */
+interface Instruction {
+    opcode: Opcode,
+    args: (Register | number)[]
+}
+
+/** Size of each supported variable type, in addresses (all are one address wide). */
+const types: Record<string, number> = {
+    'int' : 1,
+    'bool': 1,
+    'char': 1
+}
+
+// Register operands, named after the registers they stand for.
+const A: Register = 97;
+const B: Register = 98;
+// deno-lint-ignore no-unused-vars
+const C: Register = 99;
+// deno-lint-ignore no-unused-vars
+const D: Register = 100;
+
+/**
+ * Translates AST nodes into a flat instruction list (`instructions`).
+ *
+ * Call `compile` once per top-level node. Variable declarations, function
+ * declarations, `+` binary expressions and `while` loops are handled; any
+ * other node type is reported on stderr and skipped.
+ */
+export default class Compiler {
+    /** Declared variables: name -> [start address, size in addresses]. */
+    vars: Record<string, [number, number]> = {};
+    /** Declared functions: name -> code offset at which the body starts. */
+    functions: Record<string, number> = {};
+    AST: ASTNode[];
+    /** Next free data address. */
+    lastAddr: number = 0;
+    instructions: Instruction[] = [];
+    constructor (ast: ASTNode[]) {
+        this.AST = ast
+    }
+
+    /** Size of the code emitted so far: one cell per opcode plus one per operand. */
+    private codeOffset(): number {
+        return this.instructions.reduce((total, ins) => total + 1 + ins.args.length, 0);
+    }
+
+    /** Appends a single instruction to the output. */
+    private emit(opcode: Opcode, ...args: (Register | number)[]): void {
+        this.instructions.push({ opcode, args });
+    }
+
+    /**
+     * Emits the instructions for `node`, recursing into function and loop bodies.
+     * @throws Error for an unknown variable type, a non-numeric initializer,
+     *         or an unsupported binary operator.
+     */
+    compile (node: ASTNode) {
+        switch (node.type) {
+            case 'VariableDeclaration': {
+                const decl = node as VariableDeclarationNode;
+                const unit = types[decl.vtype];
+                if (!unit) throw new Error(`unknown type: ${decl.vtype}`);
+                // Validate before touching `vars` / `lastAddr` so a rejected
+                // declaration leaves the compiler state unchanged.
+                if (decl.value.type !== 'Number') {
+                    throw new Error(`initializer of '${decl.identifier}' must be a number literal`);
+                }
+                const size = unit * decl.length;
+                const addr = this.lastAddr;
+                this.vars[decl.identifier] = [addr, size];
+                this.lastAddr += size;
+                // Only the first address of the variable is initialized.
+                this.emit('mov', A, (decl.value as NumberNode).value);
+                this.emit('mov', B, addr);
+                this.emit('str', B, A);
+                break;
+            }
+            case 'FunctionDeclaration': {
+                const fn = node as FunctionDeclarationNode;
+                this.functions[fn.name] = this.codeOffset();
+                for (const stmt of fn.body) {
+                    this.compile(stmt)
+                }
+                break;
+            }
+            case 'BinaryExpression': {
+                const bin = node as BinaryExpressionNode;
+                // NOTE(review): the operands are popped from the stack, but
+                // `left` / `right` are never compiled here.
+                this.emit('pop', A);
+                this.emit('pop', B);
+                switch (bin.operator) {
+                    case '+':
+                        this.emit('add', A, A, B);
+                        break;
+
+                    default:
+                        throw new Error(`unsupported binary operator: ${bin.operator}`);
+                }
+                this.emit('push', A);
+                break;
+            }
+            case 'While': {
+                const loop = node as WhileNode;
+                const start = this.codeOffset();
+                for (const stmt of loop.branch) {
+                    this.compile(stmt)
+                }
+                // NOTE(review): `loop.condition` is never compiled; the test
+                // below pops whatever value is on top of the stack.
+                this.emit('pop', A);
+                this.emit('mov', B, 0);
+                this.emit('cmp', A, B);
+                this.emit('mov', A, start);
+                this.emit('jnz', A);
+                break;
+            }
+            default:
+                console.error(`!!! UNIMPLEMENTED NODE `, node.type, node)
+        }
+    }
+}
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/lang.ts b/pc-thing/the_e_programming_language/lang.ts
new file mode 100755
index 0000000..5c00b98
--- /dev/null
+++ b/pc-thing/the_e_programming_language/lang.ts
@@ -0,0 +1,31 @@
+import Tokenizer from "./tokenizer.ts";
+import ASTGen from "./ast.ts";
+import Compiler from "./compiler.ts";
+// Driver: tokenize test.e, parse it, compile every top-level node, then dump
+// the AST to ast.json and the instruction listing to code.txt.
+const input = Deno.readTextFileSync('test.e')
+
+const tokens = new Tokenizer(input).tokenize();
+
+console.log(tokens)
+
+const astGenerator = new ASTGen(tokens);
+
+let ast;
+try {
+    ast = astGenerator.parse()
+} catch (error) {
+    console.error(error);
+    // Print every token, one per line, marking the one the parser stopped at.
+    const failedAt = astGenerator.position;
+    const listing = tokens
+        .map((tok, idx) => {
+            const text = `${tok.type}(${tok.value})`;
+            return idx == astGenerator.position ? `${text} <--` : text;
+        })
+        .join('\n');
+    console.log('at', failedAt, listing)
+    Deno.exit(1)
+}
+
+console.log(ast)
+
+const compiler = new Compiler(ast);
+
+compiler.AST.forEach((node) => {
+    compiler.compile(node)
+});
+
+Deno.writeTextFileSync('ast.json', JSON.stringify(ast, null, 4))
+
+// One instruction per line: the mnemonic, then comma-separated operands if any.
+const assembly = compiler.instructions
+    .map(({ opcode, args }) => (args.length > 0 ? `${opcode} ${args.join(',')}` : `${opcode}`))
+    .join('\n');
+Deno.writeTextFileSync('code.txt', assembly)
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/test-2.e b/pc-thing/the_e_programming_language/test-2.e
new file mode 100755
index 0000000..3b1b371
--- /dev/null
+++ b/pc-thing/the_e_programming_language/test-2.e
@@ -0,0 +1,6 @@
+int[8] a = 0
+int test = 5
+fn _start {
+a[4] = 5
+a[test] = 6
+}
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/test.e b/pc-thing/the_e_programming_language/test.e
new file mode 100755
index 0000000..4996d5c
--- /dev/null
+++ b/pc-thing/the_e_programming_language/test.e
@@ -0,0 +1,43 @@
+// comments exist
+// 
+// types (in addresses (they store 2 bytes if you forgot)):
+// int - 1
+// char - 1
+// bool - 1
+// uuh yeah, they're all 1 address ;-;
+//
+// $VARNAME is the address of the var, when used in mov it means that we get that var
+// [$VARNAME] is the address of the var, but when it's used in mov it means we set to the address
+
+// translates to:
+// mov a 0
+// mov $counter a
+// prob not gonna make it do assembly tho
+int[4] buff = 0
+int counter = 0
+
+fn _start {
+    // translates to
+    // <inner code>
+    // mov a $counter
+    // mov b 4
+    // cmp a b
+    // jz $start_of_inner_code
+    while (counter < 4) {
+        // translates to
+        // mov a [$buff]
+        // mov b [$counter]
+        // add a a b
+        // mov b $counter
+        // str a b
+        buff[counter] = counter
+        // translates to
+        // mov a $counter ; further translates to:
+        //                ; mov a $counter
+        //                ; ld a a
+        // mov b 1
+        // add a a b
+        // str $counter a
+        counter = counter + 1
+    }
+}
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/test.e.txt b/pc-thing/the_e_programming_language/test.e.txt
new file mode 100755
index 0000000..c8d09c5
--- /dev/null
+++ b/pc-thing/the_e_programming_language/test.e.txt
@@ -0,0 +1,43 @@
+// comments exist
+// 
+// types (in addresses (they store 2 bytes if you forgot)):
+// int - 1
+// char - 1
+// bool - 1
+// uuh yeah, they're all 1 address ;-;
+//
+// $VARNAME is the address of the var, when used in mov it means that we get that var
+// [$VARNAME] is the address of the var, but when it's used in mov it means we set to the address
+
+// translates to:
+// mov a 0
+// mov $counter a
+// prob not gonna make it do assembly tho
+int counter = 0
+int[4] buff = 0
+
+fn _start {
+    // translates to
+    // <inner code>
+    // mov a $counter
+    // mov b 4
+    // cmp a b
+    // jz $start_of_inner_code
+    while (counter < 4) {
+        // translates to
+        // mov a [$buff]
+        // mov b [$counter]
+        // add a a b
+        // mov b $counter
+        // str a b
+        buff[counter] = counter
+        // translates to
+        // mov a $counter ; further translates to:
+        //                ; mov a $counter
+        //                ; ld a a
+        // mov b 1
+        // add a a b
+        // str $counter a
+        counter = counter + 1
+    }
+}
\ No newline at end of file
diff --git a/pc-thing/the_e_programming_language/tokenizer.ts b/pc-thing/the_e_programming_language/tokenizer.ts
new file mode 100755
index 0000000..57d0131
--- /dev/null
+++ b/pc-thing/the_e_programming_language/tokenizer.ts
@@ -0,0 +1,171 @@
+/**
+ * Kinds of token produced by the tokenizer. Each member's string value is its
+ * own name, which is what shows up in token dumps and parser error messages.
+ */
+export enum TokenType {
+    TYPE       = "TYPE",
+    FN_DECL    = "FN_DECL",
+    LITERAL    = "LITERAL",
+    NUMBER     = "NUMBER",
+    LPAREN     = "LPAREN",
+    RPAREN     = "RPAREN",
+    LBRACE     = "LBRACE",
+    RBRACE     = "RBRACE",
+    // These two values were misspelled "LBRACKER" / "RBRACKER".
+    LBRACKET   = "LBRACKET",
+    RBRACKET   = "RBRACKET",
+    COMMA      = "COMMA",
+    WHILE      = "WHILE",
+    IF         = "IF",
+    ELSE       = "ELSE",
+    ASSIGN     = "ASSIGN",
+    BINOP      = "BINOP",
+    IDENTIFIER = "IDENTIFIER",
+
+    EOF        = "EOF",
+}
+
+/** A lexical token: its kind plus the source text (or normalized text) it carries. */
+export interface Token {
+    type: TokenType;
+    value: string;
+}
+
+// Identifiers in this list are emitted as TYPE tokens instead of IDENTIFIER tokens.
+const types = ['int', 'bool', 'char']
+
+// i hardly know her
+/**
+ * Splits E source text into tokens.
+ *
+ * Recognizes `//` line comments, the keywords `fn` / `if` / `else` / `while`,
+ * type names, identifiers, numbers, double-quoted string literals (no escape
+ * sequences), brackets, `,`, `=` and the binary operators
+ * `+ - * / % < > == != <= >= &&`. The returned list always ends with an EOF
+ * token.
+ */
+export default class Tokenizer {
+    private source: string;
+    private position: number = 0;
+
+    constructor(source: string) {
+        this.source = source;
+    }
+
+    /** True for characters that may start an identifier: letters, `_` and `#`. */
+    private isAlpha(char: string): boolean {
+        return /[a-zA-Z_#]/.test(char);
+    }
+
+    /** True for decimal digits and `.` (so `.` may also appear inside identifiers). */
+    private isDigit(char: string): boolean {
+        return /[\d.]/.test(char);
+    }
+
+    private isWhitespace(char: string): boolean {
+        return /\s/.test(char);
+    }
+
+    /** Consumes and returns the character under the cursor. */
+    private advance(): string {
+        return this.source[this.position++];
+    }
+
+    /** Returns the character under the cursor, or "" at end of input. */
+    private peek(): string {
+        return this.source[this.position] || "";
+    }
+
+    /** Consumes the next character only if it equals `expected`. */
+    private match(expected: string): boolean {
+        if (this.peek() === expected) {
+            this.position++;
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Scans the whole source and returns its tokens followed by an EOF token.
+     * @throws Error on an unterminated string literal or an unexpected character.
+     */
+    tokenize(): Token[] {
+        const tokens: Token[] = [];
+
+        let inComment = false;
+
+        while (this.position < this.source.length) {
+            const char = this.advance();
+
+            if (inComment) {
+                // A line comment runs up to and including the newline.
+                if (char === '\n') inComment = false;
+                continue;
+            } else if (char === '/' && this.peek() === '/') {
+                inComment = true
+            } else if (this.isWhitespace(char)) {
+                continue;
+            } else if (this.isAlpha(char)) {
+                let identifier = char;
+                while (this.isAlpha(this.peek()) || this.isDigit(this.peek())) {
+                    identifier += this.advance();
+                }
+
+                if (identifier === "fn") tokens.push({ type: TokenType.FN_DECL, value: identifier });
+                else if (identifier === "if") tokens.push({ type: TokenType.IF, value: identifier });
+                else if (identifier === "else") tokens.push({ type: TokenType.ELSE, value: identifier });
+                else if (identifier === "while") tokens.push({ type: TokenType.WHILE, value: identifier });
+                else if (types.includes(identifier)) tokens.push({ type: TokenType.TYPE, value: identifier });
+                else tokens.push({ type: TokenType.IDENTIFIER, value: identifier });
+            } else if (this.isDigit(char)) {
+                let number = char;
+                while (this.isDigit(this.peek())) {
+                    number += this.advance();
+                }
+                tokens.push({ type: TokenType.NUMBER, value: number });
+            } else if (char === '"') {
+                let string = "";
+                while (this.peek() !== '"' && this.peek() !== "") {
+                    string += this.advance();
+                }
+                if (!this.match('"')) {
+                    throw new Error("Unterminated string");
+                }
+                tokens.push({ type: TokenType.LITERAL, value: string });
+            } else if (char === "(") tokens.push({ type: TokenType.LPAREN, value: char });
+            else if (char === ")") tokens.push({ type: TokenType.RPAREN, value: char });
+            else if (char === "{") tokens.push({ type: TokenType.LBRACE, value: char });
+            else if (char === "}") tokens.push({ type: TokenType.RBRACE, value: char });
+            else if (char === "[") tokens.push({ type: TokenType.LBRACKET, value: char });
+            else if (char === "]") tokens.push({ type: TokenType.RBRACKET, value: char });
+            else if (char === ",") tokens.push({ type: TokenType.COMMA, value: char });
+            else if ("+-*/%".includes(char)) tokens.push({ type: TokenType.BINOP, value: char });
+            else if ("=!<>".includes(char) && this.peek() === "=") {
+                // Two-character comparison: keep both characters in the value
+                // so `==` is not confused with a bare `=` (it used to be
+                // emitted with value "=").
+                tokens.push({ type: TokenType.BINOP, value: char + this.advance() });
+            }
+            else if (char === "&" && this.peek() === "&") {
+                // Previously emitted with value "&".
+                tokens.push({ type: TokenType.BINOP, value: char + this.advance() });
+            }
+            else if (char === ">" || char === "<") tokens.push({ type: TokenType.BINOP, value: char });
+            else if (char === "=") tokens.push({ type: TokenType.ASSIGN, value: char });
+            else {
+                throw new Error(`Unexpected character: ${char}`);
+            }
+        }
+
+        tokens.push({ type: TokenType.EOF, value: "" });
+        return tokens;
+    }
+}
\ No newline at end of file