#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Author: ENOENT

import re


class Assembler:
    """Translate a small assembly language into bytecode.

    Source format (one statement per line):

    * ``#`` starts a comment that runs to the end of the line.
    * ``mnemonic arg1, arg2`` is an instruction; the mnemonic is
      case-insensitive, register names must match ``registerID`` exactly.
    * ``<name>`` defines a label at the current instruction pointer.
    * Immediates are written ``0x`` followed by one or two hex digits.

    Encoding:

    * Regular instruction: opcode byte, operand-descriptor byte (see
      ``_getSecondByte``), then one immediate byte if an operand is an
      immediate.
    * Jump: opcode byte followed by the 2-byte big-endian address of the
      target label.

    Assembly state (``IP``, ``labels``, ``unseenLabels``, ``bytecode``) lives
    on the instance, so successive ``compile`` calls on the same instance keep
    appending to the same program.
    """

    opcodeID = {
        "in": 0x11,
        "out": 0x12,
        "set": 0x31,
        "load": 0x22,
        "store": 0x23,
        "add": 0x32,
        "sub": 0x33,
        "xor": 0x34,
        "push": 0x41,
        "pop": 0x42,
        "cmp": 0x51,
        "jg": 0x61,
        "jl": 0x62,
        "je": 0x63,
        "goto": 0x64,
    }

    registerID = {
        "R1": 0x1,
        "R2": 0x2,
        "R3": 0x3,
        "R4": 0x4,
        "A1": 0x5,
        "SP": 0x6,
    }

    # mnemonic -> (operand count, operand 1 may be immediate,
    #              operand 2 may be immediate)
    _operandRules = {
        "in": (1, True, False),
        "out": (1, True, False),
        "push": (1, True, False),
        "pop": (1, False, False),
        "set": (2, False, True),
        "load": (2, False, True),
        "store": (2, True, False),
        "add": (2, False, True),
        "sub": (2, False, True),
        "xor": (2, False, True),
        "cmp": (2, False, True),
    }

    # Mnemonics encoded as opcode + 2-byte label address.
    _jumpOps = frozenset(("jg", "jl", "je", "goto"))

    _immediatePattern = re.compile(r"0x[0-9a-fA-F]{1,2}")
    _labelPattern = re.compile(r"[a-zA-Z0-9_]+")

    def __init__(self):
        # Per-instance state: mutable class-level dicts would be shared by
        # every Assembler and leak labels from one program into the next.
        self.IP = 0                # address of the next instruction
        self.labels = {}           # label name -> address
        self.unseenLabels = {}     # unresolved label -> line of a jump to it
        self.bytecode = bytes()    # program assembled so far
        self._fixups = {}          # unresolved label -> offsets to patch

    def _isValidRegister(self, arg):
        """Return True if *arg* is a known register name."""
        return arg in self.registerID

    def _isValidInstruction(self, op):
        """Return True if *op* is a known mnemonic."""
        return op in self.opcodeID

    def _isValidImmediate(self, arg):
        """Return True if *arg* is ``0x`` plus one or two hex digits."""
        # A strict pattern (rather than int(..., 16)) rejects forms such as
        # "0x-1" or "0x+f" that int() accepts but that are not byte values.
        return bool(self._immediatePattern.fullmatch(arg))

    def _isValidLabel(self, label):
        """Return True if *label* is a non-empty run of ``[a-zA-Z0-9_]``."""
        return bool(self._labelPattern.fullmatch(label))

    def _getSecondByte(self, a1isreg, a2isreg, reg1, reg2):
        """Build the operand-descriptor byte.

        Layout, most significant bit first: operand-1-is-register flag,
        operand-2-is-register flag, 3 bits of *reg1*, 3 bits of *reg2*.
        Register IDs are expected to fit in 3 bits.
        """
        return (bool(a1isreg) << 7) | (bool(a2isreg) << 6) | (reg1 << 3) | reg2

    def _handleInstruction(self, n, op, args, nArgs, a1CanBeImm, a2CanBeImm=False):
        """Encode a non-jump instruction.

        Args:
            n: source line number, used in error messages.
            op: lower-case mnemonic (a key of ``opcodeID``).
            args: operand strings.
            nArgs: required operand count (1 or 2).
            a1CanBeImm: whether operand 1 may be an immediate.
            a2CanBeImm: whether operand 2 may be an immediate.

        Returns:
            ``(encoded bytes, IP increment)``.

        Raises:
            Exception: wrong operand count or an operand that is neither a
                register nor an allowed immediate.
        """
        opcode = self.opcodeID[op]
        if len(args) != nArgs:
            raise Exception("Line {} : Invalid argument count for instruction {}".format(n, op.upper()), args)

        immediate = 0
        a1isreg = a2isreg = False
        reg1 = reg2 = 0

        first = args[0]
        if self._isValidRegister(first):
            reg1 = self.registerID[first]
            a1isreg = True
        elif a1CanBeImm and self._isValidImmediate(first):
            immediate = int(first[2:], 16)
        else:
            raise Exception("Line {} : Invalid argument for instruction {}".format(n, op.upper()), first)

        if nArgs == 2:
            second = args[1]
            if self._isValidRegister(second):
                reg2 = self.registerID[second]
                a2isreg = True
            elif a2CanBeImm and self._isValidImmediate(second):
                immediate = int(second[2:], 16)
            else:
                raise Exception("Line {} : Invalid argument for instruction {}".format(n, op.upper()), second)

        encoded = [opcode, self._getSecondByte(a1isreg, a2isreg, reg1, reg2)]
        # The immediate byte is emitted only when an operand actually is one.
        if (a1CanBeImm and not a1isreg) or (a2CanBeImm and not a2isreg):
            encoded.append(immediate)
        return (bytes(encoded), len(encoded))

    def _registerLabel(self, n, label):
        """Record *label* at the current IP and resolve pending forward jumps.

        Raises:
            Exception: *label* is not a valid label name.
        """
        if not self._isValidLabel(label):
            raise Exception("Line {} : Invalid label name".format(n), label)

        self.labels[label] = self.IP
        if label in self.unseenLabels:
            # Patch the exact placeholder offsets recorded by _handleJump.
            # A textual search-and-replace over the bytecode would also
            # rewrite unrelated bytes that happen to match.
            address = self.IP.to_bytes(2, 'big')
            patched = bytearray(self.bytecode)
            for position in self._fixups.pop(label, ()):
                patched[position:position + 2] = address
            self.bytecode = bytes(patched)
            del self.unseenLabels[label]

    def _handleJump(self, n, op, args, nArgs=1):
        """Encode a jump as opcode + 2-byte big-endian target address.

        A jump to a label that is not defined yet emits a zero placeholder
        and records its offset so ``_registerLabel`` can patch it later. The
        offset is computed from ``len(self.bytecode)``, i.e. it assumes the
        caller appends the returned bytes to ``self.bytecode`` next, as
        ``compile`` does.

        Returns:
            ``(encoded bytes, 3)``.

        Raises:
            Exception: wrong operand count or invalid label name.
        """
        opcode = self.opcodeID[op]
        if len(args) != nArgs:
            raise Exception("Line {} : Invalid argument count for instruction {}".format(n, op.upper()), args)

        label = args[0].lower()
        if not self._isValidLabel(label):
            raise Exception("Line {} : Invalid argument for instruction {}".format(n, op.upper()), args[0])

        target = self.labels.get(label)
        if target is None:
            # Forward jump: the address bytes sit right after the opcode.
            self.unseenLabels[label] = n
            self._fixups.setdefault(label, []).append(len(self.bytecode) + 1)
            address = bytes(2)
        else:
            # Backward jump: the label address is already known.
            address = target.to_bytes(2, 'big')
        return (bytes([opcode]) + address, 3)

    def _toOpcode(self, n, instruction):
        """Encode one parsed statement.

        *instruction* is a list ``[opcode, arg1, arg2, ...]``. A first element
        of the form ``<name>`` defines a label and produces no bytes.

        Returns:
            ``(encoded bytes, IP increment)``.

        Raises:
            Exception: unknown mnemonic, or any error raised while encoding.
        """
        op = instruction[0]
        operands = instruction[1:]

        rule = self._operandRules.get(op)
        if rule is not None:
            nArgs, a1CanBeImm, a2CanBeImm = rule
            return self._handleInstruction(n, op, operands, nArgs, a1CanBeImm, a2CanBeImm)
        if op in self._jumpOps:
            return self._handleJump(n, op, operands)
        if op.startswith("<") and op.endswith(">"):
            self._registerLabel(n, op[1:-1])
            return (bytes(), 0)
        raise Exception("Line {} : Invalid instruction".format(n), instruction)

    def _parseLine(self, n, l):
        """Parse and encode one source line.

        Returns:
            ``(byte-code, IP increment)``; ``(b"", 0)`` for blank or
            comment-only lines.
        """
        statement = l.split("#", 1)[0].strip()
        if not statement:
            return (bytes(), 0)

        # Split on any whitespace so tabs work as well as spaces.
        pieces = statement.split(None, 1)
        op = pieces[0].lower()
        # Whitespace inside the operand list is not significant.
        rest = "".join(pieces[1].split()) if len(pieces) > 1 else ""
        instruction = [op] + [arg for arg in rest.split(",") if arg]
        return self._toOpcode(n, instruction)

    def compile(self, code):
        """Assemble *code* and return the accumulated bytecode.

        Line numbers in error messages are 1-based.

        Raises:
            Exception: on any invalid line, or if a jump targets a label that
                is never defined.
        """
        for n, line in enumerate(code.split("\n"), start=1):
            byte, offset = self._parseLine(n, line)
            self.IP += offset
            self.bytecode += byte

        if self.unseenLabels:
            label, n = self.unseenLabels.popitem()
            raise Exception("Line {} : Unseen label".format(n), label)
        return self.bytecode