The Art of
ASSEMBLY LANGUAGE PROGRAMMING

Chapter Sixteen (Part 10)

Table of Content

Chapter Sixteen (Part 12) 

CHAPTER SIXTEEN:
PATTERN MATCHING (Part 11)

16.8.4 - A Tiny Assembler

16.8.4 A Tiny Assembler

Although the UCR Standard Library pattern matching routines would probably not be appropriate for writing a full lexical analyzer or compiler, they are useful for writing small compilers/assemblers or programs where speed of compilation/assembly is of little concern. One good example is the simple nonsymbolic assembler appearing in the SIM886 simulator for an earlier version of the x86 processors. This "mini-assembler" accepts an x86 assembly language statement and immediately assembles it into memory. This allows SIM886 users to create simple assembly language programs within the SIM886 monitor/debugger. Using the Standard Library pattern matching routines makes it very easy to implement such an assembler.

The grammar for this miniassembler is

Stmt     → Grp1 reg "," operand |
           Grp2 reg "," reg "," constant |
           Grp3 operand |
           goto operand |
           halt

Grp1     → load | store | add | sub

Grp2     → ifeq | iflt | ifgt

Grp3     → get | put

reg      → ax | bx | cx | dx

operand  → reg | constant | [bx] | constant [bx]

constant → hexdigit constant | hexdigit

hexdigit → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f

There are some minor semantic details that the program handles (such as disallowing stores into immediate operands). The assembly code for the miniassembler follows:

; ASM.ASM
;

.xlist
include         stdlib.a
matchfuncs
includelib      stdlib.lib
.list

dseg            segment para public 'data'

; Some sample statements to assemble.  Each is a zero-terminated string
; that Main passes to Assemble.  Str11 through Str14 are deliberately
; invalid so that the error paths get exercised.

Str1            byte    "load ax, 0",0
Str2            byte    "load ax, bx",0
Str3            byte    "load ax, ax",0
Str4            byte    "add ax, 15",0
Str5            byte    "sub ax, [bx]",0
Str6            byte    "store bx, [1000]",0
Str7            byte    "load bx, 2000[bx]",0
Str8            byte    "goto 3000",0
Str9            byte    "iflt ax, bx, 100",0
Str10           byte    "halt",0
Str11           byte    "This is illegal",0             ;Not an instruction.
Str12           byte    "load ax, store",0              ;Bad second operand.
Str13           byte    "store ax, 1000",0              ;Store to immediate.
Str14           byte    "ifeq ax, 0, 0",0               ;2nd operand must be a reg.

; Assembler state.  The match functions fill these in while a statement
; is being parsed; Assemble reads them to build the instruction bytes.

AsmConst        dw      0       ;16-bit constant operand (set by ConstPat).
AsmOpcode       db      0       ;Base opcode set by the TryXXXX routines.
AsmOprnd1       db      0       ;Register code, or sub-opcode if AsmOpcode=7.
AsmOprnd2       db      0       ;Register code (0..3) or addressing mode (4..7).

                include stdsets.a       ;Bring in the standard char sets.

; Patterns for the assembler:
;
; Pattern is (
;        (load|store|add|sub) reg "," operand |
;        (ifeq|iflt|ifgt) reg1 "," reg2 "," const |
;        (get|put) operand |
;        goto operand |
;        halt
;        )
;
; With a few semantic additions (e.g., cannot store to a const).
;
; NOTE(review): the pattern fields below are assumed to follow the UCR
; Standard Library layout {match function, parameter, alternate, next};
; confirm against stdlib.a.

; InstrPat-     Top-level pattern.  Skips leading white space; both the
;               alternate and the next field are Grp1, so Grp1 is tried
;               whether or not any white space was present.

InstrPat        pattern {spancset,WhiteSpace,Grp1,Grp1}

; Grp1-         Group one instructions.  Matches one of the mnemonics in
;               the Grp1Strs chain and then the Grp1Oprnds operands.
;               Grp2 is the alternate.

Grp1            pattern {sl_Match2,Grp1Strs,Grp2,Grp1Oprnds}
Grp1Strs        pattern {TryLoad,,Grp1Store}
Grp1Store       pattern {TryStore,,Grp1Add}
Grp1Add         pattern {TryAdd,,Grp1Sub}
Grp1Sub         pattern {TrySub}

; Patterns for the LOAD, STORE, ADD, and SUB instructions.
; Each pattern hands its zero-terminated mnemonic string to MatchStr.
; The mnemonics are upper case because Assemble runs strupr on the
; statement before matching.

LoadPat         pattern {MatchStr,LoadInstr2}
LoadInstr2      byte    "LOAD",0

StorePat        pattern {MatchStr,StoreInstr2}
StoreInstr2     byte    "STORE",0

AddPat          pattern {MatchStr,AddInstr2}
AddInstr2       byte    "ADD",0

SubPat          pattern {MatchStr,SubInstr2}
SubInstr2       byte    "SUB",0

; Patterns for the group one (LOAD/STORE/ADD/SUB) instruction operands:
;
;       [ws] reg [ws] "," [ws] operand [ws] <end of string>
;
; Every white space step names the following pattern as both its
; alternate and its next pattern, which makes the white space optional.

Grp1Oprnds      pattern {spancset,WhiteSpace,Grp1Reg,Grp1Reg}
Grp1Reg         pattern {MatchReg,AsmOprnd1,,Grp1ws2}   ;Reg code -> AsmOprnd1.
Grp1ws2         pattern {spancset,WhiteSpace,Grp1Comma,Grp1Comma}
Grp1Comma       pattern {MatchChar,',',0,Grp1ws3}
Grp1ws3         pattern {spancset,WhiteSpace,Grp1Op2,Grp1Op2}
Grp1Op2         pattern {MatchGen,,,EndOfLine}          ;General operand.

; EndOfLine-    Skips optional trailing white space and then requires the
;               end of the string.

EndOfLine       pattern {spancset,WhiteSpace,NullChar,NullChar}
NullChar        pattern {EOS}

; Grp1Op2Reg is not part of the chain above.  MatchGen loads it directly
; to try a register as the second operand, with the register code going
; into AsmOprnd2.

Grp1Op2Reg      pattern {MatchReg,AsmOprnd2}

; Patterns for the group two instructions (IFEQ, IFLT, IFGT):
;
;       mnemonic [ws] reg1 [ws] "," [ws] reg2 [ws] "," [ws] const [ws] <eos>
;
; Grp3 is the alternate if none of the group two mnemonics match.

Grp2            pattern {sl_Match2,Grp2Strs,Grp3,Grp2Oprnds}
Grp2Strs        pattern {TryIFEQ,,Grp2IFLT}
Grp2IFLT        pattern {TryIFLT,,Grp2IFGT}
Grp2IFGT        pattern {TryIFGT}

; Operand chain.  As elsewhere, every white space step is optional
; because its alternate and next fields name the same pattern.

Grp2Oprnds      pattern {spancset,WhiteSpace,Grp2Reg,Grp2Reg}
Grp2Reg         pattern {MatchReg,AsmOprnd1,,Grp2ws2}   ;reg1 -> AsmOprnd1.
Grp2ws2         pattern {spancset,WhiteSpace,Grp2Comma,Grp2Comma}
Grp2Comma       pattern {MatchChar,',',0,Grp2ws3}
Grp2ws3         pattern {spancset,WhiteSpace,Grp2Reg2,Grp2Reg2}
Grp2Reg2        pattern {MatchReg,AsmOprnd2,,Grp2ws4}   ;reg2 -> AsmOprnd2.
Grp2ws4         pattern {spancset,WhiteSpace,Grp2Comma2,Grp2Comma2}
Grp2Comma2      pattern {MatchChar,',',0,Grp2ws5}
Grp2ws5         pattern {spancset,WhiteSpace,Grp2Op3,Grp2Op3}
Grp2Op3         pattern {ConstPat,,,EndOfLine}          ;const -> AsmConst.

; Patterns for the IFEQ, IFLT, and IFGT instructions.
; Each pattern hands its zero-terminated mnemonic string to MatchStr.

IFEQPat         pattern {MatchStr,IFEQInstr2}
IFEQInstr2      byte    "IFEQ",0

IFLTPat         pattern {MatchStr,IFLTInstr2}
IFLTInstr2      byte    "IFLT",0

IFGTPat         pattern {MatchStr,IFGTInstr2}
IFGTInstr2      byte    "IFGT",0

; Grp3 Patterns:
;
; Group three covers GET, PUT, and GOTO: one mnemonic from the Grp3Strs
; chain followed by a single operand (Grp3Oprnds).  Grp4 (HALT) is the
; alternate if none of these mnemonics match.

Grp3            pattern {sl_Match2,Grp3Strs,Grp4,Grp3Oprnds}
Grp3Strs        pattern {TryGET,,Grp3Put}
Grp3Put         pattern {TryPut,,Grp3Goto}
Grp3Goto        pattern {TryGOTO}

; Patterns for the GET, PUT, and GOTO instructions.
; Each pattern hands its zero-terminated mnemonic string to MatchStr.

GetPat          pattern {MatchStr,GetInstr2}
GetInstr2       byte    "GET",0

PutPat          pattern {MatchStr,PutInstr2}
PutInstr2       byte    "PUT",0

GOTOPat         pattern {MatchStr,GOTOInstr2}
GOTOInstr2      byte    "GOTO",0

; Patterns for the group three (PUT/GET/GOTO) instruction operands:
;
;       [ws] operand [ws] <end of string>
;
; The operand is matched by MatchGen, which records the addressing mode
; in AsmOprnd2.

Grp3Oprnds      pattern {spancset,WhiteSpace,Grp3Op,Grp3Op}
Grp3Op          pattern {MatchGen,,,EndOfLine}

; Patterns for the group four instruction (HALT).
; HALT takes no operands, so the mnemonic is followed directly by the
; end-of-line check.  Grp4 has no alternate: if it fails, no instruction
; pattern matched.

Grp4            pattern {TryHalt,,,EndOfLine}

HaltPat         pattern {MatchStr,HaltInstr2}
HaltInstr2      byte    "HALT",0

; Patterns to match the four non-register addressing modes.  MatchGen
; tries them in the order shown and records the mode code in AsmOprnd2:
;
;       [BX]            BXIndrctPat     mode 4
;       xxxx[BX]        BXIndexedPat    mode 5
;       [xxxx]          DirectPat       mode 6
;       xxxx            ImmediatePat    mode 7

BXIndrctPat     pattern {MatchStr,BXIndrctStr}
BXIndrctStr     byte    "[BX]",0

BXIndexedPat    pattern {ConstPat,,,BXIndrctPat}        ;const then "[BX]".

DirectPat       pattern {MatchChar,'[',,DP2}            ;"[" const "]".
DP2             pattern {ConstPat,,,DP3}
DP3             pattern {MatchChar,']'}

ImmediatePat    pattern {ConstPat}

; Pattern to match a hex constant: a run of one or more characters from
; the xdigits set.  ConstPat uses this and then converts the digits.

HexConstPat     pattern {spancset,xdigits}

dseg            ends

cseg            segment para public 'code'
                assume  cs:cseg, ds:dseg

; The store macro tweaks the DS register and stores into the
; specified variable in DSEG.
;
;       Where-  Destination variable; DS is pointed at its segment
;               (seg Where) for the duration of the store.
;       What-   Source operand for the mov.  It must not be AX, because
;               AX is overwritten with the segment value before the store.
;
; DS and AX are saved and restored.  The macro expands to push/mov/pop
; only, so the flags (in particular the carry flag returned by match2)
; are left untouched.

store           macro   Where, What
                push    ds
                push    ax
                mov     ax, seg Where
                mov     ds, ax
                mov     Where, What
                pop     ax
                pop     ds
                endm

; Pattern matching routines for the assembler.
; Each mnemonic has its own corresponding matching function that
; attempts to match the mnemonic. If it does, it initializes the
; AsmOpcode variable with the base opcode of the instruction.

; Compare against the "LOAD" string.
;
; TryLoad-      Runs match2 on LoadPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 0.  DX and SI are preserved;
;               the carry flag from match2 is passed back to the caller.

TryLoad         proc    far
                push    dx
                push    si
                ldxi    LoadPat
                match2
                jnc     NoTLMatch               ;Carry clear: no match.

                store   AsmOpcode, 0            ;Initialize base opcode.

NoTLMatch:      pop     si
                pop     dx
                ret
TryLoad         endp

; Compare against the "STORE" string.
;
; TryStore-     Runs match2 on StorePat.  If the carry flag comes back
;               set (match), AsmOpcode is set to 1.  DX and SI are
;               preserved; the carry flag from match2 is passed back.

TryStore        proc    far
                push    dx
                push    si
                ldxi    StorePat
                match2
                jnc     NoTSMatch               ;Carry clear: no match.
                store   AsmOpcode, 1            ;Initialize base opcode.

NoTSMatch:      pop     si
                pop     dx
                ret
TryStore        endp

; Compare against the "ADD" string.
;
; TryAdd-       Runs match2 on AddPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 2.  DX and SI are preserved;
;               the carry flag from match2 is passed back.

TryAdd          proc    far
                push    dx
                push    si
                ldxi    AddPat
                match2
                jnc     NoTAMatch               ;Carry clear: no match.
                store   AsmOpcode, 2            ;Initialize ADD opcode.

NoTAMatch:      pop     si
                pop     dx
                ret
TryAdd          endp

; Compare against the "SUB" string.
;
; TrySub-       Runs match2 on SubPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 3.  DX and SI are preserved;
;               the carry flag from match2 is passed back.

TrySub          proc    far
                push    dx
                push    si
                ldxi    SubPat
                match2
                jnc     NoTMMatch               ;Carry clear: no match.
                store   AsmOpcode, 3            ;Initialize SUB opcode.

NoTMMatch:      pop     si
                pop     dx
                ret
TrySub          endp

; Compare against the "IFEQ" string.
;
; TryIFEQ-      Runs match2 on IFEQPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 4.  DX and SI are preserved;
;               the carry flag from match2 is passed back.

TryIFEQ         proc    far
                push    dx
                push    si
                ldxi    IFEQPat
                match2
                jnc     NoIEMatch               ;Carry clear: no match.
                store   AsmOpcode, 4            ;Initialize IFEQ opcode.

NoIEMatch:      pop     si
                pop     dx
                ret
TryIFEQ         endp

; Compare against the "IFLT" string.
;
; TryIFLT-      Runs match2 on IFLTPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 5.  DX and SI are preserved;
;               the carry flag from match2 is passed back.

TryIFLT         proc    far
                push    dx
                push    si
                ldxi    IFLTPat
                match2
                jnc     NoILMatch               ;Carry clear: no match.
                store   AsmOpcode, 5            ;Initialize IFLT opcode.

NoILMatch:      pop     si
                pop     dx
                ret
TryIFLT         endp

; Compare against the "IFGT" string.
;
; TryIFGT-      Runs match2 on IFGTPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 6.  DX and SI are preserved;
;               the carry flag from match2 is passed back.

TryIFGT         proc    far
                push    dx
                push    si
                ldxi    IFGTPat
                match2
                jnc     NoIGMatch               ;Carry clear: no match.
                store   AsmOpcode, 6            ;Initialize IFGT opcode.

NoIGMatch:      pop     si
                pop     dx
                ret
TryIFGT         endp

; Compare against the "GET" string.
;
; TryGET-       Runs match2 on GetPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 7 (the "special" opcode) and
;               AsmOprnd1 to 2, which selects GET.  DX and SI are
;               preserved; the carry flag from match2 is passed back.

TryGET          proc    far
                push    dx
                push    si
                ldxi    GetPat
                match2
                jnc     NoGMatch                ;Carry clear: no match.
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 2            ;GET's Special opcode.

NoGMatch:       pop     si
                pop     dx
                ret
TryGET          endp

; Compare against the "PUT" string.
;
; TryPut-       Runs match2 on PutPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 7 (the "special" opcode) and
;               AsmOprnd1 to 3, which selects PUT.  DX and SI are
;               preserved; the carry flag from match2 is passed back.

TryPut          proc    far
                push    dx
                push    si
                ldxi    PutPat
                match2
                jnc     NoPMatch                ;Carry clear: no match.
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 3            ;PUT's Special opcode.

NoPMatch:       pop     si
                pop     dx
                ret
TryPut          endp

; Compare against the "GOTO" string.
;
; TryGOTO-      Runs match2 on GOTOPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 7 (the "special" opcode) and
;               AsmOprnd1 to 1, which selects GOTO.  DX and SI are
;               preserved; the carry flag from match2 is passed back.
;
; The exit label is NoGTMatch rather than NoGMatch so that it cannot
; collide with the label of the same name in TryGET when labels are not
; scoped to their procedure.

TryGOTO         proc    far
                push    dx
                push    si
                ldxi    GOTOPat
                match2
                jnc     NoGTMatch               ;Carry clear: no match.
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 1            ;GOTO's Special opcode.

NoGTMatch:      pop     si
                pop     dx
                ret
TryGOTO         endp

; Compare against the "HALT" string.
;
; TryHalt-      Runs match2 on HaltPat.  If the carry flag comes back set
;               (match), AsmOpcode is set to 7 (the "special" opcode) and
;               AsmOprnd1 to 0, which selects HALT.  AsmOprnd2 is also
;               cleared, because HALT has no operand to set it.  DX and SI
;               are preserved; the carry flag from match2 is passed back.

TryHalt         proc    far
                push    dx
                push    si
                ldxi    HaltPat
                match2
                jnc     NoHMatch                ;Carry clear: no match.
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 0            ;Halt's special opcode.
                store   AsmOprnd2, 0

NoHMatch:       pop     si
                pop     dx
                ret
TryHalt         endp

; MatchReg checks to see if we've got a valid register value. On entry,
; DS:SI points at the location to store the byte opcode (0, 1, 2, or 3) for
; a reasonable register (AX, BX, CX, or DX); ES:DI points at the string
; containing (hopefully) the register operand, and CX points at the last
; location plus one we can check in the string.
;
; On return, Carry=1 for success, 0 for failure. ES:AX must point beyond
; the characters which make up the register if we have a match.
; On failure AX is set to DI.
;
; Note: the register code is written to DS:[SI] before the length check,
; so a register name that runs past CX still overwrites the byte even
; though the match is reported as a failure.

MatchReg        proc    far

; ES:DI Points at two characters which should be AX/BX/CX/DX. Anything
; else is an error.  AX counts up through the register codes as each
; first letter is tried.

                cmp     byte ptr es:1[di], 'X'  ;Everyone needs this
                jne     BadReg
                xor     ax, ax                  ;886 "AX" reg code.
                cmp     byte ptr es:[di], 'A'   ;AX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'B'   ;BX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'C'   ;CX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'D'   ;DX?
                je      GoodReg

BadReg:         clc                             ;Report failure,
                mov     ax, di                  ; nothing consumed.
                ret

GoodReg:
                mov     ds:[si], al             ;Save register opcode.
                lea     ax, 2[di]               ;Skip past register.
                cmp     ax, cx                  ;Be sure we didn't go
                ja      BadReg                  ; too far.
                stc
                ret
MatchReg        endp

; MatchGen-     Matches a general addressing mode. Stuffs the appropriate
;               addressing mode code into AsmOprnd2. If a 16-bit constant
;               is required by this addressing mode, this code shoves that
;               into the AsmConst variable.
;
;               The alternatives are tried in this order; the first one
;               that matches wins:
;
;                   register    AsmOprnd2 = register code (set by MatchReg)
;                   [bx]        AsmOprnd2 = 4
;                   xxxx[bx]    AsmOprnd2 = 5
;                   [xxxx]      AsmOprnd2 = 6
;                   xxxx        AsmOprnd2 = 7
;
;               DX and SI are preserved.  The carry flag from the last
;               match2 is returned to the caller; the store macro does not
;               change the flags.

MatchGen        proc    far
                push    dx
                push    si

; Try a register operand.

                ldxi    Grp1Op2Reg
                match2
                jc      MGDone

; Try "[bx]".

                ldxi    BXIndrctPat
                match2
                jnc     TryBXIndexed
                store   AsmOprnd2, 4
                jmp     MGDone

; Look for an operand of the form "xxxx[bx]".

TryBXIndexed:
                ldxi    BXIndexedPat
                match2
                jnc     TryDirect
                store   AsmOprnd2, 5
                jmp     MGDone

; Try a direct address operand "[xxxx]".

TryDirect:
                ldxi    DirectPat
                match2
                jnc     TryImmediate
                store   AsmOprnd2, 6
                jmp     MGDone

; Look for an immediate operand "xxxx".

TryImmediate:
                ldxi    ImmediatePat
                match2
                jnc     MGDone                  ;Nothing matched: fail.
                store   AsmOprnd2, 7

MGDone:
                pop     si
                pop     dx
                ret
MatchGen        endp

; ConstPat-     Matches a 16-bit hex constant. If it matches, it converts
;               the string to an integer and stores it into AsmConst.
;
;               Returns with the carry flag set on a match and clear
;               otherwise.  DX, SI, DS, and the AX value returned by
;               match2 are preserved.
;
;               The store macro cannot be used for AsmConst here: it
;               overwrites AX with a segment value before storing, and AX
;               is where the converted constant lives.

ConstPat        proc    far
                push    dx
                push    si
                ldxi    HexConstPat
                match2
                jnc     CPDone                  ;No hex digits here.

                push    ds
                push    ax                      ;Save match2's AX.
                mov     ax, seg AsmConst
                mov     ds, ax
                atoh                            ;Presumably ES:DI digits -> AX;
                mov     AsmConst, ax            ; confirm against UCR stdlib.
                pop     ax
                pop     ds
                stc                             ;Report success.

CPDone:         pop     si
                pop     dx
                ret
ConstPat        endp

; Assemble-     This code assembles the instruction that ES:DI points
;               at and displays the hex opcode(s) for that instruction.
;
;               The statement is converted to upper case in place
;               (strupr) and echoed, then matched against InstrPat.  On a
;               successful match the instruction byte is built as
;
;                   bits 7..5  AsmOpcode
;                   bits 4..3  AsmOprnd1
;                   bits 2..0  AsmOprnd2
;
;               and printed in hex.  If the instruction carries a 16-bit
;               constant, it follows as two hex bytes, low byte first.
;
;               Two semantic checks are made after the syntax match:
;               STORE and GET may not use an immediate operand, and GOTO
;               must use one.

Assemble        proc    near

; Print out the instruction we're about to assemble.

                print
                byte    "Assembling: ",0
                strupr
                puts
                putcr

; Assemble the instruction:

                ldxi    InstrPat
                xor     cx, cx
                match
                jnc     SyntaxError

; Quick check for illegal instructions:

                cmp     AsmOpcode, 7            ;Special/Get instr.
                jne     TryStoreInstr
                cmp     AsmOprnd1, 2            ;GET opcode
                je      SeeIfImm
                cmp     AsmOprnd1, 1            ;Goto opcode
                je      IsGOTO

TryStoreInstr:  cmp     AsmOpcode, 1            ;Store Instruction
                jne     InstrOkay

SeeIfImm:       cmp     AsmOprnd2, 7            ;Immediate Adrs Mode
                jne     InstrOkay
                print
                db      "Syntax error: store/get immediate not allowed."
                db      " Try Again",cr,lf,0
                jmp     ASMDone

IsGOTO:         cmp     AsmOprnd2, 7            ;Immediate mode for GOTO
                je      InstrOkay
                print
                db      "Syntax error: GOTO only allows immediate "
                byte    "mode.",cr,lf
                db      0
                jmp     ASMDone

; Merge the opcode and operand fields together in the instruction byte,
; then output the opcode byte.

InstrOkay:      mov     al, AsmOpcode
                shl     al, 1                   ;Make room for the two
                shl     al, 1                   ; AsmOprnd1 bits.
                or      al, AsmOprnd1
                shl     al, 1                   ;Make room for the three
                shl     al, 1                   ; AsmOprnd2 bits.
                shl     al, 1
                or      al, AsmOprnd2
                puth

; The IFxx instructions (opcodes 4..6) always carry a constant.  Any
; other instruction carries one only for modes 5, 6, and 7.

                cmp     AsmOpcode, 4            ;IFEQ instruction
                jb      SimpleInstr
                cmp     AsmOpcode, 6            ;IFGT instruction
                jbe     PutConstant

SimpleInstr:    cmp     AsmOprnd2, 5
                jb      ASMDone

; If this instruction has a 16 bit operand, output it here.

PutConstant:    mov     al, ' '
                putc
                mov     ax, ASMConst
                puth                            ;Low byte (AL).
                mov     al, ' '                 ;AH still holds the high byte.
                putc
                xchg    al, ah
                puth                            ;High byte.
                jmp     ASMDone

SyntaxError:    print
                db      "Syntax error in instruction."
                db      cr,lf,0

ASMDone:        putcr
                ret
Assemble        endp

; Main program that tests the assembler.

; Main-         Points DS and ES at dseg, initializes the library's memory
;               manager (meminit), and runs Assemble on each of the sample
;               statements Str1..Str14 in turn.

Main            proc
                mov     ax, seg dseg    ;Set up the segment registers
                mov     ds, ax
                mov     es, ax
                meminit

                lesi    Str1
                call    Assemble
                lesi    Str2
                call    Assemble
                lesi    Str3
                call    Assemble
                lesi    Str4
                call    Assemble
                lesi    Str5
                call    Assemble
                lesi    Str6
                call    Assemble
                lesi    Str7
                call    Assemble
                lesi    Str8
                call    Assemble
                lesi    Str9
                call    Assemble
                lesi    Str10
                call    Assemble
                lesi    Str11
                call    Assemble
                lesi    Str12
                call    Assemble
                lesi    Str13
                call    Assemble
                lesi    Str14
                call    Assemble

Quit:           ExitPgm
Main            endp
cseg            ends

; Stack segment: 256 copies of the 6-byte string "stack ".
sseg            segment para stack 'stack'
stk             byte    256 dup ("stack ")
sseg            ends

; Sixteen uninitialized bytes in a segment of class 'zzzzzz'.
; NOTE(review): presumably this marks the end of the program image for
; the library's memory manager (meminit) -- confirm against the UCR
; Standard Library documentation.
zzzzzzseg       segment para public 'zzzzzz'
LastBytes       byte    16 dup (?)
zzzzzzseg       ends
end     Main

Sample Output:

Assembling: LOAD AX, 0
07 00 00
Assembling: LOAD AX, BX
01
Assembling: LOAD AX, AX
00
Assembling: ADD AX, 15
47 15 00
Assembling: SUB AX, [BX]
64
Assembling: STORE BX, [1000]
2E 00 10
Assembling: LOAD BX, 2000[BX]
0D 00 20
Assembling: GOTO 3000
EF 00 30
Assembling: IFLT AX, BX, 100
A1 00 01
Assembling: HALT
E0
Assembling: THIS IS ILLEGAL
Syntax error in instruction.

Assembling: LOAD AX, STORE
Syntax error in instruction.

Assembling: STORE AX, 1000
Syntax error: store/get immediate not allowed. Try Again

Assembling: IFEQ AX, 0, 0
Syntax error in instruction.

Chapter Sixteen (Part 10)

Table of Content

Chapter Sixteen (Part 12) 

Chapter Sixteen: Pattern Matching (Part 11)
29 SEP 1996