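These are a few generic routines for 16-bit multiplication and division. For multiplication, the two 16-bit factors are in {mult_a} and {mult_b}, and the 32-bit product is written to {product} (low word) and {product}+2 (high word). For division, the dividend is in {div_a} and the divisor in {div_b}; the quotient is stored in {quotient} and the remainder in {remainder}, while {shift_div_a} is just used as a scratchpad. The division routine returns with carry set on division by zero and carry clear otherwise. The routines expect A, X and Y to be in 16-bit mode on entry and leave them in 16-bit mode on exit. The contents of A/X/Y are destroyed, so push/pull the registers before and after if needed.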
// unsigned_16x16_multiplication
// Multiplies the unsigned 16-bit values {mult_a} and {mult_b} using four 8x8
// multiplies through $4202/$4203/$4216 and stores the 32-bit result
// little-endian in {product}..{product}+3.
//   In:   {mult_a}, {mult_b}. The accumulator must be 16-bit (the code uses a
//         16-bit immediate and 16-bit reads of $4216).
//   Out:  {product} = low word, {product}+2 = high word.
//   Side: A, X, Y and the carry flag are overwritten. Index registers are
//         switched to 8-bit on entry and back to 16-bit before returning.
// With a = a1:a0 and b = b1:b0 (high:low bytes):
//   a*b = a0*b0 + ((a0*b1 + a1*b0) << 8) + ((a1*b1) << 16)
// NOTE(review): the instructions placed between each write to $4203 and the
// following read of $4216 appear to double as the wait for the multiplier,
// so recount cycles before reordering anything here.
unsigned_16x16_multiplication:
sep #$10 // 8-bit X/Y so every ldx/ldy/stx/sty moves exactly one byte
ldx {mult_a} // X = a0
stx $4202 // first factor = a0
ldy {mult_b} // Y = b0
sty $4203 //set up 1st multiply (a0*b0)
ldx {mult_b}+1 // X = b1
clc // nothing before the first adc touches carry, so it is still clear there
lda $4216 //load $4216 for 1st multiply (a0*b0)
stx $4203 //start 2nd multiply (a0*b1)
sta {product} // bytes 0-1 = a0*b0
stz {product}+2 //high word of product needs to be cleared
lda $4216 //read $4216 from 2nd multiply (a0*b1)
ldx {mult_a}+1 // X = a1
stx $4202 //set up 3rd multiply
sty $4203 //y still contains the low byte of {mult_b}, so this starts a1*b0
ldy {mult_b}+1 // Y = b1
adc {product}+1 // add high byte of a0*b0 (byte 2 is still zero); cannot carry
adc $4216 //add 3rd product; a carry out of this belongs in byte 3
sta {product}+1 // bytes 1-2
sty $4203 //set up 4th multiply (a1*b1)
lda {product}+2 //carry bit to last byte of product: A = byte 2, high byte zero
bcc +
adc #$00ff // carry is set here, so this adds $0100 (+1 in byte 3) and leaves carry clear
+;
adc $4216 //add 4th product
sta {product}+2 //final store
rep #$10 // restore 16-bit index registers
rts
// signed_16x16_multiplication
// Multiplies the signed (two's-complement) 16-bit values {mult_a} and
// {mult_b} and stores the 32-bit signed result little-endian in
// {product}..{product}+3. The unsigned product is built from four 8x8
// multiplies through $4202/$4203/$4216, then the high word is corrected for
// negative operands.
//   In:   {mult_a}, {mult_b}. The accumulator must be 16-bit (the code uses a
//         16-bit immediate and 16-bit reads of $4216).
//   Out:  {product} = low word, {product}+2 = high word.
//   Side: A, X, Y and the carry flag are overwritten. Index registers are
//         switched to 8-bit on entry and back to 16-bit before returning.
// With a = a1:a0 and b = b1:b0 (high:low bytes):
//   a*b = a0*b0 + ((a0*b1 + a1*b0) << 8) + ((a1*b1) << 16)
// Sign correction: a negative operand read as unsigned is 65536 too large,
// which adds the other operand to the high word; subtracting it undoes that.
// NOTE(review): the instructions placed between each write to $4203 and the
// following read of $4216 appear to double as the wait for the multiplier,
// so recount cycles before reordering anything here.
signed_16x16_multiplication:
sep #$10 // 8-bit X/Y so every ldx/ldy/stx/sty moves exactly one byte
ldx {mult_a} // X = a0
stx $4202 // first factor = a0
ldy {mult_b} // Y = b0
sty $4203 //set up 1st multiply (a0*b0)
ldx {mult_b}+1 // X = b1
clc // nothing before the first adc touches carry, so it is still clear there
lda $4216 //load $4216 for 1st multiply (a0*b0)
stx $4203 //start 2nd multiply (a0*b1)
sta {product} // bytes 0-1 = a0*b0
stz {product}+2 //high word of product needs to be cleared
lda $4216 //read $4216 from 2nd multiply (a0*b1)
ldx {mult_a}+1 // X = a1 (also tested below for the sign of {mult_a})
stx $4202 //set up 3rd multiply
sty $4203 //y still contains the low byte of {mult_b}, so this starts a1*b0
ldy {mult_b}+1 // Y = b1 (also tested below for the sign of {mult_b})
adc {product}+1 // add high byte of a0*b0 (byte 2 is still zero); cannot carry
adc $4216 //add 3rd product; a carry out of this belongs in byte 3
sta {product}+1 // bytes 1-2
sty $4203 //set up 4th multiply (a1*b1)
lda {product}+2 //carry bit to last byte of product: A = byte 2, high byte zero
bcc +
adc #$00ff // carry is set here, so this adds $0100 (+1 in byte 3) and leaves carry clear
+;
adc $4216 //add 4th product; A = high word of the unsigned product
cpx #$80 // X = high byte of {mult_a}; carry set when {mult_a} is negative
bcc +
sbc {mult_b} // carry is set by the cpx, so this is an exact subtract
+;
cpy #$80 // Y = high byte of {mult_b}; carry set when {mult_b} is negative
bcc +
sbc {mult_a} // carry is set by the cpy, so this is an exact subtract
+;
sta {product}+2 //final store
rep #$10 // restore 16-bit index registers
rts
// divide_by_16_bit
// Unsigned 16-bit division: {div_a} / {div_b}.
//   In:   {div_a} = dividend, {div_b} = divisor. A and X must be 16-bit
//         (16-bit immediates are used and X is stored to $4204 as a word).
//   Out:  carry clear: {quotient} and {remainder} hold the result.
//         carry set:   {div_b} was zero; {quotient}/{remainder} are not written.
//   Side: A, X and {shift_div_a} are overwritten (also on the error path).
// Method:
//   * divisor < $0100: branch to divide_by_8_bit, which uses the divider directly.
//   * otherwise halve dividend and divisor together until the divisor fits in
//     8 bits. The dividend rounds down and the divisor rounds up, so the
//     quotient read from the divider is never larger than the true quotient.
//     Multiply that estimate by the real divisor, subtract from the dividend,
//     then subtract {div_b} repeatedly to finish quotient and remainder.
//     Because the divisor is at least $0100 here, the quotient fits in one byte.
divide_by_16_bit:
ldx {div_a}
stx {shift_div_a} // working copy of the dividend that gets shifted
lda {div_b}
bne +
sec
rts //set carry to indicate divide by zero error
+;
cmp #$0100
bcc divide_by_8_bit // divisor already fits in 8 bits; X = dividend, A = divisor
-;
lsr {shift_div_a} // Divide numerator by 2
lsr // Divide denominator by 2,
adc #$0000 // adding carry flag into itself (rounds the halved divisor up)
cmp #$0100 // until it is under 256.
bcs -
ldx {shift_div_a}
stx $4204 // WRDIVL (16-bit store, so the high byte goes to $4205)
sep #$20 // 8-bit accumulator
sta $4206 // WRDIVB
nop #5 // NOTE(review): presumably assembles to five NOPs as the wait for the divider - confirm
stz {quotient}+1 // 8-bit store: high byte of the quotient is zero
lda $4214 // RDDIVL: estimated quotient
sta {quotient}
sta $4202 // WRMPYA
lda {div_b}+1
sta $4203 // WRMPYB: start estimate * high byte of {div_b}
lda {div_b}
xba // park the low byte of {div_b} in the hidden B half of the accumulator
lda $4216 // low byte of estimate * high byte of {div_b}
xba // B = that partial product, A = low byte of {div_b}
sta $4203 // start estimate * low byte of {div_b}
lda #$00 // low half zero, so the 16-bit A becomes (partial product << 8)
rep #$21 // 16-bit accumulator and carry clear for the adc
adc $4216 // A = estimate * {div_b}
sta {shift_div_a}
lda {div_a}
sec
sbc {shift_div_a} // A = {div_a} - estimate * {div_b}
-;
cmp {div_b} // remainder candidate still >= divisor?
bcc +
sbc {div_b} // carry is set by the cmp, so this is an exact subtract
inc {quotient}
bra -
+;
sta {remainder}
rts //should end with carry clear to indicate valid answer
// divide_by_8_bit
// Divides a 16-bit dividend by an 8-bit divisor with the divider at
// $4204/$4206. divide_by_16_bit branches here when {div_b} is below $0100.
//   In:   X = dividend (16-bit index register), A = divisor; only the low
//         byte of A is written to $4206. A must be 16-bit on entry for the
//         mode to be the same on exit (the code ends with a 16-bit accumulator).
//   Out:  {quotient} = word read from $4214, {remainder} = word read from
//         $4216, carry clear.
//   Side: A is overwritten.
// NOTE(review): there is no zero check here; the caller shown in this file
// rejects a zero divisor before branching to this label.
divide_by_8_bit:
stx $4204 // WRDIVL (16-bit store, so the high byte goes to $4205)
sep #$20 // 8-bit accumulator so only one byte is written to WRDIVB
sta $4206 // WRDIVB
nop #5 // NOTE(review): presumably assembles to five NOPs as the wait for the divider - confirm
rep #$21 //should end with carry clear to indicate valid answer; also restores 16-bit A
lda $4214 // quotient
sta {quotient}
lda $4216 // remainder
sta {remainder}
rts
A ca65 version of this routine can be found in common.s of the Nova the Squirrel 2 source.