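These are a few generic routines for 16-bit multiplication and division. For multiplication, the two factors are in {temp} and {temp2}, and the 32-bit product is returned with its low word in {temp3} and its high word in {temp4}. The multiplication routines use 16-bit accesses at {temp3}+1, so {temp4} is expected to sit directly after {temp3} in memory. For division, the dividend is in {temp} and the divisor is in {temp2}; the quotient is stored in {temp4}, and {temp3} is just used as a scratchpad register. The division routine returns with carry clear on success and with carry set if the divisor is zero. All routines expect the registers (A/X/Y) to be in 16-bit mode on entry and leave them in 16-bit mode on exit. The contents of A/X/Y are destroyed, so push/pull registers before and after if needed.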

// unsigned_16x16_multiplication
// Computes the unsigned 32-bit product {temp} * {temp2} from four 8x8
// partial products of the hardware multiplier (factors written to $4202
// and $4203, 16-bit result read from $4216).
//   P1 = low({temp})  * low({temp2})
//   P2 = low({temp})  * high({temp2})
//   P3 = high({temp}) * low({temp2})
//   P4 = high({temp}) * high({temp2})
//   product = P1 + (P2 + P3) * $100 + P4 * $10000
// In:   {temp}, {temp2} = 16-bit factors; A is used as a 16-bit register.
// Out:  {temp3} = low word of the product, {temp4} = high word.
// Uses: A, X and Y are overwritten. X/Y are switched to 8-bit inside the
//       routine and back to 16-bit before returning.
// NOTE(review): the 16-bit accesses at {temp3}+1 reach one byte past
//       {temp3}, so the code appears to rely on {temp4} being located at
//       {temp3}+2 -- confirm against the variable layout.
// NOTE(review): the instructions placed between each $4203 write and the
//       following $4216 read presumably double as the wait for the
//       multiplier; re-check the timing before reordering anything.
unsigned_16x16_multiplication:
sep #$10			//8-bit X/Y: single bytes are fed to the multiplier
ldx {temp}			//X = low byte of temp
stx $4202
ldy {temp2}			//Y = low byte of temp2
sty $4203			//set up 1st multiply (P1)
ldx {temp2}+1			//X = high byte of temp2
clc				//carry must be clear for the adc at temp3+1 below
lda $4216			//load $4216 for 1st multiply (P1)
stx $4203			//start 2nd multiply (P2), $4202 is unchanged
sta {temp3}			//low word of product = P1 for now
stz {temp4}			//high word of product needs to be cleared
lda $4216			//read $4216 from 2nd multiply (P2)
ldx {temp}+1			//X = high byte of temp
stx $4202			//set up 3rd multiply
sty $4203			//y still contains temp2 (low byte) -> starts P3
ldy {temp2}+1			//Y = high byte of temp2, for the 4th multiply
adc {temp3}+1			//A = P2 + high byte of P1 (upper byte read is the cleared byte after temp3)
adc $4216			//add 3rd product; carry out = overflow of the middle sum
sta {temp3}+1			//store product bytes 1 and 2
sty $4203			//set up 4th multiply (P4), $4202 still holds high byte of temp
lda {temp4}			//carry bit to last byte of product (lda leaves carry untouched)
bcc +
adc #$00ff			//carry is set here, so this adds $00ff+1 = $0100 and leaves carry clear
+;
adc $4216			//add 4th product
sta {temp4}			//final store
rep #$10			//back to 16-bit X/Y
rts

// signed_16x16_multiplication
// Computes the signed (two's complement) 32-bit product {temp} * {temp2}.
// The unsigned product is built from four 8x8 partial products of the
// hardware multiplier (factors written to $4202 and $4203, 16-bit result
// read from $4216), then the high word is corrected for negative factors.
//   P1 = low({temp})  * low({temp2})
//   P2 = low({temp})  * high({temp2})
//   P3 = high({temp}) * low({temp2})
//   P4 = high({temp}) * high({temp2})
// Sign correction: a negative factor read as unsigned is $10000 too large,
// which adds (other factor) * $10000 to the unsigned product, so the other
// factor is subtracted from the high word once per negative factor.
// In:   {temp}, {temp2} = 16-bit signed factors; A is used as a 16-bit register.
// Out:  {temp3} = low word of the product, {temp4} = high word.
// Uses: A, X and Y are overwritten. X/Y are switched to 8-bit inside the
//       routine and back to 16-bit before returning.
// NOTE(review): the 16-bit accesses at {temp3}+1 reach one byte past
//       {temp3}, so the code appears to rely on {temp4} being located at
//       {temp3}+2 -- confirm against the variable layout.
// NOTE(review): the instructions placed between each $4203 write and the
//       following $4216 read presumably double as the wait for the
//       multiplier; re-check the timing before reordering anything.
signed_16x16_multiplication:
sep #$10			//8-bit X/Y: single bytes are fed to the multiplier
ldx {temp}			//X = low byte of temp
stx $4202
ldy {temp2}			//Y = low byte of temp2
sty $4203			//set up 1st multiply (P1)
ldx {temp2}+1			//X = high byte of temp2
clc				//carry must be clear for the adc at temp3+1 below
lda $4216			//load $4216 for 1st multiply (P1)
stx $4203			//start 2nd multiply (P2), $4202 is unchanged
sta {temp3}			//low word of product = P1 for now
stz {temp4}			//high word of product needs to be cleared
lda $4216			//read $4216 from 2nd multiply (P2)
ldx {temp}+1			//X = high byte of temp (also used for the sign test below)
stx $4202			//set up 3rd multiply
sty $4203			//y still contains temp2 (low byte) -> starts P3
ldy {temp2}+1			//Y = high byte of temp2 (4th multiply and sign test below)
adc {temp3}+1			//A = P2 + high byte of P1 (upper byte read is the cleared byte after temp3)
adc $4216			//add 3rd product; carry out = overflow of the middle sum
sta {temp3}+1			//store product bytes 1 and 2
sty $4203			//set up 4th multiply (P4), $4202 still holds high byte of temp
lda {temp4}			//carry bit to last byte of product (lda leaves carry untouched)
bcc +
adc #$00ff			//carry is set here, so this adds $00ff+1 = $0100 and leaves carry clear
+;
adc $4216			//add 4th product -> A = high word of the unsigned product
cpx #$80			//X = high byte of temp; carry set when temp is negative
bcc +
sbc {temp2}			//temp negative: subtract temp2 from the high word (carry set, so no borrow)
+;
cpy #$80			//Y = high byte of temp2; carry set when temp2 is negative
bcc +
sbc {temp}			//temp2 negative: subtract temp from the high word
+;
sta {temp4}			//final store
rep #$10			//back to 16-bit X/Y
rts


// divide_by_16_bit
// Unsigned 16-bit by 16-bit division built on the hardware divider
// (16-bit dividend written to $4204, 8-bit divisor to $4206, quotient
// read from $4214) and the hardware multiplier ($4202/$4203 -> $4216).
// In:   {temp} = dividend, {temp2} = divisor; A and X 16 bits wide.
// Out:  {temp4} = quotient, carry clear.
//       Divisor of zero: returns at once with carry set and {temp4} = 0.
//       On the path for divisors >= $0100, A holds the remainder on return.
// Uses: {temp3} as scratch; A and X are overwritten.
// Method for divisors >= $0100:
//   1. Shift divisor and dividend right together until the divisor fits
//      in 8 bits. The divisor is rounded up at every step and the dividend
//      is rounded down, so the estimate from the hardware divider can be
//      too small but never too large.
//   2. Multiply the estimate by the full divisor and subtract that from
//      the dividend to get the remaining error.
//   3. Raise the quotient while the remainder is still >= the divisor.
// Divisors below $0100 are handed straight to divide_by_8_bit.
// NOTE(review): `nop #7` is presumably the assembler's repeat form and
//       serves as the wait for the divider -- confirm before changing it.
divide_by_16_bit:
lda {temp}
sta {temp3}         // scratch copy of the dividend
stz {temp4}         // quotient starts at 0 (also clears its high byte)
lda {temp2}
bne +
sec
rts		//set carry to indicate divide by zero error
+;
cmp #$0100
bcc divide_by_8_bit // divisor fits in 8 bits (branch target matches the label defined below)
-;
lsr                 // Divide denominator (in A) by 2,
adc #$0000          // adding the shifted-out bit back in (rounds up).
lsr {temp3}         // Divide numerator by 2
cmp #$0100          // until the denominator is under 256.
bcs -
ldx {temp3}
stx $4204           // WRDIVL/WRDIVH = scaled numerator
sep #$20            // 8-bit accumulator
sta $4206           // WRDIVB = scaled denominator, starts the division
nop #7
lda $4214           // RDDIVL = quotient estimate
sta {temp4}
sta $4202           // WRMPYA = estimate
lda {temp2}
sta $4203           // WRMPYB = low byte of the real divisor
lda {temp2}+1
nop
ldx $4216           // RDMPYL/H = estimate * low byte of divisor
sta $4203           // WRMPYB = high byte of the real divisor
stx {temp3}
lda {temp3}+1
clc
adc $4216           // RDMPYL: add low byte of estimate * high byte of divisor
sta {temp3}+1       // temp3 = estimate * divisor
rep #$20            // 16-bit accumulator
lda {temp}
sec
sbc {temp3}         // A = dividend - estimate * divisor
-;
cmp {temp2}
bcc +               // remainder below the divisor: done
sbc {temp2}         // carry is set here, so this is a plain subtract
inc {temp4}
bra -
+;
rts                     //should end with carry clear to indicate valid answer
// divide_by_8_bit
// Divides the 16-bit value in {temp3} by the low byte of A with the
// hardware divider (dividend to $4204, divisor to $4206, quotient from
// $4214). The divide routine above sets up {temp3} = dividend and
// A = divisor before reaching this code.
// In:   A = divisor (only the low byte is written to $4206),
//       {temp3} = dividend; X is expected to be 16 bits wide.
// Out:  {temp4} = 16-bit quotient, carry clear, A back in 16-bit mode.
// Uses: A and X are overwritten.
// NOTE(review): `nop #5` is presumably the assembler's repeat form and,
//       together with the rep below, serves as the wait for the divider
//       -- confirm before changing it.
divide_by_8_bit:
ldx {temp3}
stx $4204           // WRDIVL
sep #$20		//8-bit accumulator for the one-byte divisor write
sta $4206           // WRDIVB
nop #5
rep #$21		//clear carry to indicate valid answer
lda $4214		//16-bit read of the quotient
sta {temp4}
rts

A ca65 version of this routine can be found in common.s of the Nova the Squirrel 2 source.