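These are a few generic routines for 16-bit multiplication and division. The multiplicands are in {temp} and {temp2}, while the high and low words of the product are in {temp3} and {temp4}. For division, the result is stored in {temp4} and {temp3} is just used as a scratchpad register. (The listing below uses descriptive names for these variables: {mult_a}, {mult_b} and {product} for multiplication; {div_a}, {div_b}, {quotient}, {remainder} and the scratch variable {shift_div_a} for division.) The routines expect the registers to be in 16-bit mode on entry and leave them in 16-bit mode on exit. The contents of A/X/Y are destroyed, so push/pull the registers before and after the call if needed.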

// unsigned_16x16_multiplication
// Multiplies the unsigned 16-bit values {mult_a} and {mult_b} with the 8x8
// hardware multiplier (operands written to $4202/$4203, 16-bit result read
// from $4216) and accumulates the four partial products
//     a_lo*b_lo + (a_lo*b_hi + a_hi*b_lo) << 8 + (a_hi*b_hi) << 16
// into a 32-bit result.
//   In:       {mult_a}, {mult_b}. A must be 16-bit on entry (the loads from
//             $4216 and the stores to {product} are 16-bit accesses).
//   Out:      {product}..{product}+3 = product, least significant byte first.
//   Clobbers: A, X, Y, flags. X/Y are switched to 8-bit for the body and
//             set back to 16-bit before returning.
// NOTE(review): each multiply is started by the write to $4203 and its result
// is read a few instructions later; the instructions in between appear to be
// ordered to cover the multiplier's latency. Confirm cycle counts (they
// depend on where the variables live) before reordering anything.
unsigned_16x16_multiplication:
sep #$10			// 8-bit X/Y: stx/sty below write single bytes
ldx {mult_a}			// X = a_lo
stx $4202			// first factor = a_lo
ldy {mult_b}			// Y = b_lo (kept in Y for the 3rd multiply)
sty $4203			//set up 1st multiply: a_lo * b_lo
ldx {mult_b}+1			// X = b_hi
clc				// clear carry for the adc chain; nothing before the first adc changes it
lda $4216			//load $4216 for 1st multiply (a_lo * b_lo)
stx $4203			//start 2nd multiply: a_lo * b_hi
sta {product}			// product bytes 0-1 = a_lo * b_lo
stz {product}+2		//high word of product needs to be cleared
lda $4216			//read $4216 from 2nd multiply (a_lo * b_hi)
ldx {mult_a}+1			// X = a_hi
stx $4202			//set up 3rd multiply: first factor = a_hi
sty $4203			//y still contains the low byte of mult_b: starts a_hi * b_lo
ldy {mult_b}+1			// Y = b_hi, needed for the 4th multiply
adc {product}+1		// add product bytes 1-2 (byte 2 is zero); at most $FE01+$00FF, so no carry out
adc $4216			//add 3rd product; a carry out of this add belongs in product byte 3
sta {product}+1		// product bytes 1-2 = middle sum
sty $4203			//set up 4th multiply: a_hi * b_hi
lda {product}+2		//carry bit to last byte of product: A = bytes 2-3 (byte 3 is zero), lda keeps carry
bcc +				// no carry from the middle sum: nothing to propagate
adc #$00ff			// carry is set here, so this adds $00ff+1 = $0100, i.e. +1 in byte 3; carry ends clear
+;
adc $4216			//add 4th product
sta {product}+2		//final store
rep #$10			// back to 16-bit X/Y as the caller expects
rts

// signed_16x16_multiplication
// Multiplies {mult_a} and {mult_b} as signed (two's complement) 16-bit
// values. The body first forms the unsigned 32-bit product from four 8x8
// hardware multiplies (operands written to $4202/$4203, result read from
// $4216), then corrects the high word: for each operand whose top bit is
// set, the OTHER operand is subtracted from the high 16 bits.
//   In:       {mult_a}, {mult_b}. A must be 16-bit on entry (the loads from
//             $4216 and the stores to {product} are 16-bit accesses).
//   Out:      {product}..{product}+3 = product, least significant byte first.
//   Clobbers: A, X, Y, flags. X/Y are switched to 8-bit for the body and
//             set back to 16-bit before returning.
// NOTE(review): each multiply is started by the write to $4203 and its result
// is read a few instructions later; the instructions in between appear to be
// ordered to cover the multiplier's latency. Confirm cycle counts (they
// depend on where the variables live) before reordering anything.
signed_16x16_multiplication:
sep #$10			// 8-bit X/Y: stx/sty below write single bytes
ldx {mult_a}			// X = a_lo
stx $4202			// first factor = a_lo
ldy {mult_b}			// Y = b_lo (kept in Y for the 3rd multiply)
sty $4203			//set up 1st multiply: a_lo * b_lo
ldx {mult_b}+1			// X = b_hi
clc				// clear carry for the adc chain; nothing before the first adc changes it
lda $4216			//load $4216 for 1st multiply (a_lo * b_lo)
stx $4203			//start 2nd multiply: a_lo * b_hi
sta {product}			// product bytes 0-1 = a_lo * b_lo
stz {product}+2		//high word of product needs to be cleared
lda $4216			//read $4216 from 2nd multiply (a_lo * b_hi)
ldx {mult_a}+1			// X = a_hi; also used for the sign test of mult_a below
stx $4202			//set up 3rd multiply: first factor = a_hi
sty $4203			//y still contains the low byte of mult_b: starts a_hi * b_lo
ldy {mult_b}+1			// Y = b_hi; also used for the sign test of mult_b below
adc {product}+1		// add product bytes 1-2 (byte 2 is zero); at most $FE01+$00FF, so no carry out
adc $4216			//add 3rd product; a carry out of this add belongs in product byte 3
sta {product}+1		// product bytes 1-2 = middle sum
sty $4203			//set up 4th multiply: a_hi * b_hi
lda {product}+2		//carry bit to last byte of product: A = bytes 2-3 (byte 3 is zero), lda keeps carry
bcc +				// no carry from the middle sum: nothing to propagate
adc #$00ff			// carry is set here, so this adds $00ff+1 = $0100, i.e. +1 in byte 3; carry ends clear
+;
adc $4216			//add 4th product: A = high word of the unsigned product
cpx #$80			// X = a_hi: carry set when mult_a has its sign bit set
bcc +				// mult_a non-negative: no correction
sbc {mult_b}			// carry is set here, so this is an exact high_word -= mult_b
+;
cpy #$80			// Y = b_hi: carry set when mult_b has its sign bit set
bcc +				// mult_b non-negative: no correction
sbc {mult_a}			// carry is set here, so this is an exact high_word -= mult_a
+;
sta {product}+2		//final store
rep #$10			// back to 16-bit X/Y as the caller expects
rts


// divide_by_16_bit
// Unsigned 16-bit by 16-bit division built on the hardware divider, which is
// fed a 16-bit dividend ($4204) and an 8-bit divisor ($4206) here.
//   In:       {div_a} = dividend, {div_b} = divisor. A and X must be 16-bit.
//   Out:      carry clear, {quotient} and {remainder} valid; or
//             carry set if {div_b} is zero ({quotient}/{remainder} not written).
//   Scratch:  {shift_div_a}.
//   Clobbers: A, X, flags.
// Method:
//   * divisor < $0100: branch to divide_by_8_bit, one hardware divide.
//   * otherwise: halve dividend (rounding down) and divisor (rounding up)
//     until the divisor fits in 8 bits, divide those to get a quotient
//     estimate, multiply the estimate back by the real divisor with the
//     hardware multiplier, and step the estimate up while the leftover is
//     still >= the divisor. Rounding the dividend down and the divisor up
//     means the estimate should never exceed the true quotient, which is why
//     the fix-up loop only ever increments.
divide_by_16_bit:
ldx {div_a}		    // X = dividend; still in X if we branch to divide_by_8_bit
stx {shift_div_a}	    // working copy of the dividend that gets shifted
lda {div_b}		    // A = divisor
bne +
sec
rts		    //set carry to indicate divide by zero error
+;
cmp #$0100
bcc divide_by_8_bit	    // divisor already fits in 8 bits
-;
lsr {shift_div_a}   // Divide numerator by 2 (rounds down)
lsr                 // Divide denominator by 2,
adc #$0000          // adding carry flag into itself (rounds the halved divisor up)
cmp #$0100	    // until it is under 256.
bcs -
ldx {shift_div_a}
stx $4204           // WRDIVL: 16-bit store of the reduced dividend
sep #$20            // 8-bit accumulator
sta $4206           // WRDIVB: reduced divisor
nop #5		    // NOTE(review): presumably emits five NOPs to wait for the divider - confirm with the assembler and the required delay
stz {quotient}+1    // 8-bit store: clear the high byte of the quotient
lda $4214           // RDDIVL: low byte of the quotient estimate
sta {quotient}
sta $4202           // WRMPYA: estimate becomes the first factor of both multiplies below
lda {div_b}+1
sta $4203           // WRMPYB: start estimate * high byte of divisor
lda {div_b}	    // low byte of divisor
xba		    // park it in the high half of the accumulator
lda $4216	    // low byte of estimate * divisor_hi
xba		    // low half = divisor low byte, high half = that product byte
sta $4203	    // start estimate * low byte of divisor
lda #$00	    // 16-bit accumulator will read as (estimate * divisor_hi) << 8
rep #$21	    // 16-bit accumulator and carry clear for the adc
adc $4216	    // A = estimate * {div_b}
sta {shift_div_a}
lda {div_a}
sec
sbc {shift_div_a}   // A = dividend - estimate * divisor (provisional remainder)
-;
cmp {div_b}
bcc +		    // remainder < divisor: done, carry is clear
sbc {div_b}	    // carry is set here (bcc not taken), so this subtracts exactly {div_b}
inc {quotient}	    // 16-bit increment; high byte was cleared above
bra -
+;
sta {remainder}
rts                         //should end with carry clear to indicate valid answer
// divide_by_8_bit
// Fast path of divide_by_16_bit for a non-zero divisor below $0100: one
// hardware divide gives the final answer.
//   In:   X = dividend (16-bit) and A = divisor, as left by the start of
//         divide_by_16_bit, which branches here. It is not written to be
//         called on its own, since it does not load either value itself.
//   Out:  {quotient}, {remainder}; carry clear. A is 16-bit again on return.
divide_by_8_bit:
stx $4204           	// WRDIVL: 16-bit store of the dividend
sep #$20		// 8-bit accumulator so only one byte is written to $4206
sta $4206           	// WRDIVB: divisor
nop #5			// NOTE(review): presumably emits five NOPs to wait for the divider - confirm with the assembler and the required delay
rep #$21                    //should end with carry clear to indicate valid answer (also restores 16-bit A)
lda $4214		// 16-bit read of the quotient
sta {quotient}
lda $4216		// 16-bit read of the remainder
sta {remainder}
rts

A ca65 version of this routine can be found in common.s of the Nova the Squirrel 2 source.