ifndef included_xmul
define included_xmul
include "../common/pushpop.asm"
include "../common/mov.asm"
include "mul/mul64.asm"
include "routines/rl64.asm"
var_z=xOP3+16
;uses 60 bytes after xOP1
xmul:
;Input:
; HL points to one number
; DE points to another
;Timing, excluding special cases (which take ~ 800cc):
;1057+{0,3}+{0,172}+mul64
;max: 1232+max(mul64)
; 11245cc
;min: 1057+min(mul64)
; 6688cc
;avg: 1144.5+avg(mul64)
; 9865.233ccs
push hl
push de
push bc
push af
push ix
push bc
call xmulpp;+_
pop hl
push de
ex de,hl
ld hl,var_z+8
call mov8
ex de,hl
pop de
ld (hl),e
inc hl
ld (hl),d
pop ix
pop af
pop bc
pop de
pop hl
ret
xmulpp;_:
push de
ld de,xOP1
call mov10
pop hl
call mov10
ld de,(xOP2+8)
ld hl,(xOP1+8)
xmul_stepin_geomean:
ld a,h
xor d
ld b,a
res 7,d
res 7,h
ld a,h : or l : jp z,casemul
ld a,d : or e : jp z,casemul2
add hl,de
ld de,$4000
sbc hl,de
jp c,mul_zero
jp m,mul_inf
sla b
jr nc,$+4;+_
set 7,h
;_:
push hl
call mul64
ld a,(var_z+15)
add a,a
pop de
jr c,xmul_incderet;+_
ifdef inc_FMA
ld hl,var_z
call rl64
else
ld hl,var_z+7
sla (hl)
endif
inc hl
jp rl64
xmul_incderet;_:
inc de
ret
casemul:
;xOP1 is inf/nan/0
ld hl,xOP2+9
ld a,(hl)
and $7F
dec hl
or (hl)
dec hl
ld a,(hl)
ld hl,xOP1
jr nz,casemul2_copy
;now we have two special cases to multipy together
;inf*inf-> inf
;0*0 -> 0
;
;nan*nan-> NaN
;inf*nan-> NaN
;inf*0 -> NaN
;nan*inf-> NaN
;nan*0 -> NaN
;0*inf -> NaN
;0*nan -> NaN
sla b
ld de,0
rr d
and $C0
ld c,a
ld a,(xOP1+7)
and $C0
cp c
jr z,$+4;+_
ld a,$40
;_:
ld (var_z+15),a
ret
casemul2:
;finite times inf/nan/0, so xOP2 -> out
ld hl,xOP2
casemul2_copy:
ld de,var_z+8
call mov8
ld e,(hl)
inc hl
ld d,(hl)
ret
mul_zero:
xor a
ld (var_z+15),a
ld d,a
ret
mul_inf:
ld d,e
ld a,255
ld (var_z+15),a
ret
endif