Login

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download | RSS feed

 ifndef included_mul64
 define included_mul64
 include "../../common/mov.asm"
 include "mul32.asm"

var_z=xOP3+16
z0_64=var_z
z2_64=z0_64+8
z0_32=z2_64+8
z2_32=z0_32+4

mul64:
;multiplies the 64-bit integers at xOP1 and xOP2
;min: 1740+3*min(mul32)
;     5631cc
;max: 1901+3*max(mul32)
;     10013cc
;avg: 1797+3*avg(mul32) + 9572881/2^24
;   :~8720.733cc

  ld de,(xOP1+6)
  ld hl,(xOP1+4)
  ld bc,(xOP2+6)
  ld ix,(xOP2+4)
  call mul32
  ;copy the 8 bytes at z0_32 to z2_64
  ld hl,z0_32
  ld de,z2_64
  call mov8


  ld de,(xOP1+2)
  ld hl,(xOP1)
  ld bc,(xOP2+2)
  ld ix,(xOP2)
  call mul32
  ;copy the 8 bytes at z0_32 to z0_64
  ld hl,z0_32
  ld de,z0_64
  call mov8

;now I need to subtract the 32-bit digits from each other
  xor a
  ld hl,(xOP1)
  ld bc,(xOP1+4)
  sbc hl,bc
  ex de,hl
  ld hl,(xOP1+2)
  ld bc,(xOP1+6)
  sbc hl,bc
  jr nc,mul64_sub32p1;+_
  ld b,a : sub e : ld e,a
  ld a,b : sbc a,d : ld d,a
  ld a,b : sbc a,l : ld l,a
  ld a,b : sbc a,h : ld h,a
  ld a,b
mul64_sub32p1;_:
  rla
  push hl   ;top byte
  push de

  ld hl,(xOP2)
  ld bc,(xOP2+4)
  sbc hl,bc
  ex de,hl
  ld hl,(xOP2+2)
  ld bc,(xOP2+6)
  sbc hl,bc
  jr nc,mul64_sub32p2;+_
  ld c,a
  xor a
  ld b,a
  sub e : ld e,a
  ld a,b : sbc a,d : ld d,a
  ld a,b : sbc a,l : ld l,a
  ld a,b : sbc a,h : ld h,a
  ld a,c
  inc a
mul64_sub32p2;_:
  ex de,hl
  pop ix
  pop bc
  push af
  call mul32
  pop af    ;holds the sign in the low bit

  rra
  jp c,mul64_add
;need to perform z0+z2-result
  xor a
  ld hl,(z0_64)
  ld de,(z2_64)
  add hl,de
  ld (xOP1),hl
  ld hl,(z0_64+2)
  ld de,(z2_64+2)
  adc hl,de
  ld (xOP1+2),hl
  ld hl,(z0_64+4)
  ld de,(z2_64+4)
  adc hl,de
  ld (xOP1+4),hl
  ld hl,(z0_64+6)
  ld de,(z2_64+6)
  adc hl,de
  ld (xOP1+6),hl
  rla
;now need to subtract
  ld hl,(xOP1)
  ld de,(z0_32)
  sbc hl,de
  ld (xOP1),hl
  ld hl,(xOP1+2)
  ld de,(z0_32+2)
  sbc hl,de
  ld (xOP1+2),hl
  ld hl,(xOP1+4)
  ld de,(z0_32+4)
  sbc hl,de
  ld (xOP1+4),hl
  ld hl,(xOP1+6)
  ld de,(z0_32+6)
  sbc hl,de
  ld (xOP1+6),hl
  sbc a,0
mul64_final:
;now need to add it back in
  ld hl,(z0_64+4)
  ld de,(xOP1)
  add hl,de
  ld (z0_64+4),hl
  ld hl,(z0_64+6)
  ld de,(xOP1+2)
  adc hl,de
  ld (z0_64+6),hl
  ld hl,(z0_64+8)
  ld de,(xOP1+4)
  adc hl,de
  ld (z0_64+8),hl
  ld hl,(z0_64+10)
  ld de,(xOP1+6)
  adc hl,de
  ld (z0_64+10),hl
  ld hl,z0_64+12
  adc a,(hl)
  ld (hl),a
  ret nc
  inc hl : inc (hl) : ret nz
  inc hl : inc (hl) : ret nz
  inc hl : inc (hl) : ret
mul64_add:
;add to the current result
;z0+z2+result
  xor a
  ld hl,(z0_64)
  ld de,(z2_64)
  add hl,de
  ld (xOP1),hl
  ld hl,(z0_64+2)
  ld de,(z2_64+2)
  adc hl,de
  ld (xOP1+2),hl
  ld hl,(z0_64+4)
  ld de,(z2_64+4)
  adc hl,de
  ld (xOP1+4),hl
  ld hl,(z0_64+6)
  ld de,(z2_64+6)
  adc hl,de
  ld (xOP1+6),hl
  rla
;now need to subtract
  ld hl,(xOP1)
  ld de,(z0_32)
  add hl,de
  ld (xOP1),hl
  ld hl,(xOP1+2)
  ld de,(z0_32+2)
  adc hl,de
  ld (xOP1+2),hl
  ld hl,(xOP1+4)
  ld de,(z0_32+4)
  adc hl,de
  ld (xOP1+4),hl
  ld hl,(xOP1+6)
  ld de,(z0_32+6)
  adc hl,de
  ld (xOP1+6),hl
  adc a,0
  jp mul64_final
 endif