?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download

  1.  ifndef included_mul64
  2.  define included_mul64
  3.  include "../../common/mov.asm"
  4.  include "mul32.asm"
  5.  
  6. var_z=xOP3+16       ;base of the mul64 workspace (xOP3 is defined outside this listing)
  7. z0_64=var_z         ;mul64 stores a0*b0 here; the 16 bytes from here end up holding the 128-bit product
  8. z2_64=z0_64+8       ;mul64 stores a1*b1 here (upper 8 bytes of the 128-bit result)
  9. z0_32=z2_64+8       ;mul64 reads the 8-byte result of each mul32 call from here
  10. z2_32=z0_32+4       ;not referenced by mul64 itself; presumably used inside mul32.asm -- TODO confirm
  11.  
  12. mul64:
  13. ;multiplies the 64-bit integers at xOP1 and xOP2
      ;Operands are little-endian and are compared/combined as unsigned values
      ;here (assumes mul32 is an unsigned 32x32->64 multiply -- TODO confirm).
      ;Output: the 128-bit product in the 16 bytes at z0_64 (little-endian).
      ;Side effects: xOP1 is overwritten with the low 64 bits of the middle
      ;  term. mul32 is expected to leave its 8-byte result at z0_32 (that is
      ;  where this routine reads it from).
      ;Registers written here: AF, BC, DE, HL, IX (plus whatever mul32 and mov8
      ;  change; they are defined outside this listing).
      ;mul32 as called here: DE:HL and BC:IX hold the two 32-bit operands
      ;  (DE and BC are the high words).
      ;Method (Karatsuba, three mul32 calls), with a1:a0 = xOP1 and b1:b0 = xOP2
      ;split into 32-bit halves:
      ;  z2 = a1*b1, z0 = a0*b0, r = |a0-a1|*|b0-b1|
      ;  middle = z0+z2-r if (a0-a1) and (b0-b1) have the same sign, else z0+z2+r
      ;  product = z0 + (middle << 32) + (z2 << 64)
  14. ;min: 1740+3*min(mul32)
  15. ;     5631cc
  16. ;max: 1901+3*max(mul32)
  17. ;     10013cc
  18. ;avg: 1797+3*avg(mul32) + 9572881/2^24
  19. ;   :~8720.733cc
  20.  
  21.   ld de,(xOP1+6)      ;DE:HL = a1 (upper 32 bits of xOP1)
  22.   ld hl,(xOP1+4)
  23.   ld bc,(xOP2+6)      ;BC:IX = b1 (upper 32 bits of xOP2)
  24.   ld ix,(xOP2+4)
  25.   call mul32          ;z2 = a1*b1
  26.   ;copy the 8 bytes at z0_32 to z2_64
  27.   ld hl,z0_32
  28.   ld de,z2_64
  29.   call mov8
  30.  
  31.  
  32.   ld de,(xOP1+2)      ;DE:HL = a0 (lower 32 bits of xOP1)
  33.   ld hl,(xOP1)
  34.   ld bc,(xOP2+2)      ;BC:IX = b0 (lower 32 bits of xOP2)
  35.   ld ix,(xOP2)
  36.   call mul32          ;z0 = a0*b0
  37.   ;copy the 8 bytes at z0_32 to z0_64
  38.   ld hl,z0_32
  39.   ld de,z0_64
  40.   call mov8
  41.  
  42. ;now I need to subtract the 32-bit digits from each other
  43.   xor a               ;A = 0 and carry reset for the sbc chain below
  44.   ld hl,(xOP1)
  45.   ld bc,(xOP1+4)
  46.   sbc hl,bc           ;low word of a0-a1
  47.   ex de,hl            ;DE = low word of the difference
  48.   ld hl,(xOP1+2)
  49.   ld bc,(xOP1+6)
  50.   sbc hl,bc           ;HL = high word; carry set if a0 < a1
  51.   jr nc,mul64_sub32p1;+_  (difference is non-negative: A=0, carry=0)
  52.   ld b,a : sub e : ld e,a     ;negative: replace HL:DE with 0-HL:DE (B keeps the 0)
  53.   ld a,b : sbc a,d : ld d,a
  54.   ld a,b : sbc a,l : ld l,a
  55.   ld a,b : sbc a,h : ld h,a
  56.   ld a,b              ;A = 0; carry is still set because the negated value was nonzero
  57. mul64_sub32p1;_:
  58.   rla                 ;A = 1 if a0-a1 was negative, else 0; carry ends up reset (old bit 7 of A)
  59.   push hl   ;high word of |a0-a1| (popped into BC below)
  60.   push de             ;low word of |a0-a1| (popped into IX below)
  61.  
  62.   ld hl,(xOP2)
  63.   ld bc,(xOP2+4)
  64.   sbc hl,bc           ;low word of b0-b1 (carry is 0 here, from the rla above)
  65.   ex de,hl
  66.   ld hl,(xOP2+2)
  67.   ld bc,(xOP2+6)
  68.   sbc hl,bc           ;HL = high word; carry set if b0 < b1
  69.   jr nc,mul64_sub32p2;+_  (difference is non-negative: A keeps the first sign)
  70.   ld c,a              ;save the sign of a0-a1 while A is used for the negation
  71.   xor a
  72.   ld b,a
  73.   sub e : ld e,a      ;replace HL:DE with 0-HL:DE
  74.   ld a,b : sbc a,d : ld d,a
  75.   ld a,b : sbc a,l : ld l,a
  76.   ld a,b : sbc a,h : ld h,a
  77.   ld a,c
  78.   inc a               ;flips bit 0: bit 0 of A = 1 iff exactly one difference was negative
  79. mul64_sub32p2;_:
  80.   ex de,hl            ;DE = high word, HL = low word of |b0-b1|
  81.   pop ix              ;IX = low word of |a0-a1|
  82.   pop bc              ;BC = high word of |a0-a1|
  83.   push af             ;keep the sign bit across the call
  84.   call mul32          ;r = |a0-a1|*|b0-b1|, read back from z0_32 below
  85.   pop af    ;holds the sign in the low bit
  86.  
  87.   rra                 ;sign bit -> carry
  88.   jp c,mul64_add      ;signs differ: (a0-a1)*(b0-b1) = -r, so middle = z0+z2+r
  89. ;need to perform z0+z2-result
  90.   xor a               ;A = 0, will collect bit 64 of the middle term
  91.   ld hl,(z0_64)
  92.   ld de,(z2_64)
  93.   add hl,de
  94.   ld (xOP1),hl        ;xOP1 is reused as scratch for the low 64 bits of z0+z2
  95.   ld hl,(z0_64+2)
  96.   ld de,(z2_64+2)
  97.   adc hl,de
  98.   ld (xOP1+2),hl
  99.   ld hl,(z0_64+4)
  100.   ld de,(z2_64+4)
  101.   adc hl,de
  102.   ld (xOP1+4),hl
  103.   ld hl,(z0_64+6)
  104.   ld de,(z2_64+6)
  105.   adc hl,de
  106.   ld (xOP1+6),hl
  107.   rla                 ;A = carry out of the 64-bit sum; carry is reset for the sbc chain
  108. ;now need to subtract
  109.   ld hl,(xOP1)
  110.   ld de,(z0_32)
  111.   sbc hl,de
  112.   ld (xOP1),hl
  113.   ld hl,(xOP1+2)
  114.   ld de,(z0_32+2)
  115.   sbc hl,de
  116.   ld (xOP1+2),hl
  117.   ld hl,(xOP1+4)
  118.   ld de,(z0_32+4)
  119.   sbc hl,de
  120.   ld (xOP1+4),hl
  121.   ld hl,(xOP1+6)
  122.   ld de,(z0_32+6)
  123.   sbc hl,de
  124.   ld (xOP1+6),hl
  125.   sbc a,0             ;apply the final borrow: A:xOP1 = z0+z2-r (65 bits)
  126. mul64_final:
  127. ;now need to add it back in
      ;A:xOP1 (65-bit middle term) is added to the result starting at byte 4,
      ;i.e. shifted left by 32 bits
  128.   ld hl,(z0_64+4)
  129.   ld de,(xOP1)
  130.   add hl,de
  131.   ld (z0_64+4),hl
  132.   ld hl,(z0_64+6)
  133.   ld de,(xOP1+2)
  134.   adc hl,de
  135.   ld (z0_64+6),hl
  136.   ld hl,(z0_64+8)
  137.   ld de,(xOP1+4)
  138.   adc hl,de
  139.   ld (z0_64+8),hl
  140.   ld hl,(z0_64+10)
  141.   ld de,(xOP1+6)
  142.   adc hl,de
  143.   ld (z0_64+10),hl
  144.   ld hl,z0_64+12
  145.   adc a,(hl)          ;byte 12 += bit 64 of the middle term + carry from the chain above
  146.   ld (hl),a
  147.   ret nc              ;no carry out of byte 12: done
  148.   inc hl : inc (hl) : ret nz  ;otherwise ripple the carry through bytes 13..15
  149.   inc hl : inc (hl) : ret nz
  150.   inc hl : inc (hl) : ret
  151. mul64_add:
  152. ;add to the current result
  153. ;z0+z2+result
  154.   xor a               ;A = 0, will collect bit 64 of the middle term
  155.   ld hl,(z0_64)
  156.   ld de,(z2_64)
  157.   add hl,de
  158.   ld (xOP1),hl        ;xOP1 is reused as scratch for the low 64 bits of z0+z2
  159.   ld hl,(z0_64+2)
  160.   ld de,(z2_64+2)
  161.   adc hl,de
  162.   ld (xOP1+2),hl
  163.   ld hl,(z0_64+4)
  164.   ld de,(z2_64+4)
  165.   adc hl,de
  166.   ld (xOP1+4),hl
  167.   ld hl,(z0_64+6)
  168.   ld de,(z2_64+6)
  169.   adc hl,de
  170.   ld (xOP1+6),hl
  171.   rla                 ;A = carry out of the 64-bit sum; carry is reset
  172. ;now need to add the mul32 result (this is the add path, not a subtraction)
  173.   ld hl,(xOP1)
  174.   ld de,(z0_32)
  175.   add hl,de
  176.   ld (xOP1),hl
  177.   ld hl,(xOP1+2)
  178.   ld de,(z0_32+2)
  179.   adc hl,de
  180.   ld (xOP1+2),hl
  181.   ld hl,(xOP1+4)
  182.   ld de,(z0_32+4)
  183.   adc hl,de
  184.   ld (xOP1+4),hl
  185.   ld hl,(xOP1+6)
  186.   ld de,(z0_32+6)
  187.   adc hl,de
  188.   ld (xOP1+6),hl
  189.   adc a,0             ;fold the final carry in: A:xOP1 = z0+z2+r
  190.   jp mul64_final
  191.  endif
  192.