?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. #ifndef included_xfma
  2. #define included_xfma
  3. #include "pushpop.z80"
  4. #include "mov.z80"
  5. #include "routines/add64.z80"
  6. #include "routines/sub64.z80"
  7. #include "routines/sla64.z80"
  8. #include "routines/rr64.z80"
  9. #include "routines/srl64.z80"
  10. #include "routines/srl64_x4.z80"
  11. #include "routines/swapbuf.z80"
  12. #include "xmul.z80"
  13.  
  14.  
  15. xfma:
  16. ;Fused Multiply-Add on 10-byte floats (operands are moved around with mov10)
  17. ;Performs x*y+t -> z
  18. ;HL points to x
  19. ;DE points to y
  20. ;BC points to z (the output)
  21. ;IX points to t (the addend)
  22.   call pushpop   ;presumably preserves the caller's registers (defined in pushpop.z80) -- TODO confirm
  23.   push bc   ;save the output location
  24.   push ix   ;save the location of what to add
  25.  
  26. ;First multiply x and y, but keep full 128-bits precision
  27.   ld bc,var_z+8   ;xmul output pointer; fma_add afterwards treats var_z..var_z+17 as the product
  28.   call xmul
  29.  
  30. ;Now we need to perform a high-precision addition
  31. ;First we move the addend t to the scratch area just below var_z
  32.   pop hl   ;HL = pointer to t (the value pushed from IX above)
  33.   ld de,var_z-10
  34.   call mov10   ;presumably copies 10 bytes from (HL) to (DE) -- confirm in mov.z80
  35.  
  36. ;Now do a special add: var_z <- product + addend
  37.   call fma_add
  38.  
  39. ;Now return the result: the 10 bytes starting at var_z+8 go to z
  40.   pop de   ;DE = output location (the value pushed from BC above)
  41.   ld hl,var_z+8
  42.   jp mov10   ;tail call: mov10's ret acts as xfma's ret
  43.  
  44. fma_add:
  45. ; Aligns the addend (18 bytes at var_z-18) with the product (18 bytes at var_z), then falls into add_shifted_fma. First zero out the bottom 8 bytes of the addend
  46.   ld hl,0
  47.   ld (var_z-12),hl
  48.   ld (var_z-14),hl
  49.   ld (var_z-16),hl
  50.   ld (var_z-18),hl
  51. ; Layout of each 18-byte operand as used below: 16 mantissa bytes (low byte first),
  52. ; then a 16-bit word holding the exponent with the sign in bit 15.
  53. ; Check for special cases (an exponent field of zero on either operand)
  54.         ld de,(var_z-2)   ;DE = addend exponent word
  55.         ld hl,(var_z+16)   ;HL = product exponent word
  56.   res 7,h   ;drop the sign bits so only the exponents are compared
  57.   res 7,d
  58.  
  59.   ld a,h
  60.   or l
  61.   jp z,caseadd_fma   ;product exponent field is zero
  62.   ld a,d
  63.   or e   ;also leaves carry clear for the sbc below
  64.   jp z,caseadd1_fma   ;addend exponent field is zero
  65.  
  66. ; Now make sure var_z has the bigger exponent
  67.   sbc hl,de   ;HL = product exponent - addend exponent
  68.   jr nc,+_
  69.   xor a   ;difference was negative: HL = -HL
  70.   sub l
  71.   ld l,a
  72.   sbc a,a
  73.   sub h
  74.   ld h,a
  75.   push hl   ;keep the exponent difference across swapbuf
  76. ; We need to swap.
  77.   ld de,var_z-18
  78.   ld hl,var_z
  79.   ld bc,18
  80.   call swapbuf   ;presumably exchanges BC bytes between (HL) and (DE) -- confirm in swapbuf.z80
  81.   pop hl
  82. _:
  83.   ld a,h
  84.   or a
  85.   ret nz   ;difference >= 256: the smaller operand is ignored, var_z is the result
  86.   ld a,l
  87.   cp 130
  88.   ret nc   ;difference >= 130 bits: likewise nothing to add
  89. ;Now we need to shift down by A bits.
  90.   or a   ;also clears carry so the first rra shifts a 0 into bit 7
  91.   jr z,add_shifted_fma
  92.   rra \ call c,srl_var_z_m_18   ;bit 0 of the count: shift by 1
  93.   rra \ call c,srl2_var_z_m_18   ;bit 1 of the count: shift by 2
  94.   rra \ call c,srl4_var_z_m_18   ;bit 2 of the count: shift by 4
  95.   and $1F   ;A = whole bytes still to shift; also masks whatever the rra's rotated into the top bits
  96.   ld l,a
  97.   ld bc,255&(var_z-19)   ;B = 0 (ldir count high byte), C = low byte of var_z-19
  98.   ld h,(var_z-19)>>8   ;FIX: high byte taken from var_z-19 to match C (was var_z-18, wrong when var_z-18 is page aligned)
  99.   add hl,bc   ;HL = var_z-19+A, the source of the byte shift
  100.   sub 18
  101.   cpl   ;A = 17 - byte count
  102.   ld c,a   ;BC = number of bytes to move down
  103.   ld de,var_z-19
  104.   ldir   ;also overwrites the byte at var_z-19, just below the addend
  105.   ld c,a
  106.   ld a,17
  107.   sub c   ;A = byte count again
  108.   jr z,add_shifted_fma
  109.   ld b,a
  110.   xor a
  111.   ld (de),a \ inc de \ djnz $-2   ;zero the bytes vacated at the top of the addend mantissa
  112. add_shifted_fma:
  113. ;Entered with both operands aligned. If the signs match, then just add
  114. ;If they differ, then subtract
  115.         ld hl,var_z-1   ;high byte of the addend's exponent word (sign in bit 7)
  116.         ld a,(var_z+17)   ;high byte of var_z's exponent word (sign in bit 7)
  117.   xor (hl)
  118.   jp p,xfma_add   ;bit 7 of the XOR clear => signs are equal
  119.  
  120. ; Subtract the mantissas (presumably (DE) <- (DE) - (HL), 64 bits per call -- confirm in sub64.z80)
  121.   ld hl,var_z-18
  122.   ld de,var_z
  123.   call sub64
  124.   inc hl
  125.   inc de
  126.   call sbc64
  127.   jr nc,+_   ;no borrow: the difference is already non-negative
  128. ;Negate the mantissa, invert the sign
  129. ; Invert the sign (assumes sbc64 left DE at var_z+15, so DE+2 is the sign byte var_z+17)
  130.   inc de
  131.   inc de
  132.   ld a,(de)
  133.   xor 80h   ;also clears carry for the negate loop below
  134.   ld (de),a
  135.  
  136. ; Negate the mantissa: 0 - mantissa over 16 bytes, low byte first
  137.   ld hl,var_z
  138.   ld bc,$1000   ;B = 16 bytes, C = 0
  139.   ld a,c \ sbc a,(hl) \ ld (hl),a \ inc hl
  140.   djnz $-4   ;back to the ld a,c; inc hl and djnz leave the borrow intact
  141. _:
  142.   ret m   ;sign flag set: top mantissa bit is 1 after the negate loop (NOTE(review): on the no-borrow path this relies on sbc64's flags -- confirm)
  143.  
  144. ;need to shift up until top bit is 1. Should be at most 1 shift, I think
  145.   ld de,(var_z+16)   ;DE = exponent word, decremented once per left shift below
  146.  
  147. ; Make sure that the mantissa isn't zero
  148.   ld hl,var_z
  149.   ld b,15
  150.   ld a,(hl)
  151. _:
  152.   inc hl \ or (hl) \ jr nz,+_   ;any non-zero byte => go normalize
  153.   djnz -_
  154.   inc hl   ;mantissa is all zero: HL = var_z+16, A = 0
  155.   ld (hl),a   ;clear the exponent word
  156.   inc hl
  157.   ld (hl),a
  158.   ret
  159.  
  160. _:
  161.   dec de
  162.   ld a,d
  163.   and $7F   ;ignore the sign bit
  164.   or e
  165.   jp z,add_zero_fma   ;exponent field reached zero: return zero
  166.  
  167.   ld hl,var_z
  168.   call sla64   ;shift the 128-bit mantissa left one bit (low half, then high half with carry)
  169.   inc hl
  170.   call rl64
  171.   jp p,-_   ;repeat while the sign flag left by rl64 is clear (presumably the top mantissa bit)
  172.   ld (var_z+16),de   ;store the adjusted exponent
  173.   ret
  174.  
  175. xfma_add:
  176.   ;add the mantissas (presumably (DE) <- (DE) + (HL), 64 bits per call -- confirm in add64.z80)
  177.   ld hl,var_z-18
  178.   ld de,var_z
  179.   call add64
  180.   inc hl
  181.   inc de
  182.   call adc64
  183.   ret nc   ;no carry out of the top byte: done
  184.   ex de,hl   ;assumes adc64 left DE at var_z+15 (top mantissa byte) -- confirm
  185.   inc hl   ;HL = low byte of the exponent word
  186.   inc (hl) \ jr nz,+_   ;exponent+1; inc (hl) leaves the carry from adc64 set
  187.   inc hl
  188.   inc (hl)   ;low byte wrapped: carry into the high byte
  189.   ld a,(hl)
  190.   dec hl
  191.   and $7F   ;ignore the sign bit
  192.   jr z,add_inf_fma   ;exponent field wrapped to zero: overflow
  193.   scf   ;the and cleared carry; set it again for the rotate below
  194. _:
  195.   dec hl   ;back to the top mantissa byte
  196.   jp rr64   ;tail call: rotate the carry into the top 64 bits of the mantissa
  197. srl4_var_z_m_18:   ;Called by fma_add for bit 2 of the shift count: shifts the addend mantissa right (by 4 bits, going by the helper names -- confirm). A is preserved.
  198.   ld hl,var_z-3   ;top byte of the addend mantissa
  199.   ld b,a   ;keep A (the remaining shift count) in B across the calls
  200.   call srl64_4   ;upper 8 bytes
  201.   dec hl   ;assumes srl64_4 leaves HL on the last byte it processed -- confirm
  202.   call rrd8   ;lower 8 bytes
  203.   ld a,b
  204.   ret
  205. srl2_var_z_m_18:   ;Shift the addend mantissa right twice: one call, then fall through for the second
  206.   call srl_var_z_m_18
  207. srl_var_z_m_18:   ;Shift the addend mantissa right once (one bit, going by the helper names -- confirm). A is preserved.
  208.   ld hl,var_z-3   ;top byte of the addend mantissa
  209.   ld b,a   ;keep A (the remaining shift count) in B across the calls
  210.   call srl64   ;upper 8 bytes
  211.   dec hl   ;assumes srl64 leaves HL on the last byte it processed -- confirm
  212.   call rr64   ;lower 8 bytes, presumably taking the carry out of srl64
  213.   ld a,b
  214.   ret
  215. caseadd_fma:
  216. ;Reached when the product's exponent field is zero. Intended results (NOT implemented, this is a bare ret):
  217. ;zero+x => x for all x; NaN +x => NaN for all x
  218. ;inf-inf=> NaN
  219. ;inf +x => inf, x != inf
  220.   ret   ;NOTE(review): var_z is left unchanged, so the addend is ignored in every one of these cases
  221. caseadd1_fma:
  222. ;Reached when the addend's exponent field is zero. Intended results: x+zero => x
  223. ;x+inf  => inf
  224. ;x+NaN  => NaN
  225.   ret   ;NOTE(review): bare ret leaves var_z (the product) unchanged, so the inf and NaN cases are not implemented
  226. add_zero_fma:
  227. ;Store a zero result: exponent word and top mantissa byte of var_z cleared. Exits with A = 0, HL = 0.
  228.   ld hl,0
  229.   ld (var_z+16),hl
  230.   xor a
  231.   ld (var_z+15),a
  232.   ret
  233. add_inf_fma:
  234. ;Store the overflow result: top mantissa byte = 0, exponent word = $FFFF. Exits with A = $FF, HL = $FFFF.
  235.   xor a
  236.   ld (var_z+15),a
  237.   ld hl,$FFFF
  238.   ld (var_z+16),hl
  239.   dec a
  240.   ret
  241. #endif
  242.