?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download

  1.  ifndef included_xbg
  2.  define included_xbg
  3.  include "../common/pushpop.asm"
  4.  include "../common/mov.asm"
  5.  include "mul/xmul11.asm"
  6.  include "mul/xmul13.asm"
  7.  include "mul/xmul31.asm"
  8.  include "mul/xmul7.asm"
  9.  include "mul/xmul17.asm"
  10.  include "mul/xmul15.asm"
  11.  include "mul/xmul5.asm"
  12.  
  13.  include "xsub.asm"
  14.  include "xdiv.asm"
  15.  include "xamean.asm"
  16.  include "xgeomean.asm"
  17.  
  18. var_a=xOP3+42 ;FIXME
  19. var_g=var_a+10
  20. var_a0=var_a+20
  21. var_a1=var_a+30
  22. var_a2=var_a+40
  23. var_a3=var_a+50
  24. var_a4=var_a+60
  25. var_a5=var_a+70
  26.  
  27. ;This is an accelerated Borchardt-Gauss algorithm
  28. ;It is used in computing inverse trancendentals and the like.
  29. ;This implmenetation actually computes 1/BG(a,g) since this is useful.
  30. ;
  31. ;Basic outline:
  32. ;  a0 is the initial a
  33. ;  g0 is the initial g
  34. ;  compute a_(n+1) = (a_n+g_n)/2
  35. ;  compute a_(g+1) = (a_(n+1)+g_n)/2
  36. ;
  37. ;a1<<=2
  38. ;a2<<=6
  39. ;a3<<=12
  40. ;a4<<=20
  41. ;a5<<=30
  42. ;a6<<=42
  43. ;a3*=85
  44. ;a1+=a5
  45. ;a1*=105
  46. ;a2+=a4
  47. ;a2*=21
  48. ;a2-=a3
  49. ;a2*=341
  50. ;a2-=a1
  51. ;a2*=13
  52. ;a2+=a0
  53. ;a2+=a6
  54. ;return 3028466566125/a2
  55. ;
  56. xbg:
  57. ;1510+9*mov10+6*xamean+5*xgeomean+4*xadd+2*xsub+xdiv+xmul3+xmul5+2*xmul7+xmul11+xmul13+xmul15+xmul17+xmul31
  58. ;+{0,9}*(if pow>255-{2,6,12,20,30,42} choose 9*{1,2,3,4,5,6})
  59. ;min: 13329+5*xgeomean+12*xadd+xmul3+xmul5+2*xmul7+xmul11+xmul13+xmul15+xmul17+xmul31
  60. ;max: 18584+5*xgeomean+12*xadd+xmul3+xmul5+2*xmul7+xmul11+xmul13+xmul15+xmul17+xmul31
  61. ;avg: 99383.979+12*xadd+xmul3+xmul5+2*xmul7+xmul11+xmul13+xmul15+xmul17+xmul31
  62.   push hl
  63.   push de
  64.   push bc
  65.   push af
  66.   push bc
  67.   call xbgpp;+_
  68.   pop de
  69.   ld hl,var_a
  70.   call mov10
  71.   pop af
  72.   pop bc
  73.   pop de
  74.   pop hl
  75.   ret
  76. xbgpp;_:
  77.   push de
  78.   ld de,var_a
  79.   call mov10
  80.   pop hl
  81.   call mov10
  82.         ld hl,(var_a+8)
  83.         ld de,(var_g+8)
  84.         ld a,h
  85.         or d
  86.   jp m,bg_NaN
  87.         ld a,h : or l : jp z,casebg
  88.         ld a,d : or e : jp z,casebg2
  89.   ld hl,var_a
  90.   ld de,var_a0
  91.   call mov10
  92.  
  93.   ld de,var_a : ld b,d : ld c,e : call xamean
  94.   ld b,h : ld c,l : call xgeomean
  95.   ld hl,var_a1 : ex de,hl : call mov10
  96.  
  97.   ld de,var_a : ld b,d : ld c,e : call xamean
  98.   ld b,h : ld c,l : call xgeomean
  99.   ld hl,var_a2 : ex de,hl : call mov10
  100.  
  101.   ld de,var_a : ld b,d : ld c,e : call xamean
  102.   ld b,h : ld c,l : call xgeomean
  103.   ld hl,var_a3 : ex de,hl : call mov10
  104.  
  105.   ld de,var_a : ld b,d : ld c,e : call xamean
  106.   ld b,h : ld c,l : call xgeomean
  107.   ld hl,var_a4 : ex de,hl : call mov10
  108.  
  109.   ld de,var_a : ld b,d : ld c,e : call xamean
  110.   ld b,h : ld c,l : call xgeomean
  111.   ld hl,var_a5 : ex de,hl : call mov10
  112.  
  113.   ld de,var_a : ld b,d : ld c,e : call xamean   ;a7
  114.  
  115. ;Now we are going to adjust the exponents
  116.   ld hl,(var_a1+8)
  117.   ld a,l
  118.   add a,2
  119.   ld l,a
  120.   jr nc,$+2+1+3;+_
  121.   inc h
  122.   jp m,bg_Zero
  123. ;_:
  124.   ld (var_a1+8),hl
  125.   ld hl,(var_a2+8)
  126.   ld a,l
  127.   add a,6
  128.   ld l,a
  129.   jr nc,$+2+1+3;+_
  130.   inc h
  131.   jp m,bg_Zero
  132. ;_:
  133.   ld (var_a2+8),hl
  134.   ld hl,(var_a3+8)
  135.   ld a,l
  136.   add a,12
  137.   ld l,a
  138.   jr nc,$+2+1+3;+_
  139.   inc h
  140.   jp m,bg_Zero
  141. ;_:
  142.   ld (var_a3+8),hl
  143.   ld hl,(var_a4+8)
  144.   ld a,l
  145.   add a,20
  146.   ld l,a
  147.   jr nc,$+2+1+3;+_
  148.   inc h
  149.   jp m,bg_Zero
  150. ;_:
  151.   ld (var_a4+8),hl
  152.   ld hl,(var_a5+8)
  153.   ld a,l
  154.   add a,30
  155.   ld l,a
  156.   jr nc,$+2+1+3;+_
  157.   inc h
  158.   jp m,bg_Zero
  159. ;_:
  160.   ld (var_a5+8),hl
  161.   ld hl,(var_a+8)
  162.   ld a,l
  163.   add a,42
  164.   ld l,a
  165.   jr nc,$+2+1+3;+_
  166.   inc h
  167.   jp m,bg_Zero
  168. ;_:
  169.   ld (var_a+8),hl
  170. ;NOTE: I have verified that this is working up to here
  171. ;Accuracy is lost in the bottom two bits by the time a6 is computed.
  172. ;Next, multiply a3 by 85
  173.   ld hl,var_a3
  174.   ld b,h
  175.   ld c,l
  176.   call xmul5
  177.   call xmul17
  178.  
  179.   ld hl,var_a1
  180.   ld de,var_a5
  181.   ld b,h
  182.   ld c,l
  183.   call xadd
  184.   call xmul15
  185.   call xmul7
  186.  
  187.   ld hl,var_a2
  188.   ld de,var_a4
  189.   ld b,h
  190.   ld c,l
  191.   call xadd
  192.   call xmul3
  193.   call xmul7
  194.  
  195.   ld de,var_a3
  196.   call xsub
  197.   call xmul11
  198.   call xmul31
  199.  
  200.   ld de,var_a1
  201.   call xsub
  202.   call xmul13
  203.  
  204.   ld de,var_a0
  205.   call xadd
  206.   ld h,b
  207.   ld l,c
  208.   ld de,var_a
  209.   ld b,d
  210.   ld c,e
  211.   call xadd
  212.   ld hl,xconst_3028466566125
  213.   jp xdiv
  214. casebg:
  215. ;1/bg(0,x)    -> is actually permissable
  216. ;1/bg(inf,x)  -> 0
  217. ;1/bg(inf,inf)-> 0
  218. ;1/bg(0,inf)  -> 0 *** Not NaN, even though bg(inf,0)==NaN
  219. ;1/bg(0,0)    -> NaN
  220. ;1/bg(NaN,*)  -> NaN
  221. ;1/bg(inf,NaN)-> NaN
  222. ;1/bg(inf,0)  -> NaN
  223. ;1/bg(0,NaN)  -> NaN
  224.  
  225.   ret
  226. casebg2:
  227. ;1/bg(x,NaN)  -> NaN
  228. ;1/bg(x,0)    -> NaN
  229. ;1/bg(x,inf)  -> 0
  230.   ld a,(var_g+7)
  231.   add a,a
  232.   jr nc,bg_NaN
  233.   add a,a
  234.   jr nc,bg_NaN
  235. bg_Zero:
  236.   xor a
  237.   ld h,a
  238.   ld l,a
  239.   ld (xOP1+8),hl
  240.   ld (xOP1+7),a
  241.   ret
  242. bg_NaN:
  243.   ld hl,0
  244.   ld (xOP1+8),hl
  245.   ld a,$40
  246.   ld (xOP1+7),a
  247.   ret
  248. ;#undefine var_a
  249. ;#undefine var_g
  250. ;#undefine var_a0
  251. ;#undefine var_a1
  252. ;#undefine var_a2
  253. ;#undefine var_a3
  254. ;#undefine var_a4
  255. ;#undefine var_a5
  256.  endif
  257.