?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download

  1.  ifndef included_xadd
  2.  define included_xadd
  3.  include "../common/pushpop.asm"
  4.  include "../common/mov.asm"
  5.  include "routines/swapxOP2xOP3.asm"
  6.  include "routines/sub64.asm"
  7.  include "routines/add64.asm"
  8.  
  9. xadd:
  10. ;Input:
  11. ;  HL points to one number
  12. ;  DE points to another
  13. ;  BC points to output
  14. ;945
  15. ;+{0,57+swapxOP2xOP3}
  16. ;+{0,123+{0,249}+{0,430}+{0,210}+21*n+{0,232-26n}}, n from 0 to 9
  17. ;+{sub,add}
  18. ;sub:
  19. ;  84+{0,256}+sbc64  ;there is a 25% chance of it going here, I believe
  20. ;  {0,333+{0,7+13+{0,7+13+{0,7+13+{0,7+13+{0,7+13+{0,7+13+{0,29}}}}}}}}   ;this part happens at most once (50%) unless inputs have the same exponent, then it is about 8/3 (accurate to 17 digits)
  21. ;add:
  22. ;  69+adc64+{0,201+{0,39}
  23. ;avg: 2111.3076938cc, I think. A lot of weird probability.
  24.   call pushpop
  25.   push bc
  26.   call xaddpp;+_
  27.   pop de
  28.   ld hl,xOP3
  29.   jp mov10
  30. xaddpp;_:
  31. ;copy the inputs to xOP2 and xOP3, leaving xOP1 open for shifting
  32.   push de
  33.   ld de,xOP2
  34.   call mov10
  35.   pop hl
  36.   call mov10
  37. subadd_stepin:
  38.         ld de,(xOP2+8)
  39.         ld hl,(xOP3+8)
  40.   res 7,h
  41.   res 7,d
  42.   xor a
  43.   ld (xOP2-1),a
  44.   ld a,h
  45.   or l
  46.   jp z,caseadd1
  47.   ld a,d
  48.   or e
  49.   jp z,caseadd
  50. ; Now make sure xOP3 has the bigger exponent
  51.   sbc hl,de
  52.   jr nc,xadd_noswap;+_
  53.   xor a
  54.   sub l
  55.   ld l,a
  56.   sbc a,a
  57.   sub h
  58.   ld h,a
  59.   push hl
  60.   call swapxOP2xOP3   ;need to swap xOP2 and xOP3
  61.   pop hl
  62. xadd_noswap;_:
  63.   ld a,h
  64.   or a
  65.   ret nz
  66.   ld a,l
  67.   cp 66
  68.   ret nc
  69. ;Now we need to shift down by A bits.
  70.   or a
  71.   jr z,add_shifted
  72.   rra : call c,srlxOP2_mantissa
  73.   rra : call c,srl2xOP2_mantissa
  74.   rra : call c,srl4xOP2_mantissa
  75.   and $1F
  76.   ld l,a
  77.   ld bc,(xOP2&255)-1
  78.   ld h,xOP2>>8
  79.   add hl,bc
  80.   sub 10
  81.   cpl
  82.   ld c,a
  83.   ld de,xOP2-1
  84.   ldir
  85.   ld c,a
  86.   ld a,9
  87.   sub c
  88.   jr z,add_shifted
  89.   ld b,a
  90.   xor a
  91.   ld (de),a : inc de : djnz $-2
  92. add_shifted:
  93. ;If the signs match, then just add
  94. ;If they differ, then subtract
  95.         ld hl,xOP2+9
  96.         ld a,(xOP3+9)
  97.   xor (hl)
  98.   jp p,xadd_add
  99. ;subtract the mantissas
  100.   xor a
  101.   ld hl,xOP2-1
  102.   sub (hl)
  103.   ld (hl),a
  104.   inc hl
  105.   ld de,xOP3
  106.   call sbc64
  107.   jr nc,xadd_mantissap;+_
  108.   ;now we need to negate the mantissa, invert the sign
  109.   inc de
  110.   inc de
  111.   ld a,(de)
  112.   xor 80h
  113.   ld (de),a
  114.   ld hl,xOP2-1
  115.   xor a
  116.   ld c,a
  117.   sub (hl)
  118.   ld (hl),a
  119.   ld hl,xOP3
  120.   ld a,c : sbc a,(hl) : ld (hl),a
  121.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  122.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  123.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  124.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  125.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  126.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  127.   inc hl : ld a,c : sbc a,(hl) : ld (hl),a
  128. xadd_mantissap;_:
  129.   ret m
  130. ;need to shift up until top bit is 1. Should be at most 1 shift, I think
  131.  
  132.  
  133.   ld de,(xOP3+8)
  134.   ld a,(xOP2-1)
  135.   ld hl,xOP3-1
  136.   ld (hl),a
  137. ;We need to make sure that the mantissa isn't zero
  138.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  139.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  140.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  141.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  142.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  143.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  144.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  145.   inc hl : or (hl) : jr nz,xadd_mantissanz;+_
  146.   ld h,a
  147.   ld l,a
  148.   ld (xOP3+8),hl
  149.   ret
  150. add_zero:
  151.   ld (xOP3+7),a
  152.   ld h,a
  153.   ld l,a
  154.   ld (xOP3+8),hl
  155.   ret
  156. xadd_mantissanz;_:
  157.   dec de
  158.   ld a,d
  159.   and $7F
  160.   or e
  161.   jr z,add_zero
  162.  
  163.   ld hl,xOP3-1
  164.   sla (hl) : inc hl
  165.   rl (hl) : inc hl
  166.   rl (hl) : inc hl
  167.   rl (hl) : inc hl
  168.   rl (hl) : inc hl
  169.   rl (hl) : inc hl
  170.   rl (hl) : inc hl
  171.   rl (hl) : inc hl
  172.   rl (hl)
  173.   jp p,xadd_mantissanz;-_
  174.   ld (xOP3+8),de
  175.   ret
  176. xadd_add:
  177.   ;add the mantissas
  178.   ld hl,xOP2-1
  179.   rl (hl)
  180.   inc hl
  181.   ld de,xOP3
  182.   call adc64
  183.   ret nc
  184.   ex de,hl
  185.   inc hl
  186.   inc (hl) : jr nz,xadd_nobig;+_
  187.   inc hl
  188.   inc (hl)
  189.   ld a,(hl)
  190.   dec hl
  191.   and $7F
  192.   jr z,add_inf
  193.   scf
  194. xadd_nobig;_:
  195.   dec hl : rr (hl)
  196.   dec hl : rr (hl)
  197.   dec hl : rr (hl)
  198.   dec hl : rr (hl)
  199.   dec hl : rr (hl)
  200.   dec hl : rr (hl)
  201.   dec hl : rr (hl)
  202.   dec hl : rr (hl)
  203.   ret
  204. srl4xOP2_mantissa:
  205. ;242cc
  206.   ld hl,xOP2+7
  207.   ld b,a
  208.   xor a
  209.   rrd : dec hl
  210.   rrd : dec hl
  211.   rrd : dec hl
  212.   rrd : dec hl
  213.   rrd : dec hl
  214.   rrd : dec hl
  215.   rrd : dec hl
  216.   rrd : dec hl
  217.   rrd
  218.   ld a,b
  219.   ret
  220. srl2xOP2_mantissa:
  221. ;423
  222.   call srlxOP2_mantissa
  223. srlxOP2_mantissa:
  224. ;203
  225.   ld hl,xOP2+7
  226.   srl (hl) : dec hl
  227.   rr (hl) : dec hl
  228.   rr (hl) : dec hl
  229.   rr (hl) : dec hl
  230.   rr (hl) : dec hl
  231.   rr (hl) : dec hl
  232.   rr (hl) : dec hl
  233.   rr (hl) : dec hl
  234.   rr (hl)
  235.   ret
  236. add_inf:
  237.   ld (xOP3+7),a
  238.   ld hl,-1
  239.   ld (xOP3+8),hl
  240.   ret
  241.  
  242. caseadd:
  243. ;OP2 is special, OP3 is not
  244. ;zero+x => x for all x
  245. ;NaN +x => NaN for all x
  246. ;inf +x => inf, x != inf
  247.   ld a,(xOP2+7)
  248.   and $C0
  249.   ret z
  250. xadd_return_xOP2:
  251.   pop hl    ;pop the return
  252.   pop de    ;pop the pointer to the output
  253.   ld hl,xOP2
  254.   jp mov10
  255.  
  256.  
  257. caseadd1:
  258. ;OP3 is special, OP2 is unknown
  259. ;x+zero => x
  260. ;x+inf  => inf
  261. ;x+NaN  => NaN
  262. ;return result in xOP3
  263.   ld a,(xOP3+7)
  264.   and $C0
  265.   jr z,xadd_return_xOP2   ;xOP3 is 0, so return xOP2
  266.   ret p   ;NaN+x == NaN, NaN in xOP3 so return
  267.  
  268. ;if xOP2 is NaN or -inf, return NaN
  269. ;otherwise, return xOP2
  270.   ld a,d
  271.   or e
  272.   ret nz
  273.   ld a,(xOP2+7)
  274.   and $C0
  275.   ret z
  276.   jp p,xadd_return_NaN
  277. ;both inputs are inf, so make sure signs match
  278.   ld a,(xOP3+9)
  279.   ld d,a
  280.   ld a,(xOP2+9)
  281.   xor d
  282.   ret p
  283. xadd_return_NaN:
  284.   pop hl
  285.   pop de
  286.   ld hl,xconst_NaN
  287.   jp mov10
  288.  
  289.  endif
  290.