?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1.  ifndef included_sqrt32
  2.  define included_sqrt32
  3.  include "../../common/sqrtHLIX.asm"
  4.  include "../../common/mul16.asm"
  5.  include "../div/div32_16.asm"
  6.  
  7.  
  8. var_c=xOP1   ;input; sqrt32 reads the words at var_c and var_c+2
  9. var_y=var_c+4 ;  used for sqrt32: words at var_y and var_y+2 are handed to sqrtHLIX
  10. var_x=xOP2   ;output; sqrt32 stores words at var_x+4 and var_x+6
  11. var_b=xOP2   ; 4 bytes, result gets copied to bottom anyways (not referenced by sqrt32 in this file)
  12. var_a=xOP2   ;   2 bytes (not referenced by sqrt32 in this file)
  13. var_z0=xOP2+8;used in sqr32 (not referenced by sqrt32 in this file)
  14. ;NOTE!
  15. ;This expects the top two bits to be non-zero (presumably of the input at var_c..var_c+7 -- TODO confirm)
  16.  
  17. sqrt32:
      ;Square-root step built on sqrtHLIX, div32_16 and mul16.
      ;Inputs (memory):  words at var_y+2, var_y, var_c+2 and var_c
      ;                  (var_y = var_c+4, so the 8 bytes starting at var_c are read).
      ;Outputs (memory): DE returned by sqrtHLIX is stored at var_x+6;
      ;                  DE returned by div32_16 (or $FFFF, via sqrt32_higher_prec) at var_x+4.
      ;Outputs (regs):   the remainder is returned in A:HL:BC.
      ;Modifies A, BC, DE, HL, IX and flags; uses one stack word around the mul16 call.
      ;The helper register interfaces mentioned below come from the existing comments;
      ;the helpers live in the included files -- TODO confirm against them.
  18. ;Speed: 358+sqrtHLIX+div32_16+mul16
  19. ;min: 2472      ;might be faster now, need to re-analyze
  20. ;max: 3262      ;+37cc slower, need to re-analyze
  21. ;avg: 2958.637  ;~37cc slower, need to re-analyze
  22. ;Step one is to generate 16 accurate bits
  23.   ld hl,(var_y+2)
  24.   ld ix,(var_y)
  25.   call sqrtHLIX
  26.   ld (var_x+6),de   ;store the 16 bits produced by sqrtHLIX
  27.   ld b,d            ;keep a copy of them in BC
  28.   ld c,e
  29. ;now AHL is the remainder with A at most 1.
  30. ;Fetch the next 16 bits and put them in DE
  31.   ld de,(var_c+2)
  32.  
  33. ;AHLDE is the new remainder
  34. ;Need to divide by 2, then divide by the 16-bit value in BC (the word just stored at var_x+6; var_x+4 is not written until after the division)
  35.   rra               ;shift A:HL:DE right by one bit...
  36.   rr h
  37.   rr l
  38.   rr d
  39.   rr e              ;...bit 0 of E ends up in carry
  40.   ld ixh,d          ;IX = DE (uses the undocumented IXH/IXL half-registers)
  41.   ld ixl,e
  42.  
  43.   or a              ;clear carry so the sbc below is a plain compare (the shifted-out bit is re-read from var_c+2 later)
  44.   sbc hl,bc         ;Z set when HL == BC
  45.   jr z,sqrt32_higher_prec
  46.   add hl,bc         ;undo the compare, HL is restored
  47.  
  48.   call div32_16   ;returns DE=quotient, HL is remainder
  49. ;Need to compute remainder
  50. ;((HL*2+((var_c+2)&1))*65536+(var_c))-DE*DE
  51.  
  52.  
  53.   ld (var_x+4),de   ;store the quotient
  54.   push hl           ;save the division remainder across mul16
  55.   ld b,d            ;BC = DE, so mul16 is presumably squaring the quotient (the DE*DE above)
  56.   ld c,e
  57.   call mul16
  58.   ld b,h            ;BC = HL from mul16; with DE this is the DEBC value named below
  59.   ld c,l
  60.   ;DEBC
  61.   ld hl,(var_c)
  62.   ld a,(var_c+2)
  63.   and 1             ;A = bit 0 of (var_c+2), the bit dropped by the shift above; AND also clears carry
  64.   sbc hl,bc         ;HL = (var_c) - BC, carry = borrow
  65.   ld b,h            ;BC = low word of the remainder (ld leaves the flags alone)
  66.   ld c,l
  67.  
  68.   rra               ;borrow -> bit 7 of A, saved bit 0 of A -> carry
  69.   pop hl            ;HL = remainder from div32_16 (pop leaves the flags alone)
  70.   adc hl,hl         ;HL = HL*2 + saved bit, carry = bit shifted out of HL
  71.   rla               ;A = that carry-out (0 or 1), carry = the earlier borrow
  72.   sbc hl,de         ;subtract DE (upper word of DEBC) together with the borrow
  73.   sbc a,0           ;propagate the borrow into A; remainder is now A:HL:BC
  74.   ret
  75.  
  76. sqrt32_higher_prec:
      ;Reached from sqrt32 (jr z) when the shifted HL equals BC, i.e. HL is 0 on entry.
      ;Skips div32_16/mul16: stores $FFFF at var_x+4 and builds the remainder directly.
      ;Returns the remainder in A:HL:BC, the same registers as the main path.
      ;NOTE(review): the main path takes the low remainder word from (var_c), whereas
      ;this path derives it from (var_c+2) -- confirm that this is intended.
  77. ;so we know the output is FFFF
  78.   add hl,bc         ;undo the compare in sqrt32: HL = BC
  79.   ld de,(var_c+2)
  80.   ld a,d
  81.   and $80
  82.   add a,a           ;A = 0, carry = bit 7 of D
  83.   adc hl,hl         ;HL = HL*2 + that bit
  84.   adc a,a           ;A = carry out of HL (0 or 1); this leaves carry clear
  85.   ex de,hl          ;HL = word from (var_c+2), DE = the doubled value
  86.   ld bc,$0001
  87.   ;or a             ;not needed: carry is already clear after the adc a,a above
  88.   sbc hl,bc         ;HL = (var_c+2) - 1, carry = borrow
  89.   ex de,hl          ;DE = low word, HL = doubled value (ex does not change the flags)
  90.   ld bc,$FFFE
  91.   sbc hl,bc         ;HL = HL - $FFFE - borrow
  92.   sbc a,0           ;propagate the borrow into A
  93.  
  94. ;AHLDE is the remainder
  95.   inc c             ;BC = $FFFF
  96.   ld (var_x+4),bc   ;store $FFFF where the main path stores the quotient
  97.   ld b,d            ;move the low remainder word into BC, giving A:HL:BC
  98.   ld c,e
  99.   ret
  100.  
  101.  
  102.  
  103.  
  104. ;#undefine var_b xOP2   ; 4 bytes, result gets copied to bottom anyways
  105. ;#undefine var_a xOP2   ;   2 bytes
  106. ;#undefine var_c xOP2+8 ;input
  107. ;#undefine var_y var_c+4 ;  used for sqrt32
  108. ;#undefine var_z0 xOP2+16
  109.  endif
  110.