Login

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download | RSS feed

 ifndef included_sqrt32
 define included_sqrt32
 include "../../common/sqrtHLIX.asm"
 include "../../common/mul16.asm"
 include "../div/div32_16.asm"


var_c=xOP1   ;input
var_y=var_c+4 ;  used for sqrt32
var_x=xOP2   ;output
var_b=xOP2   ; 4 bytes, result gets copied to bottom anyways
var_a=xOP2   ;   2 bytes
var_z0=xOP2+8;used in sqr32
;NOTE!
;This expects the top two bits to be non-zero

sqrt32:
;Speed: 358+sqrtHLIX+div32_16+mul16
;min: 2472      ;might be faster now, need to re-analyze
;max: 3262      ;+37cc slower, need to re-analyze
;avg: 2958.637  ;~37cc slower, need to re-analyze
;Step one is to generate 16 accurate bits
  ld hl,(var_y+2)
  ld ix,(var_y)
  call sqrtHLIX
  ld (var_x+6),de
  ld b,d
  ld c,e
;now AHL is the remainder with A at most 1.
;Fetch the next 16 bits and put them in DE
  ld de,(var_c+2)

;AHLDE is the new remainder
;Need to divide by 2, then divide by the 16-bit (var_x+4)
  rra
  rr h
  rr l
  rr d
  rr e
  ld ixh,d
  ld ixl,e

  or a
  sbc hl,bc
  jr z,sqrt32_higher_prec
  add hl,bc

  call div32_16   ;returns DE=quotient, HL is remainder
;Need to compute remainder
;(HL*2+(var_c+2)&1)*65536+(var_c))-DE*DE


  ld (var_x+4),de
  push hl
  ld b,d
  ld c,e
  call mul16
  ld b,h
  ld c,l
  ;DEBC
  ld hl,(var_c)
  ld a,(var_c+2)
  and 1
  sbc hl,bc
  ld b,h
  ld c,l

  rra
  pop hl
  adc hl,hl
  rla
  sbc hl,de
  sbc a,0
  ret

sqrt32_higher_prec:
;so we know the output is FFFF
  add hl,bc
  ld de,(var_c+2)
  ld a,d
  and $80
  add a,a
  adc hl,hl
  adc a,a
  ex de,hl
  ld bc,$0001
  ;or a
  sbc hl,bc
  ex de,hl
  ld bc,$FFFE
  sbc hl,bc
  sbc a,0

;AHLDE is the remainder
  inc c
  ld (var_x+4),bc
  ld b,d
  ld c,e
  ret




;#undefine var_b xOP2   ; 4 bytes, result gets copied to bottom anyways
;#undefine var_a xOP2   ;   2 bytes
;#undefine var_c xOP2+8 ;input
;#undefine var_y var_c+4 ;  used for sqrt32
;#undefine var_z0 xOP2+16
 endif