Finally, a bn_div_words() in VAX assembler that goes through all tests.

PR: 413
2024-11-24 02:23:51 +08:00 · 2002-12-23 11:25:51 +00:00 · 2002-12-23 11:25:51 +00:00 · e9883d285d
commit e9883d285d
parent 8d6ad9e39d
1 changed files with 82 additions and 56 deletions
--- a/crypto/bn/asm/vms.mar
+++ b/crypto/bn/asm/vms.mar
@ -172,7 +172,7 @@ n=12 ;(AP)	n	by value (input)
 ; }
 ;
 ; Using EDIV would be very easy, if it didn't do signed calculations.
-; Any time, any of the input numbers are signed, there are problems,
+; Any time any of the input numbers are signed, there are problems,
 ; usually with integer overflow, at which point it returns useless
 ; data (the quotient gets the value of l, and the remainder becomes 0).
 ;
@ -180,21 +180,26 @@ n=12 ;(AP)	n	by value (input)
 ; it by 2 (unsigned), do the division, multiply the resulting quotient
 ; and remainder by 2, add the bit that was dropped when dividing by 2
 ; to the remainder, and do some adjustment so the remainder doesn't
-; end up larger than the divisor.  This method works as long as the
-; divisor is positive, so we'll keep that (with a small adjustment)
-; as the main method.
-; For some cases when the divisor is negative (from EDIV's point of
-; view, i.e. when the highest bit is set), dividing the dividend by
-; 2 isn't enough, it needs to be divided by 4.  Furthermore, the
-; divisor needs to be divided by 2 (unsigned) as well, to avoid more
-; problems with the sign.  In this case, a little extra fiddling with
-; the remainder is required.
+; end up larger than the divisor.  For some cases when the divisor is
+; negative (from EDIV's point of view, i.e. when the highest bit is set),
+; dividing the dividend by 2 isn't enough, and since some operations
+; might generate integer overflows even when the dividend is divided by
+; 4 (when the high part of the shifted down dividend ends up being exactly
+; half of the divisor, the result is the quotient 0x80000000, which is
+; negative...) it needs to be divided by 8.  Furthermore, the divisor needs
+; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
+; In this case, a little extra fiddling with the remainder is required.
 ;
 ; So, the simplest way to handle this is always to divide the dividend
-; by 4, and to divide the divisor by 2 if it's highest bit is set.
-; After EDIV has been used, the quotient gets multiplied by 4 if the
-; original divisor was positive, otherwise 2.  The remainder, oddly
-; enough, is *always* multiplied by 4.
+; by 8, and to divide the divisor by 2 if it's highest bit is set.
+; After EDIV has been used, the quotient gets multiplied by 8 if the
+; original divisor was positive, otherwise 4.  The remainder, oddly
+; enough, is *always* multiplied by 8.
+; NOTE: in the case mentioned above, where the high part of the shifted
+; down dividend ends up being exactly half the shifted down divisor, we
+; end up with a 33 bit quotient.  That's no problem however, it usually
+; means we have ended up with a too large remainder as well, and the
+; problem is fixed by the last part of the algorithm (next paragraph).
 ;
 ; The routine ends with comparing the resulting remainder with the
 ; original divisor and if the remainder is larger, subtract the
@ -204,15 +209,19 @@ n=12 ;(AP)	n	by value (input)
 ; The complete algorithm looks like this:
 ;
 ; d'    = d
-; l'    = l & 3
-; [h,l] = [h,l] >> 2
+; l'    = l & 7
+; [h,l] = [h,l] >> 3
 ; [q,r] = floor([h,l] / d)	# This is the EDIV operation
 ; if (q < 0) q = -q		# I doubt this is necessary any more
 ;
-; r'    = r >> 30
-; if (d' >= 0) q = q << 1
-; q     = q << 1
-; r     = (r << 2) + l'
+; r'    = r >> 29
+; if (d' >= 0)
+;   q'  = q >> 29
+;   q   = q << 3
+; else
+;   q'  = q >> 30
+;   q   = q << 2
+; r     = (r << 3) + l'
 ;
 ; if (d' < 0)
 ;   {
@ -220,14 +229,14 @@ n=12 ;(AP)	n	by value (input)
 ;     while ([r',r] < 0)
 ;       {
 ;         [r',r] = [r',r] + d
-;         q = q - 1
+;         [q',q] = [q',q] - 1
 ;       }
 ;   }
 ;
-; while ([r',r] >= d)
+; while ([r',r] >= d')
 ;   {
-;     [r',r] = [r',r] - d
-;     q = q + 1
+;     [r',r] = [r',r] - d'
+;     [q',q] = [q',q] + 1
 ;   }
 ;
 ; return q
@ -236,31 +245,37 @@ h=4 ;(AP)	h	by value (input)
 l=8 ;(AP)	l	by value (input)
 d=12 ;(AP)	d	by value (input)

-;lprim=r5
-;rprim=r6
-;dprim=r7
-
+;r2 = l, q
+;r3 = h, r
+;r4 = d
+;r5 = l'
+;r6 = r'
+;r7 = d'
+;r8 = q'

 	.psect	code,nowrt

-.entry	bn_div_words,^m<r2,r3,r4,r5,r6,r7>
+.entry	bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8>
 	movl	l(ap),r2
 	movl	h(ap),r3
 	movl	d(ap),r4

-	bicl3	#^XFFFFFFFC,r2,r5 ; l' = l & 3
-	bicl3	#^X00000003,r2,r2
+	bicl3	#^XFFFFFFF8,r2,r5 ; l' = l & 7
+	bicl3	#^X00000007,r2,r2

-	bicl3	#^XFFFFFFFC,r3,r6
-	bicl3	#^X00000003,r3,r3
+	bicl3	#^XFFFFFFF8,r3,r6
+	bicl3	#^X00000007,r3,r3
        
 	addl	r6,r2
-	rotl	#-2,r2,r2	; l = l >> 2
-	rotl	#-2,r3,r3	; h = h >> 2
+
+	rotl	#-3,r2,r2	; l = l >> 3
+	rotl	#-3,r3,r3	; h = h >> 3
                
-	movl	#0,r6
 	movl	r4,r7		; d' = d

+	movl	#0,r6		; r' = 0
+	movl	#0,r8		; q' = 0
+
 	tstl	r4
 	beql	666$		; Uh-oh, the divisor is 0...
 	bgtr	1$
@ -277,37 +292,36 @@ d=12 ;(AP)	d	by value (input)
 3$:     
 	tstl	r7
 	blss	4$
-	ashl	#1,r2,r2	; if d' >= 0, q = q << 1
-4$:     
-	ashl	#1,r2,r2	; q = q << 1
-	rotl	#2,r3,r3	; r = r << 2
-	bicl3	#^XFFFFFFFC,r3,r6 ; r' gets the high bits from r
-	bicl3	#^X00000003,r3,r3
+	rotl	#3,r2,r2	;   q = q << 3
+	bicl3	#^XFFFFFFF8,r2,r8 ;    q' gets the high bits from q
+	bicl3	#^X00000007,r2,r2
+	bsb	41$
+4$:				; else
+	rotl	#2,r2,r2	;   q = q << 2
+	bicl3	#^XFFFFFFFC,r2,r8 ;   q' gets the high bits from q
+	bicl3	#^X00000003,r2,r2
+41$:
+	rotl	#3,r3,r3	; r = r << 3
+	bicl3	#^XFFFFFFF8,r3,r6 ; r' gets the high bits from r
+	bicl3	#^X00000007,r3,r3
 	addl	r5,r3		; r = r + l'

 	tstl	r7
 	bgeq	5$
 	bitl	#1,r7
 	beql	5$		; if d' < 0 && d' & 1
-	subl	r2,r3		;   [r',r] = [r',r] - q
-	sbwc	#0,r6
+	subl	r2,r3		;   [r',r] = [r',r] - [q',q]
+	sbwc	r8,r6
 45$:
 	bgeq	5$		;   while r < 0
-	decl	r2		;     q = q - 1
-	addl	r7,r3		;     [r',r] = [r',r] + d
+	decl	r2		;     [q',q] = [q',q] - 1
+	sbwc	#0,r8
+	addl	r7,r3		;     [r',r] = [r',r] + d'
 	adwc	#0,r6
 	brb	45$

-5$:
-	tstl	r6
-	bneq	6$
-	cmpl	r3,r7
-	blssu	42$		; while [r',r] >= d'
-6$:
-	subl	r7,r3		;   [r',r] = [r',r] - d
-	sbwc	#0,r6
-	incl	r2		;   q = q + 1
-	brb	5$	
+; The return points are placed in the middle to keep a short distance from
+; all the branch points
 42$:
 ;	movl	r3,r1
 	movl	r2,r0
@ -315,6 +329,18 @@ d=12 ;(AP)	d	by value (input)
 666$:
 	movl	#^XFFFFFFFF,r0
 	ret
+
+5$:
+	tstl	r6
+	bneq	6$
+	cmpl	r3,r7
+	blssu	42$		; while [r',r] >= d'
+6$:
+	subl	r7,r3		;   [r',r] = [r',r] - d'
+	sbwc	#0,r6
+	incl	r2		;   [q',q] = [q',q] + 1
+	adwc	#0,r8
+	brb	5$	

 	.title	vax_bn_add_words  unsigned add of two arrays
 ;