Sat Sep 7 13:25:55 1996 James G. Smith <jsmith@cygnus.co.uk>

* config/tc-mips.c (COUNT_TOP_ZEROES): Added macro to count
 	leading zeroes.
	(load_register): Ensure hi32 bits are not lost during lo32bit
 	processing. Fix shift offset that was overflowing into the next
 	instruction field. Add code to generate shorter sequences for
 	constants with a single contiguous sequence of ones.

Fri Sep  6 18:23:54 1996  James G. Smith  <jsmith@cygnus.co.uk>

	* gas/mips/dli.{s,d}: More test cases added.

NOTE: The COUNT_TOP_ZEROES macro is a bit bulky, and the same result
can be achieved by using a "standard" ffs() routine:
	count = ffs(~v);
	count = count == 0 ? 0 : 33 - count;

However the following timings (VR4300 CPU clock ticks on a CMA101
board) show the performance gain.

Number of	ffs()		for loop	if/then/else	conditional
leading								   ?:
zeroes
-------------------------------------------------------------------------------
  0		 167		 179		266		251
  1		1718		 283		263		259
  2		1670		 379		287		295
  3		1622		 475		311		311
  4		1574		 571		295		287
  5		1534		 667		311		319
  6		1478		 763		307		299
  7		1430		 859		323		323
  8		1382		 962		287		295
  9		1334		1051		319		311
 10		1286		1154		299		307
 11		1238		1250		323		331
 12		1183		1346		299		307
 13		1135		1442		331		323
 14		1087		1546		311		319
 15		1039		1642		335		343
 16		 991		1730		295		287
 17		 950		1834		311		319
 18		 895		1922		307		299
 19		 847		2026		331		323
 20		 799		2122		307		299
 21		 751		2218		323		323
 22		 703		2314		311		311
 23		 655		2417		343		335
 24		 599		2506		307		299
 25		 559		2602		331		331
 26		 511		2705		311		319
 27		 463		2801		343		335
 28		 407		2897		311		319
 29		 367		2993		343		335
 30		 311		3097		323		331
 31		 271		3185		355		355
 32		 215		3233		379		371
This commit is contained in:
Jackie Smith Cashion 1996-09-07 12:45:19 +00:00
parent ea2155e858
commit ff8716f58e

View File

@ -1864,6 +1864,82 @@ check_absolute_expr (ip, ex)
as_warn ("Instruction %s requires absolute expression", ip->insn_mo->name);
}
/* COUNT_TOP_ZEROES (v): number of leading zero bits in the 32-bit
   value V, as an int in the range 0 (bit 31 set) to 32 (V is zero).

   The answer is found with a binary search over bit positions: each
   test masks off the low bits and asks whether anything remains above
   them, halving the candidate range every time, so at most six tests
   are evaluated for any input.  The straightforward alternative is

       for (lcnt = 0; (lcnt < 32); lcnt++)
         if ((v) & (1 << (31 - lcnt)))
           break;

   which is much slower for the majority of values.  The price is a
   large expansion at every point of use, and the speed advantage may
   shrink somewhat on certain cached systems.

   V is expanded many times, so the argument must not have side
   effects.  */
#define COUNT_TOP_ZEROES(v) \
  (!((v) & ~0xffff) \
   ? (!((v) & ~0xff) \
      ? (!((v) & ~0xf) \
         ? (!((v) & ~0x3) \
            ? (!((v) & ~0x1) \
               ? ((v) ? 31 : 32) \
               : 30) \
            : (!((v) & ~0x7) ? 29 : 28)) \
         : (!((v) & ~0x3f) \
            ? (!((v) & ~0x1f) ? 27 : 26) \
            : (!((v) & ~0x7f) ? 25 : 24))) \
      : (!((v) & ~0xfff) \
         ? (!((v) & ~0x3ff) \
            ? (!((v) & ~0x1ff) ? 23 : 22) \
            : (!((v) & ~0x7ff) ? 21 : 20)) \
         : (!((v) & ~0x3fff) \
            ? (!((v) & ~0x1fff) ? 19 : 18) \
            : (!((v) & ~0x7fff) ? 17 : 16)))) \
   : (!((v) & ~0xffffff) \
      ? (!((v) & ~0xfffff) \
         ? (!((v) & ~0x3ffff) \
            ? (!((v) & ~0x1ffff) ? 15 : 14) \
            : (!((v) & ~0x7ffff) ? 13 : 12)) \
         : (!((v) & ~0x3fffff) \
            ? (!((v) & ~0x1fffff) ? 11 : 10) \
            : (!((v) & ~0x7fffff) ? 9 : 8))) \
      : (!((v) & ~0xfffffff) \
         ? (!((v) & ~0x3ffffff) \
            ? (!((v) & ~0x1ffffff) ? 7 : 6) \
            : (!((v) & ~0x7ffffff) ? 5 : 4)) \
         : (!((v) & ~0x3fffffff) \
            ? (!((v) & ~0x1fffffff) ? 3 : 2) \
            : (!((v) & ~0x7fffffff) ? 1 : 0)))))
/* load_register()
* This routine generates the least number of instructions necessary to load
* an absolute expression value into a register.
@ -1876,7 +1952,7 @@ load_register (counter, reg, ep, dbl)
int dbl;
{
int shift, freg;
expressionS hi32, lo32;
expressionS hi32, lo32, tmp;
if (ep->X_op != O_big)
{
@ -1910,7 +1986,7 @@ load_register (counter, reg, ep, dbl)
|| ! ep->X_unsigned
|| sizeof (ep->X_add_number) > 4
|| (ep->X_add_number & 0x80000000) == 0))
|| (mips_isa < 3
|| ((mips_isa < 3 || !dbl)
&& (ep->X_add_number &~ 0xffffffff) == 0))
{
/* 32 bit values require an lui. */
@ -1978,6 +2054,59 @@ load_register (counter, reg, ep, dbl)
return;
}
}
/* Check for 16bit shifted constant: */
shift = 32;
tmp.X_add_number = hi32.X_add_number << shift | lo32.X_add_number;
/* We know that hi32 is non-zero, so start the mask on the first
bit of the hi32 value: */
shift = 17;
do
{
if ((tmp.X_add_number & ~((offsetT)0xffff << shift)) == 0)
{
tmp.X_op = O_constant;
tmp.X_add_number >>= shift;
macro_build ((char *) NULL, counter, &tmp, "ori", "t,r,i", reg, 0,
(int) BFD_RELOC_LO16);
macro_build ((char *) NULL, counter, NULL,
(shift >= 32) ? "dsll32" : "dsll",
"d,w,<", reg, reg, (shift >= 32) ? shift - 32 : shift);
return;
}
shift++;
} while (shift <= (64 - 16));
freg = 0;
shift = 32;
tmp.X_add_number = hi32.X_add_number << shift | lo32.X_add_number;
while ((tmp.X_add_number & 1) == 0)
{
tmp.X_add_number >>= 1;
freg++;
}
if (((tmp.X_add_number + 1) & tmp.X_add_number) == 0) /* (power-of-2 - 1) */
{
shift = COUNT_TOP_ZEROES((unsigned int)hi32.X_add_number);
if (shift != 0)
{
tmp.X_op = O_constant;
tmp.X_add_number = (offsetT)-1;
macro_build ((char *) NULL, counter, &tmp, "addiu", "t,r,j", reg, 0,
(int) BFD_RELOC_LO16); /* set all ones */
if (freg != 0)
{
freg += shift;
macro_build ((char *) NULL, counter, NULL,
(freg >= 32) ? "dsll32" : "dsll",
"d,w,<", reg, reg,
(freg >= 32) ? freg - 32 : freg);
}
macro_build ((char *) NULL, counter, NULL, (shift >= 32) ? "dsrl32" : "dsrl",
"d,w,<", reg, reg, (shift >= 32) ? shift - 32 : shift);
return;
}
}
load_register (counter, reg, &hi32, 0);
freg = reg;
}
@ -1999,7 +2128,7 @@ load_register (counter, reg, ep, dbl)
macro_build ((char *) NULL, counter, &lo32, "lui", "t,u", reg,
(int) BFD_RELOC_HI16);
macro_build ((char *) NULL, counter, NULL, "dsrl32", "d,w,<", reg,
reg, 32);
reg, 0);
return;
}