mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-27 22:03:57 +08:00
This patch adds new builtins to check for cpu type and features.
2012-04-24 Sriraman Tallam <tmsriram@google.com> This patch adds new builtins to check for cpu type and features. * __builtin_cpu_is ("<CPUNAME>") * __builtin_cpu_supports ("<FEATURE>") apart from the cpu init builtin, __builtin_cpu_init. List of CPU names : * "amd" * "intel" * "atom" * "core2" * "corei7" * "nehalem" * "westmere" * "sandybridge" * "amdfam10h" * "barcelona" * "shanghai" * "istanbul" * "bdver1" * "bdver2" List of CPU features : * "cmov" * "mmx" * "popcnt" * "sse" * "sse2" * "sse3" * "ssse3" * "sse4.1" * "sse4.2" * "avx" * config/i386/i386.c (build_processor_model_struct): New function. (make_var_decl): New function. (fold_builtin_cpu): New function. (ix86_fold_builtin): New function. (make_cpu_type_builtin): New function. (ix86_init_platform_type_builtins): New function. (ix86_expand_builtin): Expand new builtins by folding them. (ix86_init_builtins): Make new builtins to detect CPU type. (TARGET_FOLD_BUILTIN): New macro. (IX86_BUILTIN_CPU_INIT): New enum value. (IX86_BUILTIN_CPU_IS): New enum value. (IX86_BUILTIN_CPU_SUPPORTS): New enum value. * config/i386/i386-builtin-types.def: New function type. * testsuite/gcc.target/builtin_target.c: New testcase. * doc/extend.texi: Document builtins. * libgcc/config/i386/i386-cpuinfo.c: New file. * libgcc/config/i386/t-cpuinfo: New file. * libgcc/config.host: Include t-cpuinfo. * libgcc/config/i386/libgcc-glibc.ver: Version symbol __cpu_model. From-SVN: r186789
This commit is contained in:
parent
e1be98a467
commit
792317cc77
@ -1,3 +1,21 @@
|
||||
2012-04-24 Sriraman Tallam <tmsriram@google.com>
|
||||
|
||||
* config/i386/i386.c (build_processor_model_struct): New function.
|
||||
(make_var_decl): New function.
|
||||
(fold_builtin_cpu): New function.
|
||||
(ix86_fold_builtin): New function.
|
||||
(make_cpu_type_builtin): New function.
|
||||
(ix86_init_platform_type_builtins): New function.
|
||||
(ix86_expand_builtin): Expand new builtins by folding them.
|
||||
(ix86_init_builtins): Make new builtins to detect CPU type.
|
||||
(TARGET_FOLD_BUILTIN): New macro.
|
||||
(IX86_BUILTIN_CPU_INIT): New enum value.
|
||||
(IX86_BUILTIN_CPU_IS): New enum value.
|
||||
(IX86_BUILTIN_CPU_SUPPORTS): New enum value.
|
||||
* config/i386/i386-builtin-types.def: New function type.
|
||||
* testsuite/gcc.target/builtin_target.c: New testcase.
|
||||
* doc/extend.texi: Document builtins.
|
||||
|
||||
2012-04-24 Olivier Hainque <hainque@adacore.com>
|
||||
|
||||
* common.opt (gdwarf-): Initialize dwarf_version to -1 instead of 2.
|
||||
|
@ -155,6 +155,7 @@ DEF_FUNCTION_TYPE (INT, V4SF)
|
||||
DEF_FUNCTION_TYPE (INT, V8QI)
|
||||
DEF_FUNCTION_TYPE (INT, V8SF)
|
||||
DEF_FUNCTION_TYPE (INT, V32QI)
|
||||
DEF_FUNCTION_TYPE (INT, PCCHAR)
|
||||
DEF_FUNCTION_TYPE (INT64, INT64)
|
||||
DEF_FUNCTION_TYPE (INT64, V2DF)
|
||||
DEF_FUNCTION_TYPE (INT64, V4SF)
|
||||
|
@ -25855,6 +25855,11 @@ enum ix86_builtins
|
||||
/* CFString built-in for darwin */
|
||||
IX86_BUILTIN_CFSTRING,
|
||||
|
||||
/* Builtins to get CPU type and supported features. */
|
||||
IX86_BUILTIN_CPU_INIT,
|
||||
IX86_BUILTIN_CPU_IS,
|
||||
IX86_BUILTIN_CPU_SUPPORTS,
|
||||
|
||||
IX86_BUILTIN_MAX
|
||||
};
|
||||
|
||||
@ -27673,6 +27678,334 @@ ix86_init_mmx_sse_builtins (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* This builds the processor_model struct type defined in
|
||||
libgcc/config/i386/i386-cpuinfo.c */
|
||||
|
||||
static tree
|
||||
build_processor_model_struct (void)
|
||||
{
|
||||
const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
|
||||
"__cpu_features"};
|
||||
tree field = NULL_TREE, field_chain = NULL_TREE;
|
||||
int i;
|
||||
tree type = make_node (RECORD_TYPE);
|
||||
|
||||
/* The first 3 fields are unsigned int. */
|
||||
for (i = 0; i < 3; ++i)
|
||||
{
|
||||
field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
|
||||
get_identifier (field_name[i]), unsigned_type_node);
|
||||
if (field_chain != NULL_TREE)
|
||||
DECL_CHAIN (field) = field_chain;
|
||||
field_chain = field;
|
||||
}
|
||||
|
||||
/* The last field is an array of unsigned integers of size one. */
|
||||
field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
|
||||
get_identifier (field_name[3]),
|
||||
build_array_type (unsigned_type_node,
|
||||
build_index_type (size_one_node)));
|
||||
if (field_chain != NULL_TREE)
|
||||
DECL_CHAIN (field) = field_chain;
|
||||
field_chain = field;
|
||||
|
||||
finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
|
||||
return type;
|
||||
}
|
||||
|
||||
/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
|
||||
|
||||
static tree
|
||||
make_var_decl (tree type, const char *name)
|
||||
{
|
||||
tree new_decl;
|
||||
|
||||
new_decl = build_decl (UNKNOWN_LOCATION,
|
||||
VAR_DECL,
|
||||
get_identifier(name),
|
||||
type);
|
||||
|
||||
DECL_EXTERNAL (new_decl) = 1;
|
||||
TREE_STATIC (new_decl) = 1;
|
||||
TREE_PUBLIC (new_decl) = 1;
|
||||
DECL_INITIAL (new_decl) = 0;
|
||||
DECL_ARTIFICIAL (new_decl) = 0;
|
||||
DECL_PRESERVE_P (new_decl) = 1;
|
||||
|
||||
make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
|
||||
assemble_variable (new_decl, 0, 0, 0);
|
||||
|
||||
return new_decl;
|
||||
}
|
||||
|
||||
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
|
||||
into an integer defined in libgcc/config/i386/i386-cpuinfo.c */
|
||||
|
||||
static tree
|
||||
fold_builtin_cpu (tree fndecl, tree *args)
|
||||
{
|
||||
unsigned int i;
|
||||
enum ix86_builtins fn_code = (enum ix86_builtins)
|
||||
DECL_FUNCTION_CODE (fndecl);
|
||||
tree param_string_cst = NULL;
|
||||
|
||||
/* This is the order of bit-fields in __processor_features in
|
||||
i386-cpuinfo.c */
|
||||
enum processor_features
|
||||
{
|
||||
F_CMOV = 0,
|
||||
F_MMX,
|
||||
F_POPCNT,
|
||||
F_SSE,
|
||||
F_SSE2,
|
||||
F_SSE3,
|
||||
F_SSSE3,
|
||||
F_SSE4_1,
|
||||
F_SSE4_2,
|
||||
F_AVX,
|
||||
F_MAX
|
||||
};
|
||||
|
||||
/* These are the values for vendor types and cpu types and subtypes
|
||||
in i386-cpuinfo.c. Cpu types and subtypes should be subtracted by
|
||||
the corresponding start value. */
|
||||
enum processor_model
|
||||
{
|
||||
M_INTEL = 1,
|
||||
M_AMD,
|
||||
M_CPU_TYPE_START,
|
||||
M_INTEL_ATOM,
|
||||
M_INTEL_CORE2,
|
||||
M_INTEL_COREI7,
|
||||
M_AMDFAM10H,
|
||||
M_AMDFAM15H,
|
||||
M_CPU_SUBTYPE_START,
|
||||
M_INTEL_COREI7_NEHALEM,
|
||||
M_INTEL_COREI7_WESTMERE,
|
||||
M_INTEL_COREI7_SANDYBRIDGE,
|
||||
M_AMDFAM10H_BARCELONA,
|
||||
M_AMDFAM10H_SHANGHAI,
|
||||
M_AMDFAM10H_ISTANBUL,
|
||||
M_AMDFAM15H_BDVER1,
|
||||
M_AMDFAM15H_BDVER2
|
||||
};
|
||||
|
||||
static struct _arch_names_table
|
||||
{
|
||||
const char *const name;
|
||||
const enum processor_model model;
|
||||
}
|
||||
const arch_names_table[] =
|
||||
{
|
||||
{"amd", M_AMD},
|
||||
{"intel", M_INTEL},
|
||||
{"atom", M_INTEL_ATOM},
|
||||
{"core2", M_INTEL_CORE2},
|
||||
{"corei7", M_INTEL_COREI7},
|
||||
{"nehalem", M_INTEL_COREI7_NEHALEM},
|
||||
{"westmere", M_INTEL_COREI7_WESTMERE},
|
||||
{"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
|
||||
{"amdfam10h", M_AMDFAM10H},
|
||||
{"barcelona", M_AMDFAM10H_BARCELONA},
|
||||
{"shanghai", M_AMDFAM10H_SHANGHAI},
|
||||
{"istanbul", M_AMDFAM10H_ISTANBUL},
|
||||
{"amdfam15h", M_AMDFAM15H},
|
||||
{"bdver1", M_AMDFAM15H_BDVER1},
|
||||
{"bdver2", M_AMDFAM15H_BDVER2},
|
||||
};
|
||||
|
||||
static struct _isa_names_table
|
||||
{
|
||||
const char *const name;
|
||||
const enum processor_features feature;
|
||||
}
|
||||
const isa_names_table[] =
|
||||
{
|
||||
{"cmov", F_CMOV},
|
||||
{"mmx", F_MMX},
|
||||
{"popcnt", F_POPCNT},
|
||||
{"sse", F_SSE},
|
||||
{"sse2", F_SSE2},
|
||||
{"sse3", F_SSE3},
|
||||
{"ssse3", F_SSSE3},
|
||||
{"sse4.1", F_SSE4_1},
|
||||
{"sse4.2", F_SSE4_2},
|
||||
{"avx", F_AVX}
|
||||
};
|
||||
|
||||
static tree __processor_model_type = NULL_TREE;
|
||||
static tree __cpu_model_var = NULL_TREE;
|
||||
|
||||
if (__processor_model_type == NULL_TREE)
|
||||
__processor_model_type = build_processor_model_struct ();
|
||||
|
||||
if (__cpu_model_var == NULL_TREE)
|
||||
__cpu_model_var = make_var_decl (__processor_model_type,
|
||||
"__cpu_model");
|
||||
|
||||
gcc_assert ((args != NULL) && (*args != NULL));
|
||||
|
||||
param_string_cst = *args;
|
||||
while (param_string_cst
|
||||
&& TREE_CODE (param_string_cst) != STRING_CST)
|
||||
{
|
||||
/* *args must be a expr that can contain other EXPRS leading to a
|
||||
STRING_CST. */
|
||||
if (!EXPR_P (param_string_cst))
|
||||
{
|
||||
error ("Parameter to builtin must be a string constant or literal");
|
||||
return integer_zero_node;
|
||||
}
|
||||
param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
|
||||
}
|
||||
|
||||
gcc_assert (param_string_cst);
|
||||
|
||||
if (fn_code == IX86_BUILTIN_CPU_IS)
|
||||
{
|
||||
tree ref;
|
||||
tree field;
|
||||
unsigned int field_val = 0;
|
||||
unsigned int NUM_ARCH_NAMES
|
||||
= sizeof (arch_names_table) / sizeof (struct _arch_names_table);
|
||||
|
||||
for (i = 0; i < NUM_ARCH_NAMES; i++)
|
||||
if (strcmp (arch_names_table[i].name,
|
||||
TREE_STRING_POINTER (param_string_cst)) == 0)
|
||||
break;
|
||||
|
||||
if (i == NUM_ARCH_NAMES)
|
||||
{
|
||||
error ("Parameter to builtin not valid: %s",
|
||||
TREE_STRING_POINTER (param_string_cst));
|
||||
return integer_zero_node;
|
||||
}
|
||||
|
||||
field = TYPE_FIELDS (__processor_model_type);
|
||||
field_val = arch_names_table[i].model;
|
||||
|
||||
/* CPU types are stored in the next field. */
|
||||
if (field_val > M_CPU_TYPE_START
|
||||
&& field_val < M_CPU_SUBTYPE_START)
|
||||
{
|
||||
field = DECL_CHAIN (field);
|
||||
field_val -= M_CPU_TYPE_START;
|
||||
}
|
||||
|
||||
/* CPU subtypes are stored in the next field. */
|
||||
if (field_val > M_CPU_SUBTYPE_START)
|
||||
{
|
||||
field = DECL_CHAIN ( DECL_CHAIN (field));
|
||||
field_val -= M_CPU_SUBTYPE_START;
|
||||
}
|
||||
|
||||
/* Get the appropriate field in __cpu_model. */
|
||||
ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
|
||||
field, NULL_TREE);
|
||||
|
||||
/* Check the value. */
|
||||
return build2 (EQ_EXPR, unsigned_type_node, ref,
|
||||
build_int_cstu (unsigned_type_node, field_val));
|
||||
}
|
||||
else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
|
||||
{
|
||||
tree ref;
|
||||
tree array_elt;
|
||||
tree field;
|
||||
unsigned int field_val = 0;
|
||||
unsigned int NUM_ISA_NAMES
|
||||
= sizeof (isa_names_table) / sizeof (struct _isa_names_table);
|
||||
|
||||
for (i = 0; i < NUM_ISA_NAMES; i++)
|
||||
if (strcmp (isa_names_table[i].name,
|
||||
TREE_STRING_POINTER (param_string_cst)) == 0)
|
||||
break;
|
||||
|
||||
if (i == NUM_ISA_NAMES)
|
||||
{
|
||||
error ("Parameter to builtin not valid: %s",
|
||||
TREE_STRING_POINTER (param_string_cst));
|
||||
return integer_zero_node;
|
||||
}
|
||||
|
||||
field = TYPE_FIELDS (__processor_model_type);
|
||||
/* Get the last field, which is __cpu_features. */
|
||||
while (DECL_CHAIN (field))
|
||||
field = DECL_CHAIN (field);
|
||||
|
||||
/* Get the appropriate field: __cpu_model.__cpu_features */
|
||||
ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
|
||||
field, NULL_TREE);
|
||||
|
||||
/* Access the 0th element of __cpu_features array. */
|
||||
array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
|
||||
integer_zero_node, NULL_TREE, NULL_TREE);
|
||||
|
||||
field_val = (1 << isa_names_table[i].feature);
|
||||
/* Return __cpu_model.__cpu_features[0] & field_val */
|
||||
return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
|
||||
build_int_cstu (unsigned_type_node, field_val));
|
||||
}
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
static tree
|
||||
ix86_fold_builtin (tree fndecl, int n_args,
|
||||
tree *args, bool ignore ATTRIBUTE_UNUSED)
|
||||
{
|
||||
if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
|
||||
{
|
||||
enum ix86_builtins fn_code = (enum ix86_builtins)
|
||||
DECL_FUNCTION_CODE (fndecl);
|
||||
if (fn_code == IX86_BUILTIN_CPU_IS
|
||||
|| fn_code == IX86_BUILTIN_CPU_SUPPORTS)
|
||||
{
|
||||
gcc_assert (n_args == 1);
|
||||
return fold_builtin_cpu (fndecl, args);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Make builtins to detect cpu type and features supported. NAME is
|
||||
the builtin name, CODE is the builtin code, and FTYPE is the function
|
||||
type of the builtin. */
|
||||
|
||||
static void
|
||||
make_cpu_type_builtin (const char* name, int code,
|
||||
enum ix86_builtin_func_type ftype, bool is_const)
|
||||
{
|
||||
tree decl;
|
||||
tree type;
|
||||
|
||||
type = ix86_get_builtin_func_type (ftype);
|
||||
decl = add_builtin_function (name, type, code, BUILT_IN_MD,
|
||||
NULL, NULL_TREE);
|
||||
gcc_assert (decl != NULL_TREE);
|
||||
ix86_builtins[(int) code] = decl;
|
||||
TREE_READONLY (decl) = is_const;
|
||||
}
|
||||
|
||||
/* Make builtins to get CPU type and features supported. The created
|
||||
builtins are :
|
||||
|
||||
__builtin_cpu_init (), to detect cpu type and features,
|
||||
__builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
|
||||
__builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
|
||||
*/
|
||||
|
||||
static void
|
||||
ix86_init_platform_type_builtins (void)
|
||||
{
|
||||
make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
|
||||
INT_FTYPE_VOID, false);
|
||||
make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
|
||||
INT_FTYPE_PCCHAR, true);
|
||||
make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
|
||||
INT_FTYPE_PCCHAR, true);
|
||||
}
|
||||
|
||||
/* Internal method for ix86_init_builtins. */
|
||||
|
||||
static void
|
||||
@ -27756,6 +28089,9 @@ ix86_init_builtins (void)
|
||||
|
||||
ix86_init_builtin_types ();
|
||||
|
||||
/* Builtins to get CPU type and features. */
|
||||
ix86_init_platform_type_builtins ();
|
||||
|
||||
/* TFmode support builtins. */
|
||||
def_builtin_const (0, "__builtin_infq",
|
||||
FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
|
||||
@ -29374,6 +29710,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
enum machine_mode mode0, mode1, mode2, mode3, mode4;
|
||||
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
|
||||
|
||||
/* For CPU builtins that can be folded, fold first and expand the fold. */
|
||||
switch (fcode)
|
||||
{
|
||||
case IX86_BUILTIN_CPU_INIT:
|
||||
{
|
||||
/* Make it call __cpu_indicator_init in libgcc. */
|
||||
tree call_expr, fndecl, type;
|
||||
type = build_function_type_list (integer_type_node, NULL_TREE);
|
||||
fndecl = build_fn_decl ("__cpu_indicator_init", type);
|
||||
call_expr = build_call_expr (fndecl, 0);
|
||||
return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
|
||||
}
|
||||
case IX86_BUILTIN_CPU_IS:
|
||||
case IX86_BUILTIN_CPU_SUPPORTS:
|
||||
{
|
||||
tree arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
|
||||
gcc_assert (fold_expr != NULL_TREE);
|
||||
return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine whether the builtin function is available under the current ISA.
|
||||
Originally the builtin was not created if it wasn't applicable to the
|
||||
current ISA based on the command line switches. With function specific
|
||||
@ -39100,6 +39458,9 @@ ix86_autovectorize_vector_sizes (void)
|
||||
#undef TARGET_BUILD_BUILTIN_VA_LIST
|
||||
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
|
||||
|
||||
#undef TARGET_FOLD_BUILTIN
|
||||
#define TARGET_FOLD_BUILTIN ix86_fold_builtin
|
||||
|
||||
#undef TARGET_ENUM_VA_LIST_P
|
||||
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
|
||||
|
||||
|
@ -9419,6 +9419,144 @@ Similar to @code{__builtin_huge_val}, except the return type is @code{__float128
|
||||
@findex __builtin_huge_valq
|
||||
@end table
|
||||
|
||||
The following built-in functions are always available and can be used to
|
||||
check the target platform type.
|
||||
|
||||
@deftypefn {Built-in Function} int __builtin_cpu_init (void)
|
||||
This function runs the CPU detection code to check the type of CPU and the
|
||||
features supported. This builtin needs to be invoked along with the builtins
|
||||
to check CPU type and features, @code{__builtin_cpu_is} and
|
||||
@code{__builtin_cpu_supports}, only when used in a function that will be
|
||||
executed before any constructors are called. The CPU detection code is
|
||||
automatically executed in a very high priority constructor.
|
||||
|
||||
For example, this function has to be used in @code{ifunc} resolvers which
|
||||
check for CPU type using the builtins, @code{__builtin_cpu_is}
|
||||
and @code{__builtin_cpu_supports}.
|
||||
@smallexample
|
||||
|
||||
static void (*resolve_memcpy (void)) (void)
|
||||
@{
|
||||
// ifunc resolvers fire before constructors, explicitly call the init
|
||||
// function.
|
||||
__builtin_cpu_init ();
|
||||
if (__builtin_cpu_supports ("ssse3"))
|
||||
return ssse3_memcpy; // super fast memcpy with ssse3 instructions.
|
||||
else
|
||||
return default_memcpy;
|
||||
@}
|
||||
|
||||
void *memcpy (void *, const void *, size_t)
|
||||
__attribute__ ((ifunc ("resolve_memcpy")));
|
||||
@end smallexample
|
||||
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Built-in Function} int __builtin_cpu_is (const char *@var{cpuname})
|
||||
This function returns a positive integer if the runtime cpu is of type @var{cpuname}
|
||||
and returns @code{0} otherwise. The following cpu names can be detected:
|
||||
|
||||
@table @samp
|
||||
@item intel
|
||||
Intel CPU.
|
||||
|
||||
@item atom
|
||||
Intel ATOM CPU.
|
||||
|
||||
@item core2
|
||||
Intel Core2 CPU.
|
||||
|
||||
@item corei7
|
||||
Intel Corei7 CPU.
|
||||
|
||||
@item nehalem
|
||||
Intel Corei7 Nehalem CPU.
|
||||
|
||||
@item westmere
|
||||
Intel Corei7 Westmere CPU.
|
||||
|
||||
@item sandybridge
|
||||
Intel Corei7 Sandybridge CPU.
|
||||
|
||||
@item amd
|
||||
AMD CPU.
|
||||
|
||||
@item amdfam10h
|
||||
AMD family 10h CPU.
|
||||
|
||||
@item barcelona
|
||||
AMD family 10h Barcelona CPU.
|
||||
|
||||
@item shanghai
|
||||
AMD family 10h Shanghai CPU.
|
||||
|
||||
@item istanbul
|
||||
AMD family 10h Istanbul CPU.
|
||||
|
||||
@item amdfam15h
|
||||
AMD family 15h CPU.
|
||||
|
||||
@item bdver1
|
||||
AMD family 15h Bulldozer version 1.
|
||||
|
||||
@item bdver2
|
||||
AMD family 15h Bulldozer version 2.
|
||||
@end table
|
||||
|
||||
Here is an example:
|
||||
@smallexample
|
||||
if (__builtin_cpu_is ("corei7"))
|
||||
@{
|
||||
do_corei7 (); //Corei7 specific implementation.
|
||||
@}
|
||||
else
|
||||
@{
|
||||
do_generic (); //Generic implementation.
|
||||
@}
|
||||
@end smallexample
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Built-in Function} int __builtin_cpu_supports (const char *@var{feature})
|
||||
This function returns a positive integer if the runtime cpu supports @var{feature}
|
||||
and returns @code{0} otherwise. The following features can be detected:
|
||||
|
||||
@table @samp
|
||||
@item cmov
|
||||
CMOV instruction.
|
||||
@item mmx
|
||||
MMX instructions.
|
||||
@item popcnt
|
||||
POPCNT instruction.
|
||||
@item sse
|
||||
SSE instructions.
|
||||
@item sse2
|
||||
SSE2 instructions.
|
||||
@item sse3
|
||||
SSE3 instructions.
|
||||
@item ssse3
|
||||
SSSE3 instructions.
|
||||
@item sse4.1
|
||||
SSE4.1 instructions.
|
||||
@item sse4.2
|
||||
SSE4.2 instructions.
|
||||
@item avx
|
||||
AVX instructions.
|
||||
@end table
|
||||
|
||||
Here is an example:
|
||||
@smallexample
|
||||
if (__builtin_cpu_supports ("popcnt"))
|
||||
@{
|
||||
asm("popcnt %1,%0" : "=r"(count) : "rm"(n) : "cc");
|
||||
@}
|
||||
else
|
||||
@{
|
||||
count = generic_countbits (n); //generic implementation.
|
||||
@}
|
||||
@end smallexample
|
||||
@end deftypefn
|
||||
|
||||
|
||||
The following built-in functions are made available by @option{-mmmx}.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
|
||||
|
70
gcc/testsuite/gcc.target/i386/builtin_target.c
Normal file
70
gcc/testsuite/gcc.target/i386/builtin_target.c
Normal file
@ -0,0 +1,70 @@
|
||||
/* This test checks if the __builtin_cpu_is and __builtin_cpu_supports calls
|
||||
are recognized. */
|
||||
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* Call __builtin_cpu_supports and __builtin_cpu_is once for every
   documented feature and cpu name.  The asserts only require a
   non-negative result, so this checks that each name is accepted by
   the compiler rather than what the host cpu actually is.  Returns 0.  */

#define CHECK_FEATURE(NAME) assert (__builtin_cpu_supports (NAME) >= 0)
#define CHECK_CPU(NAME) assert (__builtin_cpu_is (NAME) >= 0)

int
fn1 (void)
{
  /* Check CPU Features.  */
  CHECK_FEATURE ("cmov");
  CHECK_FEATURE ("mmx");
  CHECK_FEATURE ("popcnt");
  CHECK_FEATURE ("sse");
  CHECK_FEATURE ("sse2");
  CHECK_FEATURE ("sse3");
  CHECK_FEATURE ("ssse3");
  CHECK_FEATURE ("sse4.1");
  CHECK_FEATURE ("sse4.2");
  CHECK_FEATURE ("avx");

  /* Check CPU type.  */
  CHECK_CPU ("amd");
  CHECK_CPU ("intel");
  CHECK_CPU ("atom");
  CHECK_CPU ("core2");
  CHECK_CPU ("corei7");
  CHECK_CPU ("nehalem");
  CHECK_CPU ("westmere");
  CHECK_CPU ("sandybridge");
  CHECK_CPU ("amdfam10h");
  CHECK_CPU ("barcelona");
  CHECK_CPU ("shanghai");
  CHECK_CPU ("istanbul");
  CHECK_CPU ("amdfam15h");
  CHECK_CPU ("bdver1");
  CHECK_CPU ("bdver2");

  return 0;
}

#undef CHECK_CPU
#undef CHECK_FEATURE
|
||||
|
||||
/* Run the cpu detection explicitly, then exercise every builtin query;
   the exit status is whatever fn1 returns.  */

int
main (void)
{
  __builtin_cpu_init ();
  return fn1 ();
}
|
@ -1,3 +1,10 @@
|
||||
2012-04-24 Sriraman Tallam <tmsriram@google.com>
|
||||
|
||||
* libgcc/config/i386/i386-cpuinfo.c: New file.
|
||||
* libgcc/config/i386/t-cpuinfo: New file.
|
||||
* libgcc/config.host: Include t-cpuinfo.
|
||||
* libgcc/config/i386/libgcc-glibc.ver: Version symbol __cpu_model.
|
||||
|
||||
2012-04-24 Chao-ying Fu <fu@mips.com>
|
||||
|
||||
* unwind-dw2-fde-dip.c: Define USE_PT_GNU_EH_FRAME for BIONIC.
|
||||
|
@ -1130,7 +1130,7 @@ i[34567]86-*-linux* | x86_64-*-linux* | \
|
||||
i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \
|
||||
i[34567]86-*-knetbsd*-gnu | \
|
||||
i[34567]86-*-gnu*)
|
||||
tmake_file="${tmake_file} t-tls i386/t-linux"
|
||||
tmake_file="${tmake_file} t-tls i386/t-linux i386/t-cpuinfo"
|
||||
if test "$libgcc_cv_cfi" = "yes"; then
|
||||
tmake_file="${tmake_file} t-stack i386/t-stack-i386"
|
||||
fi
|
||||
|
316
libgcc/config/i386/i386-cpuinfo.c
Normal file
316
libgcc/config/i386/i386-cpuinfo.c
Normal file
@ -0,0 +1,316 @@
|
||||
/* Get CPU type and Features for x86 processors.
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
Contributed by Sriraman Tallam (tmsriram@google.com)
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "tsystem.h"
|
||||
|
||||
int __cpu_indicator_init (void) __attribute__ ((constructor (101)));
|
||||
|
||||
enum vendor_signatures
|
||||
{
|
||||
SIG_INTEL = 0x756e6547 /* Genu */,
|
||||
SIG_AMD = 0x68747541 /* Auth */
|
||||
};
|
||||
|
||||
/* Processor Vendor and Models. */
|
||||
|
||||
enum processor_vendor
|
||||
{
|
||||
VENDOR_INTEL = 1,
|
||||
VENDOR_AMD,
|
||||
VENDOR_OTHER,
|
||||
VENDOR_MAX
|
||||
};
|
||||
|
||||
enum processor_types
|
||||
{
|
||||
INTEL_ATOM = 1,
|
||||
INTEL_CORE2,
|
||||
INTEL_COREI7,
|
||||
AMDFAM10H,
|
||||
AMDFAM15H,
|
||||
CPU_TYPE_MAX
|
||||
};
|
||||
|
||||
enum processor_subtypes
|
||||
{
|
||||
INTEL_COREI7_NEHALEM = 1,
|
||||
INTEL_COREI7_WESTMERE,
|
||||
INTEL_COREI7_SANDYBRIDGE,
|
||||
AMDFAM10H_BARCELONA,
|
||||
AMDFAM10H_SHANGHAI,
|
||||
AMDFAM10H_ISTANBUL,
|
||||
AMDFAM15H_BDVER1,
|
||||
AMDFAM15H_BDVER2,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
/* ISA Features supported. */
|
||||
|
||||
enum processor_features
|
||||
{
|
||||
FEATURE_CMOV = 0,
|
||||
FEATURE_MMX,
|
||||
FEATURE_POPCNT,
|
||||
FEATURE_SSE,
|
||||
FEATURE_SSE2,
|
||||
FEATURE_SSE3,
|
||||
FEATURE_SSSE3,
|
||||
FEATURE_SSE4_1,
|
||||
FEATURE_SSE4_2,
|
||||
FEATURE_AVX
|
||||
};
|
||||
|
||||
struct __processor_model
|
||||
{
|
||||
unsigned int __cpu_vendor;
|
||||
unsigned int __cpu_type;
|
||||
unsigned int __cpu_subtype;
|
||||
unsigned int __cpu_features[1];
|
||||
} __cpu_model;
|
||||
|
||||
|
||||
/* Get the specific type of AMD CPU. */
|
||||
|
||||
static void
|
||||
get_amd_cpu (unsigned int family, unsigned int model)
|
||||
{
|
||||
switch (family)
|
||||
{
|
||||
/* AMD Family 10h. */
|
||||
case 0x10:
|
||||
switch (model)
|
||||
{
|
||||
case 0x2:
|
||||
/* Barcelona. */
|
||||
__cpu_model.__cpu_type = AMDFAM10H;
|
||||
__cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
|
||||
break;
|
||||
case 0x4:
|
||||
/* Shanghai. */
|
||||
__cpu_model.__cpu_type = AMDFAM10H;
|
||||
__cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
|
||||
break;
|
||||
case 0x8:
|
||||
/* Istanbul. */
|
||||
__cpu_model.__cpu_type = AMDFAM10H;
|
||||
__cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* AMD Family 15h. */
|
||||
case 0x15:
|
||||
__cpu_model.__cpu_type = AMDFAM15H;
|
||||
/* Bulldozer version 1. */
|
||||
if ( model <= 0xf)
|
||||
__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
|
||||
/* Bulldozer version 2. */
|
||||
if (model >= 0x10 && model <= 0x1f)
|
||||
__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the specific type of Intel CPU. */
|
||||
|
||||
static void
|
||||
get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
|
||||
{
|
||||
/* Parse family and model only if brand ID is 0. */
|
||||
if (brand_id == 0)
|
||||
{
|
||||
switch (family)
|
||||
{
|
||||
case 0x5:
|
||||
/* Pentium. */
|
||||
break;
|
||||
case 0x6:
|
||||
switch (model)
|
||||
{
|
||||
case 0x1c:
|
||||
case 0x26:
|
||||
/* Atom. */
|
||||
__cpu_model.__cpu_type = INTEL_ATOM;
|
||||
break;
|
||||
case 0x1a:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x2e:
|
||||
/* Nehalem. */
|
||||
__cpu_model.__cpu_type = INTEL_COREI7;
|
||||
__cpu_model.__cpu_subtype = INTEL_COREI7_NEHALEM;
|
||||
break;
|
||||
case 0x25:
|
||||
case 0x2c:
|
||||
case 0x2f:
|
||||
/* Westmere. */
|
||||
__cpu_model.__cpu_type = INTEL_COREI7;
|
||||
__cpu_model.__cpu_subtype = INTEL_COREI7_WESTMERE;
|
||||
break;
|
||||
case 0x2a:
|
||||
/* Sandy Bridge. */
|
||||
__cpu_model.__cpu_type = INTEL_COREI7;
|
||||
__cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
|
||||
break;
|
||||
case 0x17:
|
||||
case 0x1d:
|
||||
/* Penryn. */
|
||||
case 0x0f:
|
||||
/* Merom. */
|
||||
__cpu_model.__cpu_type = INTEL_CORE2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* We have no idea. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_available_features (unsigned int ecx, unsigned int edx)
|
||||
{
|
||||
unsigned int features = 0;
|
||||
|
||||
if (edx & bit_CMOV)
|
||||
features |= (1 << FEATURE_CMOV);
|
||||
if (edx & bit_MMX)
|
||||
features |= (1 << FEATURE_MMX);
|
||||
if (edx & bit_SSE)
|
||||
features |= (1 << FEATURE_SSE);
|
||||
if (edx & bit_SSE2)
|
||||
features |= (1 << FEATURE_SSE2);
|
||||
if (ecx & bit_POPCNT)
|
||||
features |= (1 << FEATURE_POPCNT);
|
||||
if (ecx & bit_SSE3)
|
||||
features |= (1 << FEATURE_SSE3);
|
||||
if (ecx & bit_SSSE3)
|
||||
features |= (1 << FEATURE_SSSE3);
|
||||
if (ecx & bit_SSE4_1)
|
||||
features |= (1 << FEATURE_SSE4_1);
|
||||
if (ecx & bit_SSE4_2)
|
||||
features |= (1 << FEATURE_SSE4_2);
|
||||
if (ecx & bit_AVX)
|
||||
features |= (1 << FEATURE_AVX);
|
||||
|
||||
__cpu_model.__cpu_features[0] = features;
|
||||
}
|
||||
|
||||
/* Out-of-line wrapper around __get_cpuid; forwards every argument and
   returns its result unchanged.  It is kept noinline because having
   many calls to cpuid in one function in 32-bit mode causes GCC to
   complain: "can't find a register in class CLOBBERED_REGS".  This is
   related to PR rtl-optimization 44174.  */

static int __attribute__ ((noinline))
__get_cpuid_output (unsigned int __level,
                    unsigned int *__eax, unsigned int *__ebx,
                    unsigned int *__ecx, unsigned int *__edx)
{
  int status = __get_cpuid (__level, __eax, __ebx, __ecx, __edx);

  return status;
}
|
||||
|
||||
|
||||
/* A constructor function that is sets __cpu_model and __cpu_features with
|
||||
the right values. This needs to run only once. This constructor is
|
||||
given the highest priority and it should run before constructors without
|
||||
the priority set. However, it still runs after ifunc initializers and
|
||||
needs to be called explicitly there. */
|
||||
|
||||
int __attribute__ ((constructor (101)))
|
||||
__cpu_indicator_init (void)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
int max_level = 5;
|
||||
unsigned int vendor;
|
||||
unsigned int model, family, brand_id;
|
||||
unsigned int extended_model, extended_family;
|
||||
|
||||
/* This function needs to run just once. */
|
||||
if (__cpu_model.__cpu_vendor)
|
||||
return 0;
|
||||
|
||||
/* Assume cpuid insn present. Run in level 0 to get vendor id. */
|
||||
if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
|
||||
return -1;
|
||||
|
||||
vendor = ebx;
|
||||
max_level = eax;
|
||||
|
||||
if (max_level < 1)
|
||||
return -1;
|
||||
|
||||
if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
|
||||
return -1;
|
||||
|
||||
model = (eax >> 4) & 0x0f;
|
||||
family = (eax >> 8) & 0x0f;
|
||||
brand_id = ebx & 0xff;
|
||||
extended_model = (eax >> 12) & 0xf0;
|
||||
extended_family = (eax >> 20) & 0xff;
|
||||
|
||||
if (vendor == SIG_INTEL)
|
||||
{
|
||||
/* Adjust model and family for Intel CPUS. */
|
||||
if (family == 0x0f)
|
||||
{
|
||||
family += extended_family;
|
||||
model += extended_model;
|
||||
}
|
||||
else if (family == 0x06)
|
||||
model += extended_model;
|
||||
|
||||
/* Get CPU type. */
|
||||
get_intel_cpu (family, model, brand_id);
|
||||
/* Find available features. */
|
||||
get_available_features (ecx, edx);
|
||||
__cpu_model.__cpu_vendor = VENDOR_INTEL;
|
||||
}
|
||||
else if (vendor == SIG_AMD)
|
||||
{
|
||||
/* Adjust model and family for AMD CPUS. */
|
||||
if (family == 0x0f)
|
||||
{
|
||||
family += extended_family;
|
||||
model += (extended_model << 4);
|
||||
}
|
||||
|
||||
/* Get CPU type. */
|
||||
get_amd_cpu (family, model);
|
||||
/* Find available features. */
|
||||
get_available_features (ecx, edx);
|
||||
__cpu_model.__cpu_vendor = VENDOR_AMD;
|
||||
}
|
||||
else
|
||||
__cpu_model.__cpu_vendor = VENDOR_OTHER;
|
||||
|
||||
gcc_assert (__cpu_model.__cpu_vendor < VENDOR_MAX);
|
||||
gcc_assert (__cpu_model.__cpu_type < CPU_TYPE_MAX);
|
||||
gcc_assert (__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
|
||||
|
||||
return 0;
|
||||
}
|
@ -147,6 +147,10 @@ GCC_4.3.0 {
|
||||
__trunctfxf2
|
||||
__unordtf2
|
||||
}
|
||||
|
||||
GCC_4.8.0 {
|
||||
__cpu_model
|
||||
}
|
||||
%else
|
||||
GCC_4.4.0 {
|
||||
__addtf3
|
||||
@ -183,4 +187,8 @@ GCC_4.4.0 {
|
||||
GCC_4.5.0 {
|
||||
__extendxftf2
|
||||
}
|
||||
|
||||
GCC_4.8.0 {
|
||||
__cpu_model
|
||||
}
|
||||
%endif
|
||||
|
1
libgcc/config/i386/t-cpuinfo
Normal file
1
libgcc/config/i386/t-cpuinfo
Normal file
@ -0,0 +1 @@
|
||||
LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c
|
Loading…
Reference in New Issue
Block a user