From fca9dc00583665bb89e63df672ba2f2184bb9ee6 Mon Sep 17 00:00:00 2001 From: Zdenek Dvorak Date: Sun, 19 Oct 2003 23:37:32 +0200 Subject: [PATCH] Makefile.in (toplev.o): Add value-prof.h dependency. * Makefile.in (toplev.o): Add value-prof.h dependency. (value-prof.o): Add REGS_H dependency. * common.opt (fprofile-values, fvpt): New. * flags.h (flag_value_profile_transformations): Declare. * opts.c (common_handle_option): Handle -fprofile_values and -fvpt. * profile.c (branch_prob): Don't remove death notes here. * timevar.def (TV_VPT): New. * value-prof.c: Include regs.h. (insn_divmod_values_to_profile, gen_divmod_fixed_value, gen_mod_pow2, gen_mod_subtract, divmod_fixed_value_transform,mod_pow2_value_transform, mod_subtract_transform, value_profile_transformations): New. (insn_values_to_profile): Call insn_divmod_values_to_profile. (find_values_to_profile): Add dumps. * value-prof.h (value_profile_transformations): Declare. * toplev.c: Include value-prof.h. (rest_of_handle_value_profile_transformations): New. (enum dump_file_index): Add DFI_vpt. (dump_file): Add vpt dump. (flag_value_profile_transformations): New. (lang_independent_options): Add flag_profile_values and flag_value_profile_transformations. (rest_of_compilation): Call rest_of_handle_value_profile_transformations. (process_options): Let -fvpt imply -fprofile-values. * doc/invoke.texi (-fvpt): Document. From-SVN: r72685 --- gcc/ChangeLog | 29 +++ gcc/Makefile.in | 4 +- gcc/common.opt | 8 + gcc/doc/invoke.texi | 12 +- gcc/flags.h | 3 + gcc/opts.c | 8 + gcc/profile.c | 2 - gcc/timevar.def | 1 + gcc/toplev.c | 42 ++- gcc/value-prof.c | 603 +++++++++++++++++++++++++++++++++++++++++++- gcc/value-prof.h | 1 + 11 files changed, 703 insertions(+), 10 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f65329cac4c6..5dc8b6803f51 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2003-10-19 Zdenek Dvorak + + * Makefile.in (toplev.o): Add value-prof.h dependency. + (value-prof.o): Add REGS_H dependency. + * common.opt (fprofile-values, fvpt): New. + * flags.h (flag_value_profile_transformations): Declare. + * opts.c (common_handle_option): Handle -fprofile_values and + -fvpt. + * profile.c (branch_prob): Don't remove death notes here. + * timevar.def (TV_VPT): New. + * value-prof.c: Include regs.h. + (insn_divmod_values_to_profile, gen_divmod_fixed_value, gen_mod_pow2, + gen_mod_subtract, divmod_fixed_value_transform,mod_pow2_value_transform, + mod_subtract_transform, value_profile_transformations): New. + (insn_values_to_profile): Call insn_divmod_values_to_profile. + (find_values_to_profile): Add dumps. + * value-prof.h (value_profile_transformations): Declare. + * toplev.c: Include value-prof.h. + (rest_of_handle_value_profile_transformations): New. + (enum dump_file_index): Add DFI_vpt. + (dump_file): Add vpt dump. + (flag_value_profile_transformations): New. + (lang_independent_options): Add flag_profile_values and + flag_value_profile_transformations. + (rest_of_compilation): Call + rest_of_handle_value_profile_transformations. + (process_options): Let -fvpt imply -fprofile-values. + * doc/invoke.texi (-fvpt): Document. + 2003-10-19 Jan Hubicka * i386.c (print_reg): Do not abort on certain registers. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 492118a7e732..7ba6b7d0089e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1499,7 +1499,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_ function.h flags.h xcoffout.h input.h $(INSN_ATTR_H) output.h $(DIAGNOSTIC_H) \ debug.h insn-config.h intl.h $(RECOG_H) Makefile toplev.h \ dwarf2out.h sdbout.h dbxout.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) \ - graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) \ + graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) value-prof.h \ ssa.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) \ langhooks.h insn-flags.h cfglayout.h real.h cfgloop.h \ hosthooks.h $(LANGHOOKS_DEF_H) cgraph.h $(COVERAGE_H) @@ -1653,7 +1653,7 @@ profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_H) value-prof.h value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h flags.h \ - $(RECOG_H) insn-config.h $(OPTABS_H) + $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h $(LOOP_H) \ insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \ real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \ diff --git a/gcc/common.opt b/gcc/common.opt index fdf28b214259..2ad0e0a58f62 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -517,6 +517,10 @@ fprofile-arcs Common Insert arc-based program profiling code +fprofile-values +Common +Insert code to profile values of expressions + frandom-seed Common @@ -719,6 +723,10 @@ fverbose-asm Common Add extra commentary to assembler output +fvpt +Common +Use expression value profiles in optimizations + fweb Common Construct webs and split unrelated uses of single variable diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a154b7c6e3b6..c99af2cb4d6f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -264,7 +264,7 @@ in the following sections. @xref{Optimize Options,,Options that Control Optimization}. @gccoptlist{-falign-functions=@var{n} -falign-jumps=@var{n} @gol -falign-labels=@var{n} -falign-loops=@var{n} @gol --fbranch-probabilities -fprofile-values -fbranch-target-load-optimize @gol +-fbranch-probabilities -fprofile-values -fvpt -fbranch-target-load-optimize @gol -fbranch-target-load-optimize2 -fcaller-saves -fcprop-registers @gol -fcse-follow-jumps -fcse-skip-blocks -fdata-sections @gol -fdelayed-branch -fdelete-null-pointer-checks @gol @@ -4564,6 +4564,16 @@ With @option{-fbranch-probabilities}, it reads back the data gathered from profiling values of expressions and adds @samp{REG_VALUE_PROFILE} notes to instructions for their later usage in optimizations. +@item -fvpt +@opindex fvpt +If combined with @option{-fprofile-arcs}, it instructs the compiler to add +a code to gather information about values of expressions. + +With @option{-fbranch-probabilities}, it reads back the data gathered +and actually performs the optimizations based on them. +Currently the optimizations include specialization of division operation +using the knowledge about the value of the denominator. + @item -fnew-ra @opindex fnew-ra Use a graph coloring register allocator. Currently this option is meant diff --git a/gcc/flags.h b/gcc/flags.h index 2a1a10ff679d..5b6f7283b6c0 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -680,6 +680,9 @@ extern int flag_gcse_sm; extern int flag_gcse_las; +/* Nonzero if value histograms should be used to optimize code. */ +extern int flag_value_profile_transformations; + /* Perform branch target register optimization before prologue / epilogue threading. */ diff --git a/gcc/opts.c b/gcc/opts.c index e4c9ad454941..6439e26670f0 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1182,6 +1182,14 @@ common_handle_option (size_t scode, const char *arg, profile_arc_flag = value; break; + case OPT_fprofile_values: + flag_profile_values = value; + break; + + case OPT_fvpt: + flag_value_profile_transformations = value; + break; + case OPT_frandom_seed: /* The real switch is -fno-random-seed. */ if (value) diff --git a/gcc/profile.c b/gcc/profile.c index 082ed322e5a7..625a904a428c 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -988,8 +988,6 @@ branch_prob (void) allocate_reg_info (max_reg_num (), FALSE, FALSE); } - if (flag_profile_values) - count_or_remove_death_notes (NULL, 1); remove_fake_edges (); free_aux_for_edges (); /* Re-merge split basic blocks and the mess introduced by diff --git a/gcc/timevar.def b/gcc/timevar.def index 99a0b09d1e7b..74fda87e8379 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -72,6 +72,7 @@ DEFTIMEVAR (TV_TRACER , "tracer") DEFTIMEVAR (TV_WEB , "web") DEFTIMEVAR (TV_CSE2 , "CSE 2") DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction") +DEFTIMEVAR (TV_VPT , "value profile opts") DEFTIMEVAR (TV_FLOW , "flow analysis") DEFTIMEVAR (TV_COMBINE , "combiner") DEFTIMEVAR (TV_IFCVT , "if-conversion") diff --git a/gcc/toplev.c b/gcc/toplev.c index c1a05f6e26de..151f6c652806 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -78,6 +78,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cgraph.h" #include "opts.h" #include "coverage.h" +#include "value-prof.h" #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO) #include "dwarf2out.h" @@ -137,6 +138,7 @@ static void rest_of_handle_null_pointer (tree, rtx); static void rest_of_handle_addressof (tree, rtx); static void rest_of_handle_cfg (tree, rtx); static void rest_of_handle_branch_prob (tree, rtx); +static void rest_of_handle_value_profile_transformations (tree, rtx); static void rest_of_handle_if_conversion (tree, rtx); static void rest_of_handle_if_after_combine (tree, rtx); static void rest_of_handle_tracer (tree, rtx); @@ -264,6 +266,7 @@ enum dump_file_index DFI_bypass, DFI_cfg, DFI_bp, + DFI_vpt, DFI_ce1, DFI_tracer, DFI_web, @@ -296,7 +299,7 @@ enum dump_file_index Remaining -d letters: " m q " - " JK O Q V Y " + " JK O Q Y " */ static struct dump_file_info dump_file[DFI_MAX] = @@ -318,6 +321,7 @@ static struct dump_file_info dump_file[DFI_MAX] = { "bypass", 'G', 1, 0, 0 }, /* Yes, duplicate enable switch. */ { "cfg", 'f', 1, 0, 0 }, { "bp", 'b', 1, 0, 0 }, + { "vpt", 'V', 1, 0, 0 }, { "ce1", 'C', 1, 0, 0 }, { "tracer", 'T', 1, 0, 0 }, { "web", 'Z', 0, 0, 0 }, @@ -404,6 +408,9 @@ int profile_arc_flag = 0; int flag_profile_values = 0; +/* Nonzero if value histograms should be used to optimize code. */ +int flag_value_profile_transformations = 0; + /* Nonzero if generating info for gcov to calculate line test coverage. */ int flag_test_coverage = 0; @@ -1110,6 +1117,8 @@ static const lang_independent_options f_options[] = {"asynchronous-unwind-tables", &flag_asynchronous_unwind_tables, 1 }, {"non-call-exceptions", &flag_non_call_exceptions, 1 }, {"profile-arcs", &profile_arc_flag, 1 }, + {"profile-values", &flag_profile_values, 1 }, + {"vpt", &flag_value_profile_transformations, 1 }, {"test-coverage", &flag_test_coverage, 1 }, {"branch-probabilities", &flag_branch_probabilities, 1 }, {"profile", &profile_flag, 1 }, @@ -2462,6 +2471,7 @@ rest_of_handle_branch_prob (tree decl, rtx insns) timevar_push (TV_BRANCH_PROB); open_dump_file (DFI_bp, decl); + if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities) branch_prob (); @@ -2481,6 +2491,20 @@ rest_of_handle_branch_prob (tree decl, rtx insns) timevar_pop (TV_BRANCH_PROB); } +/* Do optimizations based on expression value profiles. */ +static void +rest_of_handle_value_profile_transformations (tree decl, rtx insns) +{ + open_dump_file (DFI_vpt, decl); + timevar_push (TV_VPT); + + if (value_profile_transformations ()) + cleanup_cfg (CLEANUP_EXPENSIVE); + + timevar_pop (TV_VPT); + close_dump_file (DFI_vpt, print_rtl_with_bb, insns); +} + /* Do control and data flow analysis; write some of the results to the dump file. */ static void @@ -3345,7 +3369,18 @@ rest_of_compilation (tree decl) if (optimize > 0 || profile_arc_flag || flag_test_coverage || flag_branch_probabilities) - rest_of_handle_branch_prob (decl, insns); + { + rest_of_handle_branch_prob (decl, insns); + + if (flag_branch_probabilities + && flag_profile_values + && flag_value_profile_transformations) + rest_of_handle_value_profile_transformations (decl, insns); + + /* Remove the death notes created for vpt. */ + if (flag_profile_values) + count_or_remove_death_notes (NULL, 1); + } if (optimize > 0) rest_of_handle_if_conversion (decl, insns); @@ -4228,6 +4263,9 @@ process_options (void) if (flag_unit_at_a_time && ! lang_hooks.callgraph.expand_function) flag_unit_at_a_time = 0; + if (flag_value_profile_transformations) + flag_profile_values = 1; + /* Warn about options that are not supported on this machine. */ #ifndef INSN_SCHEDULING if (flag_schedule_insns || flag_schedule_insns_after_reload) diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 5dca281b1df8..3924fa76c184 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -32,6 +32,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "insn-config.h" #include "recog.h" #include "optabs.h" +#include "regs.h" /* In this file value profile based optimizations will be placed (none are here just now, but they are hopefully coming soon). @@ -49,7 +50,17 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- the expression that is profiled -- list of counters starting from the first one. */ +static void insn_divmod_values_to_profile (rtx, unsigned *, + struct histogram_value **); static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); +static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, + rtx, gcov_type); +static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx); +static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, + int); +static bool divmod_fixed_value_transform (rtx insn); +static bool mod_pow2_value_transform (rtx); +static bool mod_subtract_transform (rtx); /* Release the list of VALUES of length N_VALUES for that we want to measure histograms. */ @@ -60,13 +71,106 @@ free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED, free (values); } +/* Find values inside INSN for that we want to measure histograms for + division/modulo optimization. */ +static void +insn_divmod_values_to_profile (rtx insn, unsigned *n_values, + struct histogram_value **values) +{ + rtx set, set_src, op1, op2; + enum machine_mode mode; + + if (!INSN_P (insn)) + return; + + set = single_set (insn); + if (!set) + return; + + mode = GET_MODE (SET_DEST (set)); + if (!INTEGRAL_MODE_P (mode)) + return; + + set_src = SET_SRC (set); + switch (GET_CODE (set_src)) + { + case DIV: + case MOD: + case UDIV: + case UMOD: + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + if (side_effects_p (op2)) + return; + + /* Check for a special case where the divisor is power of 2. */ + if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) + { + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + (*values)[*n_values].value = op2; + (*values)[*n_values].seq = NULL_RTX; + (*values)[*n_values].mode = mode; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_POW2; + (*values)[*n_values].hdata.pow2.may_be_other = 1; + (*n_values)++; + } + + /* Check whether the divisor is not in fact a constant. */ + if (!CONSTANT_P (op2)) + { + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + (*values)[*n_values].value = op2; + (*values)[*n_values].mode = mode; + (*values)[*n_values].seq = NULL_RTX; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE; + (*n_values)++; + } + + /* For mod, check whether it is not often a noop (or replacable by + a few subtractions). */ + if (GET_CODE (set_src) == UMOD && !side_effects_p (op1)) + { + rtx tmp; + + *values = xrealloc (*values, + (*n_values + 1) + * sizeof (struct histogram_value)); + start_sequence (); + tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); + (*values)[*n_values].value = force_operand (tmp, NULL_RTX); + (*values)[*n_values].seq = get_insns (); + end_sequence (); + (*values)[*n_values].mode = mode; + (*values)[*n_values].insn = insn; + (*values)[*n_values].type = HIST_TYPE_INTERVAL; + (*values)[*n_values].hdata.intvl.int_start = 0; + (*values)[*n_values].hdata.intvl.steps = 2; + (*values)[*n_values].hdata.intvl.may_be_less = 1; + (*values)[*n_values].hdata.intvl.may_be_more = 1; + (*n_values)++; + } + return; + + default: + return; + } +} + /* Find values inside INSN for that we want to measure histograms and adds them to list VALUES (increasing the record of its length in N_VALUES). */ static void -insn_values_to_profile (rtx insn ATTRIBUTE_UNUSED, - unsigned *n_values ATTRIBUTE_UNUSED, - struct histogram_value **values ATTRIBUTE_UNUSED) +insn_values_to_profile (rtx insn, + unsigned *n_values, + struct histogram_value **values) { + if (flag_value_profile_transformations) + insn_divmod_values_to_profile (insn, n_values, values); } /* Find list of values for that we want to measure histograms. */ @@ -86,21 +190,40 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) switch ((*values)[i].type) { case HIST_TYPE_INTERVAL: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Interval counter for insn %d, range %d -- %d.\n", + INSN_UID ((*values)[i].insn), + (*values)[i].hdata.intvl.int_start, + ((*values)[i].hdata.intvl.int_start + + (*values)[i].hdata.intvl.steps - 1)); (*values)[i].n_counters = (*values)[i].hdata.intvl.steps + ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + ((*values)[i].hdata.intvl.may_be_more ? 1 : 0); break; case HIST_TYPE_POW2: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Pow2 counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) + ((*values)[i].hdata.pow2.may_be_other ? 1 : 0); break; case HIST_TYPE_SINGLE_VALUE: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Single value counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = 3; break; case HIST_TYPE_CONST_DELTA: + if (rtl_dump_file) + fprintf (rtl_dump_file, + "Constant delta counter for insn %d.\n", + INSN_UID ((*values)[i].insn)); (*values)[i].n_counters = 4; break; @@ -109,3 +232,477 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) } } } + +/* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses + them to identify and exploit properties of values that are hard to analyze + statically. + + We do following transformations: + + 1) + + x = a / b; + + where b is almost always a constant N is transformed to + + if (b == N) + x = a / N; + else + x = a / b; + + Analogically with % + + 2) + + x = a % b + + where b is almost always a power of 2 and the division is unsigned + TODO -- handle signed case as well + + if ((b & (b - 1)) == 0) + x = a & (b - 1); + else + x = x % b; + + Note that when b = 0, no error will occur and x = a; this is correct, + as result of such operation is undefined. + + 3) + + x = a % b + + where a is almost always less then b and the division is unsigned + TODO -- handle signed case as well + + x = a; + if (x >= b) + x %= b; + + 4) + + x = a % b + + where a is almost always less then 2 * b and the division is unsigned + TODO -- handle signed case as well + + x = a; + if (x >= b) + x -= b; + if (x >= b) + x %= b; + + It would be possible to continue analogically for K * b for other small + K's, but it is probably not useful. + + TODO: + + There are other useful cases that could be handled by a similar mechanism, + for example: + + for (i = 0; i < n; i++) + ... + + transform to (for constant N): + + if (n == N) + for (i = 0; i < N; i++) + ... + else + for (i = 0; i < n; i++) + ... + making unroller happy. Since this may grow the code significantly, + we would have to be very careful here. */ + +bool +value_profile_transformations () +{ + rtx insn, next; + int changed = false; + + for (insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + + if (!INSN_P (insn)) + continue; + + /* Scan for insn carrying a histogram. */ + if (!find_reg_note (insn, REG_VALUE_PROFILE, 0)) + continue; + + /* Ignore cold areas -- we are growing a code. */ + if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) + continue; + + if (rtl_dump_file) + { + fprintf (rtl_dump_file, "Trying transformations on insn %d\n", + INSN_UID (insn)); + print_rtl_single (rtl_dump_file, insn); + } + + /* Transformations: */ + if (flag_value_profile_transformations + && (mod_subtract_transform (insn) + || divmod_fixed_value_transform (insn) + || mod_pow2_value_transform (insn))) + changed = true; + } + + if (changed) + { + commit_edge_insertions (); + allocate_reg_info (max_reg_num (), FALSE, FALSE); + } + + return changed; +} + +/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1 + and OP2 whose value is expected to be VALUE and result TARGET). */ +static rtx +gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation, + rtx target, rtx op1, rtx op2, gcov_type value) +{ + rtx tmp, tmp1; + rtx neq_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + rtx sequence; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX, + NULL_RTX, neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); + + emit_jump_insn (gen_jump (end_label)); + emit_barrier (); + + emit_label (neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transform 1) on INSN if applicable. */ +static bool +divmod_fixed_value_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type val, count, all; + edge e; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != DIV && code != MOD && code != UDIV && code != UMOD) + return false; + op1 = XEXP (set_src, false); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + val = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + count = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + all = INTVAL (XEXP (histogram, 0)); + + /* We requiere that count is at least half of all; this means + that for the transformation to fire the value must be constant + at least 50% of time (and 75% gives the garantee of usage). */ + if (!rtx_equal_p (op2, value) || 2 * count < all) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Div/mod by constant transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e); + + return true; +} + +/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1 + and OP2 and result TARGET). */ +static rtx +gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target, + rtx op1, rtx op2) +{ + rtx tmp, tmp1, tmp2, tmp3; + rtx neq_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + rtx sequence; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX, + 0, OPTAB_WIDEN); + tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX, + 0, OPTAB_WIDEN); + do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX, + NULL_RTX, neq_label); + tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target, + 0, OPTAB_WIDEN); + if (tmp3 != target) + emit_move_insn (copy_rtx (target), tmp3); + emit_jump_insn (gen_jump (end_label)); + emit_barrier (); + + emit_label (neq_label); + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (target, tmp1); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transform 2) on INSN if applicable. */ +static bool +mod_pow2_value_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type wrong_values, count; + edge e; + int i; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != UMOD) + return false; + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + wrong_values =INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + + count = 0; + for (i = 0; i < GET_MODE_BITSIZE (mode); i++) + { + count += INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + } + + if (!rtx_equal_p (op2, value)) + return false; + + /* We require that we hit a power of two at least half of all evaluations. */ + if (count < wrong_values) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Mod power of 2 transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_mod_pow2 (mode, code, set_dest, op1, op2), e); + + return true; +} + +/* Generate code for transformations 3 and 4 (with MODE and OPERATION, + operands OP1 and OP2, result TARGET and at most SUB subtractions). */ +static rtx +gen_mod_subtract (enum machine_mode mode, enum rtx_code operation, + rtx target, rtx op1, rtx op2, int sub) +{ + rtx tmp, tmp1; + rtx end_label = gen_label_rtx (); + rtx sequence; + int i; + + start_sequence (); + + if (!REG_P (op2)) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, copy_rtx (op2)); + } + else + tmp = op2; + + emit_move_insn (target, copy_rtx (op1)); + do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, + NULL_RTX, end_label); + + + for (i = 0; i < sub; i++) + { + tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target, + 0, OPTAB_WIDEN); + if (tmp1 != target) + emit_move_insn (target, tmp1); + do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, + NULL_RTX, end_label); + } + + tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp)); + tmp1 = force_operand (tmp1, target); + if (tmp1 != target) + emit_move_insn (target, tmp1); + + emit_label (end_label); + + sequence = get_insns (); + end_sequence (); + rebuild_jump_labels (sequence); + return sequence; +} + +/* Do transforms 3) and 4) on INSN if applicable. */ +static bool +mod_subtract_transform (rtx insn) +{ + rtx set, set_src, set_dest, op1, op2, value, histogram; + enum rtx_code code; + enum machine_mode mode; + gcov_type wrong_values, counts[2], count, all; + edge e; + int i; + + set = single_set (insn); + if (!set) + return false; + + set_src = SET_SRC (set); + set_dest = SET_DEST (set); + code = GET_CODE (set_src); + mode = GET_MODE (set_dest); + + if (code != UMOD) + return false; + op1 = XEXP (set_src, 0); + op2 = XEXP (set_src, 1); + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + + all = 0; + for (i = 0; i < 2; i++) + { + counts[i] = INTVAL (XEXP (histogram, 0)); + all += counts[i]; + histogram = XEXP (histogram, 1); + } + wrong_values = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + wrong_values += INTVAL (XEXP (histogram, 0)); + all += wrong_values; + + /* We require that we use just subtractions in at least 50% of all + evaluations. */ + count = 0; + for (i = 0; i < 2; i++) + { + count += counts[i]; + if (count * 2 >= all) + break; + } + + if (i == 2) + return false; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Mod subtract transformation on insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + delete_insn (insn); + + insert_insn_on_edge ( + gen_mod_subtract (mode, code, set_dest, op1, op2, i), e); + + return true; +} diff --git a/gcc/value-prof.h b/gcc/value-prof.h index 233895e4a319..afbeb916eb85 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -61,3 +61,4 @@ struct histogram_value extern void find_values_to_profile (unsigned *, struct histogram_value **); extern void free_profiled_values (unsigned, struct histogram_value *); +extern bool value_profile_transformations (void);