如何编写测试用例检查gcc中的优化选项?
各位前辈,gcc提供了N多的优化选项,但是光看介绍很难精确理解它们做了什么样的工作。现在我想自己写一些测试代码,在编译的时候加上或者去掉一些编译选项,然后看看生成的汇编代码,这应该是比较基本的验证方法吧。可是怎么写这些代码呢?有做过这方面工作的朋友么?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(4)
恩,鸡蛋问题。你想写测试代码,必须深刻理解那些选项是做什么的。 但是你没有写测试代码,没法深刻理解那些选项是做什么的。
其实你可以google,还可以在gcc 和 gcc-patches mailing list 上查一下加入那个选项的patch和相应的讨论。
以前写过一个脚本,可以用来生成gcc选项组合。
#! /bin/bash
#####################################################
# GCC Option Generator #
# #
# This file provides functions and data, which help #
# to generate the GCC options. You can include them #
# in your shell script, like this: #
# #
# source ./gcc-optgen.sh or #
# . ./gcc-optgen.sh #
# #
# Written by Eric Fisher, joefoxreal@gmail.com. #
#####################################################
#########################################################
# The following functions are used as local/static ones #
#########################################################
# $(local_get_value value_name)
# Print the value of the shell variable whose NAME is given as $1.
# Arguments: $1 - variable name, e.g. value_inline_limit
# Outputs:   the variable's value plus a newline (an empty line if the
#            variable is unset)
# Returns:   0 on success, 1 if $1 is not a valid variable name
local_get_value()
{
  # 'local' keeps a direct (non-$(...)) call from clobbering a caller's $n.
  local var_name=$1

  # Indirect expansion of an empty or malformed name is a hard bash error;
  # report it in a controlled way instead.
  if ! [[ $var_name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    printf 'local_get_value: invalid variable name: %s\n' "$var_name" >&2
    return 1
  fi

  # Quoted so the value is printed verbatim (no word splitting or globbing).
  printf '%s\n' "${!var_name}"
}
# $(local_emit_option a)
# Turn one option template into the text that is handed to gcc:
#   -fabc                   ->  -fabc
#   -fabc=value_abc         ->  -fabc=<value of $value_abc>
#   --param#abc=value_abc   ->  --param abc=<value of $value_abc>
# Every '#' in the template stands for a space; the text after the first
# '=' is the NAME of a shell variable whose value is substituted.
# Arguments: $1 - option template
# Outputs:   the emitted option followed by a newline
local_emit_option()
{
  # Locals only: the original leaked $n and $m into the caller's scope.
  local template var_name
  template=${1//\#/ }

  # No '=' means a plain switch; emit it unchanged.
  if [[ $template != *=* ]]; then
    printf '%s\n' "$template"
    return 0
  fi

  # Split at the FIRST '=': left side is the option, right side the name of
  # the variable holding its value.
  var_name=${template#*=}
  printf '%s=%s\n' "${template%%=*}" "$(local_get_value "$var_name")"
}
###################################################
# The following functions are used as extern ones #
###################################################
# $(random_between min max)
# Print a random integer r with min <= r <= max.
# Arguments: $1 - lower bound, $2 - upper bound (decimal integers, may be
#            negative, no leading zeros)
# Outputs:   the chosen integer followed by a newline
# Returns:   0 on success, 1 (with a message on stderr) for non-integer or
#            reversed bounds
# Note:      two 15-bit $RANDOM draws are combined, so spans of up to 2^30
#            values are reachable (a single draw caps out at 32768).
random_between()
{
  local lo=$1 hi=$2 seed span
  local int_re='^-?(0|[1-9][0-9]*)$'

  if ! [[ $lo =~ $int_re && $hi =~ $int_re ]]; then
    printf 'random_between: bounds must be integers: "%s" "%s"\n' \
      "$lo" "$hi" >&2
    return 1
  fi
  # max == min-1 used to divide by zero; other reversed bounds silently
  # produced values outside any sensible range.
  if (( hi < lo )); then
    printf 'random_between: max (%s) is smaller than min (%s)\n' \
      "$hi" "$lo" >&2
    return 1
  fi

  # Reseed from the nanosecond clock as the original did, but only when
  # date really printed digits (%N is a GNU extension; elsewhere it may
  # print a literal letter or nothing).
  seed=$(date +%N 2>/dev/null)
  if [[ $seed =~ ^[0-9]+$ ]]; then
    RANDOM=$(( 10#$seed ))   # 10# stops leading zeros being read as octal
  fi

  span=$(( hi - lo + 1 ))
  printf '%s\n' "$(( lo + ((RANDOM << 15) | RANDOM) % span ))"
}
# $(random_in a b c...)
# Print one of the given values, chosen at random.
# Arguments: $@ - candidate values (at least one)
# Outputs:   the chosen value, verbatim, followed by a newline
# Returns:   0 on success, 1 when called without any value or when
#            random_between fails
random_in()
{
  local idx

  # With no candidates the original asked random_between for 0..-1, which
  # ended in a division by zero.
  if (( $# == 0 )); then
    printf 'random_in: no values to choose from\n' >&2
    return 1
  fi

  idx=$(random_between 0 "$(( $# - 1 ))") || return
  shift "$idx"
  # Quoted so values containing spaces or glob characters survive intact.
  printf '%s\n' "$1"
}
# $(option_number a b c...)
# Count the options passed as arguments.
# Arguments: $@ - the options (typically an unquoted list variable)
# Outputs:   the number of arguments followed by a newline
option_number()
{
  local -i count=$#
  printf '%d\n' "$count"
}
# $(option_select index a b c...)
# Emit the option at position 'index' of a b c... (index starts at 0),
# after passing it through local_emit_option.
# Arguments: $1 - zero-based index, $2... - option templates
# Outputs:   the emitted option followed by a newline
# Returns:   status of local_emit_option, or 1 (with a message on stderr)
#            when the index is not a number in 0..count-1
option_select()
{
  local index=$1

  # An out-of-range index used to make 'shift' fail silently, after which
  # the index itself was emitted as if it were an option.
  if ! [[ $index =~ ^[0-9]+$ ]] || (( 10#$index >= $# - 1 )); then
    printf 'option_select: index "%s" is out of range\n' "$index" >&2
    return 1
  fi

  # Drop the index plus the options in front of the requested one.
  shift "$(( 10#$index + 1 ))"
  local_emit_option "$1"
}
# $(option_random_select a b c...)
# Emit one randomly chosen option out of a b c...
# Arguments: $@ - option templates (at least one)
# Outputs:   the emitted option followed by a newline
# Returns:   status of option_select, or 1 when no option was given or
#            random_between fails
option_random_select()
{
  local idx

  if (( $# == 0 )); then
    printf 'option_random_select: no options to choose from\n' >&2
    return 1
  fi

  idx=$(random_between 0 "$(( $# - 1 ))") || return
  # "$@" forwards every template exactly as received (no re-splitting or
  # globbing).
  option_select "$idx" "$@"
}
# $(option_random_reverse a b c...)
# Pick a random option out of a b c... and flip its on/off form:
#   -fabc     ->  -fno-abc        -fno-abc  ->  -fabc
#   -mabc     ->  -mno-abc        -mno-abc  ->  -mabc
# Options that carry a value (contain '='), consist of several words, or
# do not start with -f / -m are emitted unchanged.
# Arguments: $@ - option templates
# Outputs:   the (possibly flipped) option followed by a newline
# Returns:   0 on success, otherwise the status of option_random_select
option_random_reverse()
{
  local opt
  opt=$(option_random_select "$@") || return

  # Shell patterns replace the GNU-only 'expr match' forks. Multi-word
  # options were passed through before only because expr choked on them;
  # that pass-through is now explicit.
  case $opt in
    *=* | *' '*)
      printf '%s\n' "$opt"
      ;;
    -[fm]no-*)
      # Drop the "no-" that follows the two-character -f / -m prefix.
      printf '%s\n' "${opt:0:2}${opt:5}"
      ;;
    -[fm]*)
      printf '%s\n' "${opt:0:2}no-${opt:2}"
      ;;
    *)
      printf '%s\n' "$opt"
      ;;
  esac
}
###################
# flag definition #
###################
# Options which -O1 enables.
# Stored as one whitespace/newline-separated string; expand it unquoted
# (e.g. $(option_number $IN_O1)) so that every flag becomes its own word.
IN_O1="-fauto-inc-dec -fcprop-registers -fdce -fdefer-pop -fdelayed-branch
-fdse -fguess-branch-probability -fif-conversion2 -fif-conversion
-finline-small-functions -fipa-pure-const -fipa-reference
-fmerge-constants -fsplit-wide-types -ftree-builtin-call-dce -ftree-ccp
-ftree-ch -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse
-ftree-fre -ftree-sra -ftree-ter -funit-at-a-time"
# Options which -O2 enables in addition to those in IN_O1.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
IN_O2="-fthread-jumps -falign-functions -falign-jumps -falign-loops
-falign-labels -fcaller-saves -fcrossjumping -fcse-follow-jumps
-fcse-skip-blocks -fdelete-null-pointer-checks -fexpensive-optimizations
-fgcse -fgcse-lm -findirect-inlining -foptimize-sibling-calls
-fpeephole2 -fregmove -freorder-blocks -freorder-functions
-frerun-cse-after-loop -fsched-interblock -fsched-spec -fschedule-insns
-fschedule-insns2 -fstrict-aliasing -fstrict-overflow
-ftree-switch-conversion -ftree-pre -ftree-vrp"
# Options which -O3 enables in addition to those in IN_O2.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
IN_O3="-finline-functions -funswitch-loops -fpredictive-commoning
-fgcse-after-reload -ftree-vectorize"
# Options in which -Os differs from -O2 (original note: "options which -Os
# enables besides IN_O2").
# NOTE(review): the first five flags also appear in IN_O2, and the GCC
# manual appears to list this set as the flags -Os *disables* -- confirm
# against the manual of the targeted GCC release before relying on the
# "enables" wording.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
IN_Os="-falign-functions -falign-jumps -falign-loops -falign-labels
-freorder-blocks -freorder-blocks-and-partition -fprefetch-loop-arrays
-ftree-vect-loop-version"
# Options which the -O levels enable, but which can also be switched
# individually for fine tuning.
# The list mixes positive (-fxxx) and already-negated (-fno-xxx) forms, and
# several entries also appear in IN_O1 / IN_O2 / IN_O3.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
FINE_TUNING="-fno-default-inline -fno-defer-pop -fforward-propagate
-fomit-frame-pointer -foptimize-sibling-calls -fno-inline
-finline-small-functions -findirect-inlining -finline-functions
-finline-functions-called-once -fearly-inlining
-fkeep-inline-functions -fkeep-static-consts -fmerge-constants
-fmerge-all-constants -fmodulo-sched -fmodulo-sched-allow-regmoves
-fno-branch-count-reg -fno-function-cse -fno-zero-initialized-in-bss
-fmudflap -fmudflapth -fmudflapir -fthread-jumps -fsplit-wide-types
-fcse-follow-jumps -fcse-skip-blocks -frerun-cse-after-loop -fgcse -fgcse-lm
-fgcse-sm -fgcse-las -fgcse-after-reload -funsafe-loop-optimizations
-fcrossjumping -fauto-inc-dec -fdce -fdse -fif-conversion -fif-conversion2
-fdelete-null-pointer-checks -fexpensive-optimizations
-foptimize-register-move -fregmove -fira
-fira-coalesce -fno-ira-share-save-slots -fno-ira-share-spill-slots
-fdelayed-branch -fschedule-insns -fschedule-insns2
-fno-sched-interblock -fno-sched-spec -fsched-spec-load
-fsched-spec-load-dangerous -fsched-stalled-insns
-fsched-stalled-insns-dep -fsched2-use-superblocks
-fsched2-use-traces -fsee -freschedule-modulo-scheduled-loops
-fselective-scheduling -fselective-scheduling2 -fsel-sched-pipelining
-fcaller-saves -fconserve-stack -ftree-reassoc -ftree-pre -ftree-fre
-ftree-copy-prop -fipa-pure-const"
# Parameter options for fine tuning (options that take a value).
# Each entry is a template for local_emit_option:
#   - '#' stands for a space, so "-fira#-fira-algorithm=..." emits two gcc
#     options at once;
#   - the text after '=' is the NAME of a shell variable that the caller
#     must set beforehand (value_inline_limit, value_ira_algorithm,
#     value_sched_stalled_insns, value_stalled_insns_dep); its value is
#     substituted when the option is emitted.
FINE_TUNING_PARAM="-finline-limit=value_inline_limit
-fira#-fira-algorithm=value_ira_algorithm
-fsched-stalled-insns#-fsched-stalled-insns=value_sched_stalled_insns
-fsched-stalled-insns-dep#-fsched-stalled-insns-dep=value_stalled_insns_dep"
# Options used for floating point code.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
FLOATING_POINT="-ffloat-store -ffast-math -fno-math-errno
-funsafe-math-optimizations -fassociative-math -freciprocal-math
-ffinite-math-only -fno-signed-zeros -fno-trapping-math -frounding-math
-frtl-abstract-sequences -fsignaling-nans -fsingle-precision-constant
-fcx-limited-range -fcx-fortran-rules"
# Options that are not part of any -O level; they may or may not improve
# performance.
# NOTE(review): -funswitch-loops is listed here and in IN_O3 -- confirm
# which list it belongs to.
# Whitespace/newline-separated list; expand unquoted to get one word per
# flag.
NOT_IN_O="-frename-registers -ftracer -funroll-loops -funroll-all-loops
-fpeel-loops -fmove-loop-invariants -funswitch-loops -ffunction-sections
-fdata-sections -fbranch-target-load-optimize -fbranch-target-load-optimize2
-fbtr-bb-exclusive -fsection-anchors"
# --param name=value, parameters for fine tuning.
# Each entry is a template for local_emit_option: '#' stands for the space
# between "--param" and "name=value", and the text after '=' is the NAME of
# a shell variable the caller must set before emitting the option.
# NOTE(review): the max-sched-region-insns / max-pipeline-region-insns pair
#   is listed twice in a row, so iterating by index visits it twice.
# NOTE(review): large-stack-frame and large-stack-frame-growth both read
#   value_large_stack_frame -- confirm whether the second one was meant to
#   have its own variable.
# NOTE(review): some placeholder names are misspelled or truncated
#   (value_max_crosssjump_edges, value_max_average_unrooled_insns,
#   value_iv_consider_all_candidates_bouond, value_simultaneous_prefe);
#   callers have to define the variables with exactly these names.
PARAM="--param#sra-max-structure-size=value_sra_max_structure_size
--param#sra-field-structure-ratio=value_sra_field_structure_ratio
--param#struct-reorg-cold-struct-ratio=value_struct_reorg_cold_struct_ratio
--param#predictable-branch-cost-outcome=value_predictable_branch_cost
--param#max-crossjump-edges=value_max_crosssjump_edges
--param#min-crossjump-insns=value_min_crossjump_insns
--param#max-grow-copy-bb-insns=value_max_grow_copy_bb_insns
--param#max-goto-duplication-insns=value_max_goto_duplication_insns
--param#max-delay-slot-insn-search=value_max_delay_slot_insn_search
--param#max-delay-slot-live-search=value_delay_slot_live_search
--param#max-gcse-memory=value_max_gcse_memory
--param#max-gcse-passes=value_max_gcse_passes
--param#max-pending-list-length=value_max_pending_list_length
--param#max-inline-insns-single=value_inline_insns_single
--param#max-inline-insns-auto=value_max_inline_insns_auto
--param#large-function-insns=value_large_function_insns
--param#large-function-growth=value_large_function_growth
--param#large-unit-insns=value_large_unit_insns
--param#inline-unit-growth=value_inline_unit_growth
--param#ipcp-unit-growth=value_ipcp_unit_growth
--param#large-stack-frame=value_large_stack_frame
--param#large-stack-frame-growth=value_large_stack_frame
--param#max-inline-insns-recursive=value_max_inline_insns_recursive
--param#max-inline-insns-recursive-auto=value_max_inline_insns_recursive_auto
--param#max-inline-recursive-depth=value_max_inline_recursive_depth
--param#max-inline-recursive-depth-auto=value_max_inline_recursive_depth_auto
--param#min-inline-recursive-probability=value_min_inline_recursive_probability
--param#inline-call-cost=value_inline_call_cost
--param#min-vect-loop-bound=value_min_vect_loop_bound
--param#max-unrolled-insns=value_max_unrolled_insns
--param#max-average-unrolled-insns=value_max_average_unrooled_insns
--param#max-unroll-times=value_max_unroll_times
--param#max-peeled-insns=value_max_peeled_insns
--param#max-peel-times=value_max_peel_times
--param#max-completely-peeled-insns=value_max_completely_peeled_insns
--param#max-completely-peel-times=value_max_completely_peel_times
--param#max-unswitch-insns=value_max_unswitch_insns
--param#max-unswitch-level=value_max_unswitch_level
--param#lim-expensive=value_lim_expensive
--param#iv-consider-all-candidates-bound=value_iv_consider_all_candidates_bouond
--param#iv-max-considered-uses=value_iv_max_considered_uses
--param#iv-always-prune-cand-set-bound=value_iv_always_prune_cand_set_bound
--param#scev-max-expr-size=value_scev_max_expr_size
--param#omega-max-vars=value_omega_max_vars
--param#omega-max-geqs=value_omega_max_geqs
--param#omega-max-eqs=value_omega_max_eqs
--param#omega-max-wild-cards=value_omega_max_wild_cards
--param#omega-hash-table-size=value_omega_hash_table_size
--param#omega-max-keys=value_omega_max_keys
--param#omega-eliminate-redundant-constraints=value_omega_eliminate_redundant_constraints
--param#vect-max-version-for-alignment-checks=value_vect_max_version_for_alignment_checks
--param#vect-max-version-for-alias-checks=value_vect_max_version_for_alias_checks
--param#max-iterations-to-track=value_max_iterations_to_track
--param#hot-bb-count-fraction=value_hot_bb_count_fraction
--param#hot-bb-frequency-fraction=value_hot_bb_frequency_fraction
--param#max-predicted-iterations=value_max_predicted_iterations
--param#align-threshold=value_align_threshold
--param#align-loop-iterations=value_align_loop_iterations
--param#tracer-dynamic-coverage=value_tracer_dynamic_coverage
--param#tracer-dynamic-coverage-feedback=value_tracer_dynamic_coverage_feedback
--param#tracer-max-code-growth=value_tracer_max_code_growth
--param#tracer-min-branch-ratio=value_tracer_min_branch_ratio
--param#tracer-min-branch-ratio-feedback=value_tracer_min_branch_ratio_feedback
--param#max-cse-path-length=value_max_cse_path_length
--param#max-cse-insns=value_max_cse_insns
--param#max-aliased-vops=value_max_aliased_vops
--param#avg-aliased-vops=value_avg_aliased_vops
--param#ggc-min-expand=value_ggc_min_expand
--param#ggc-min-heapsize=value_ggc_min_heapsize
--param#max-reload-search-insns=value_max_reload_search_insns
--param#max-cselib-memory-locations=value_max_cselib_memory_locations
--param#reorder-blocks-duplicate=value_reorder_blocks_duplicate
--param#reorder-blocks-duplicate-feedback=value_reorder_blocks_duplicate_feedback
--param#max-sched-ready-insns=value_max_sched_ready_insns
--param#max-sched-region-blocks=value_max_sched_region_blocks
--param#max-pipeline-region-blocks=value_max_pipeline_region_blocks
--param#max-sched-region-insns=value_max_sched_region_insns
--param#max-pipeline-region-insns=value_max_pipeline_region_insns
--param#max-sched-region-insns=value_max_sched_region_insns
--param#max-pipeline-region-insns=value_max_pipeline_region_insns
--param#min-spec-prob=value_min_spec_prob
--param#max-sched-extend-regions-iters=value_max_sched_extend_regions_iters
--param#max-sched-insn-conflict-delay=value_max_sched_insn_conflict_delay
--param#sched-spec-prob-cutoff=value_sched_spec_prob_cutoff
--param#sched-mem-true-dep-cost=value_sched_mem_true_dep_cost
--param#selsched-max-lookahead=value_selsched_max_lookahead
--param#selsched-max-sched-times=value_selsched_max_sched_times
--param#selsched-max-insns-to-rename=value_selsched_max_insns_to_rename
--param#max-last-value-rtl=value_max_last_value_rtl
--param#integer-share-limit=value_integer_share_limit
--param#min-virtual-mappings=value_min_virtual_mappings
--param#virtual-mappings-ratio=value_virtual_mappings_ratio
--param#ssp-buffer-size=value_ssp_buffer_size
--param#max-jump-thread-duplication-stmts=value_max_jump_thread_duplication_stmts
--param#max-fields-for-field-sensitive=value_max_fields_for_field_sensitive
--param#prefetch-latency=value_prefetch_latency
--param#simultaneous-prefetches=value_simultaneous_prefe
--param#l1-cache-line-size=value_cache_line
--param#l1-cache-size=value_l1_cache_size
--param#l2-cache-size=value_l2_cache_size
--param#use-canonical-types=value_use_canonical_types
--param#switch-conversion-max-branch-ratio=value_switch_conversion_max_branch_ratio
--param#max-partial-antic-length=value_max_partial_antic_length
--param#sccvn-max-scc-size=value_sccvn_max_scc_size
--param#ira-max-loops-num=value_ira_max_loops_num"
##################
# generate flags #
##################
## example1: for switch options
# for ((i=0;i<$(option_number $FINE_TUNING);i++)); do
# OPTGEN_FLAGS="-O3 $(option_select $i $FINE_TUNING)"
# echo $OPTGEN_FLAGS
# done
## example2: for parameter options
# value_inline_limit=$(random_between 0 5)
# value_ira_algorithm=$(random_in regional CB mixed)
# value_sched_stalled_insns=$(random_between 0 2)
# value_stalled_insns_dep=$(random_between 0 2)
#
# for ((i=0;i<$(option_number $FINE_TUNING_PARAM);i++)); do
# OPTGEN_FLAGS="-O3 $(option_select $i $FINE_TUNING_PARAM)"
# echo $OPTGEN_FLAGS
# done
[ 本帖最后由 EricFisher 于 2009-11-10 14:16 编辑 ]
十分感谢,有空的时候试一下,呵呵
在gcc源码包的test目录下面应该就有吧。反正binutils的gld下面的test case目录下就有很多现成的测试用例帮助理解gld