trace
MySQL trace 使用及实现调研
使用方法及作用
- 主要目的是输出优化过程中的各种细节,无论debug还是release,都可以使用,且只限定在 opt 阶段,和mysql debug有却别
SET optimizer_trace="enabled=on";
2. exec sql;
-- optimizer_switch='hypergraph_optimizer=off'
select trace from `information_schema`.`optimizer_trace`\G
trace: {
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `article_rank`.`aid` AS `aid`,sum(`article_rank`.`pv`) AS `num` from `article_rank` FORCE INDEX (`idx_day_aid_pv`) where (`article_rank`.`day` > 20190115) group by `article_rank`.`aid` order by `num` desc limit 10"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "(`article_rank`.`day` > 20190115)",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "(`article_rank`.`day` > 20190115)"
},
{
"transformation": "constant_propagation",
"resulting_condition": "(`article_rank`.`day` > 20190115)"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`article_rank`.`day` > 20190115)"
}
]
}
},
{
"substitute_generated_columns": {
}
},
{
"table_dependencies": [
{
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
]
}
]
},
{
"ref_optimizer_key_uses": [
]
},
{
"rows_estimation": [
{
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"range_analysis": {
"table_scan": {
"rows": 1,
"cost": 1.79769e+308
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_day",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_day_aid_pv",
"usable": true,
"key_parts": [
"day",
"aid",
"pv",
"id"
]
},
{
"index": "idx_aid_day_pv",
"usable": false,
"cause": "not_applicable"
}
],
"best_covering_index_scan": {
"index": "idx_day_aid_pv",
"cost": 0.35,
"chosen": true
},
"setup_range_conditions": [
],
"group_index_skip_scan": {
"chosen": false,
"cause": "not_applicable_aggregate_function"
},
"skip_scan_range": {
"chosen": false,
"cause": "has_group_by"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "idx_day_aid_pv",
"ranges": [
"20190115 < day"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 1,
"cost": 0.36,
"chosen": false,
"cause": "cost"
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
}
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 1,
"filtering_effect": [
],
"final_filtering_effect": 1,
"access_type": "scan",
"resulting_rows": 1,
"cost": 0.6,
"chosen": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 1,
"cost_for_plan": 0.6,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": "(`article_rank`.`day` > 20190115)",
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"attached": "(`article_rank`.`day` > 20190115)"
}
]
}
},
{
"optimizing_distinct_group_by_order_by": {
"simplifying_order_by": {
"original_clause": "`num` desc",
"items": [
{
"item": "sum(`article_rank`.`pv`)"
}
],
"resulting_clause_is_simple": false,
"resulting_clause": "`num` desc"
},
"simplifying_group_by": {
"original_clause": "`article_rank`.`aid`",
"items": [
{
"item": "`article_rank`.`aid`"
}
],
"resulting_clause_is_simple": true,
"resulting_clause": "`article_rank`.`aid`"
}
}
},
{
"reconsidering_access_paths_for_index_ordering": {
"clause": "GROUP BY",
"steps": [
],
"index_order_summary": {
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"index_provides_order": false,
"order_direction": "undefined",
"index": "idx_day_aid_pv",
"plan_changed": false
}
}
},
{
"finalizing_table_conditions": [
{
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)",
"original_table_condition": "(`article_rank`.`day` > 20190115)",
"final_table_condition ": "(`article_rank`.`day` > 20190115)"
}
]
},
{
"refine_plan": [
{
"table": "`article_rank` FORCE INDEX (`idx_day_aid_pv`)"
}
]
},
{
"considering_tmp_tables": [
{
"adding_tmp_table_in_plan_at_position": 1,
"write_method": "continuously_update_group_row"
},
{
"adding_sort_to_table": ""
}
]
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": [
{
"temp_table_aggregate": {
"select#": 1,
"steps": [
{
"creating_tmp_table": {
"tmp_table_info": {
"table": "<temporary>",
"in_plan_at_position": 1,
"columns": 2,
"row_length": 20,
"key_length": 4,
"unique_constraint": false,
"makes_grouped_rows": true,
"cannot_insert_duplicates": false,
"location": "TempTable"
}
}
}
]
}
},
{
"sorting_table": "<temporary>",
"filesort_information": [
{
"direction": "desc",
"expression": "`num`"
}
],
"filesort_priority_queue_optimization": {
"limit": 10,
"chosen": true
},
"filesort_execution": [
],
"filesort_summary": {
"memory_available": 262144,
"key_size": 17,
"row_size": 37,
"max_rows_per_buffer": 11,
"num_rows_estimate": 18446744073709551615,
"num_rows_found": 0,
"num_initial_chunks_spilled_to_disk": 0,
"peak_memory_used": 495,
"sort_algorithm": "none",
"unpacked_addon_fields": "using_priority_queue",
"sort_mode": "<fixed_sort_key, additional_fields>"
}
}
]
}
}
]
}
--
hypergraph_optimizer
{
"join_optimizer": [
"Join list after simplification:",
"* article_rank true join_type=left",
"* m1 join_type=inner",
"",
"Made this relational tree; WHERE condition is (none):",
"* Left join [companion set 0x7730d8b4f4c8] (no join conditions)",
" * m1 [companion set 0x7730d8b4f4c8]",
" * article_rank [companion set 0x7730d8b4f4e8]",
"",
"Pushing conditions down.",
"",
"After pushdown; remaining WHERE conditions are (none), table filters are (none):",
"* Left join [companion set 0x7730d8b4f4c8] (no join conditions)",
" * m1 [companion set 0x7730d8b4f4c8]",
" * article_rank [companion set 0x7730d8b4f4e8]",
"",
"Companion set: 0x7730d8b4f4c8:{}",
"Companion set: 0x7730d8b4f4e8:{}",
"Selectivity of join [left] (none):",
"",
"Constructed hypergraph:",
"digraph G { # 1 edges",
" m1 -> article_rank [label=\"[left] (none) (1)\"]",
"}",
"",
"",
"",
"No interesting orders found. Not collecting functional dependencies.",
"",
"",
"Enumerating subplans:",
"",
"Found node article_rank [rows=1]",
" - {INDEX_SCAN, cost=2.18, init_cost=0, rows=1} [idx_day_aid_pv] is first alternative, keeping",
" - {INDEX_SCAN, cost=2.18, init_cost=0, rows=1} [idx_aid_day_pv] is not better than existing path {INDEX_SCAN, cost=2.18, init_cost=0, rows=1}, discarding",
" - {TABLE_SCAN, cost=0.47, init_cost=0, rows=1} is better than previous {INDEX_SCAN, cost=2.18, init_cost=0, rows=1}, replacing",
" - current access paths for {article_rank}: {TABLE_SCAN, cost=0.47, init_cost=0, rows=1})",
"",
"Found node m1 [rows=1]",
" - {TABLE_SCAN, cost=0.295, init_cost=0, rows=1} is first alternative, keeping",
" - current access paths for {m1}: {TABLE_SCAN, cost=0.295, init_cost=0, rows=1})",
"",
"Found sets {m1} and {article_rank}, connected by condition [left] (none)",
" - {HASH_JOIN, cost=2.61, init_cost=1.97, rescan_cost=0.641, rows=1, join_order=(m1,article_rank)} is first alternative, keeping",
" - current access paths for {m1,article_rank}: {HASH_JOIN, cost=2.61, init_cost=1.97, rescan_cost=0.641, rows=1, join_order=(m1,article_rank)})",
"",
"Enumerated 3 subplans keeping a total of 3 access paths, got 1 candidate(s) to finalize:",
"Adding final predicates",
" - {HASH_JOIN, cost=2.61, init_cost=1.97, rescan_cost=0.641, rows=1, join_order=(m1,article_rank)} is first alternative, keeping",
"Final cost is 2.6."
]
}
详细实现
核心原理为
-
实现一个结构体,trace 当前语句,因为会 trace sp 的情况,所以一条语句可能有多个 stmt
-
实现格式的控制流程,区分上下级关系,区分 array,block 等,然后往当前 stmt 中添加适当的内容
-
此时这些信息都保存在 thd 的 trace_ctx 中,需要查询的时候
- 查询 information_schema.optimizer_trace
- 表是 临时表,会从 使用函数 fill_optimizer_trace_info 获取 trace_ctx 的信息,填充到 files 中
-
hypergraph_optimizer 单独使用另一套采集方案,可以查看 TraceStarted ,但是底层实际是同一种东西,只是在切换到 hypergraph_optimizer 之前和之后,特殊处理一下
-
optimizer_trace 是 information_schema 中的表,实际上不存在,实际作用只是为了实现 trace_info 的输出,详细查看st_schema_table
相关模块及数据结构
- trace_xxx trace 的信息采集模块,采集 opt 中的详细信息
- optimizer_trace 输出模块,information_schema 的一张零食表
- explain format=json 需要支持 json 格式的输出,由于 Opt_trace_context 可以遍历 explain tree 并且输出 json 信息,所以刚好比较合适处理这个信息, 实际上这里只需要输出 json 格式的 str 即可,explain 需要重构
优化方向
- 当前是以表的方式去使用,就代表优化中,会按照表的方式去处理,但是实际上更应该类似 function
- 建议实现类似 pg 的 funcscan 或者 duckdb 中的函数处理,轻量化处理
对比其他
pg
pg 有类似的功能,但是没有这么详细,而且也只是针对于每个阶段进行汇总,不是针对于每一步,另外 pg 的这个功能是为了 debug 使用的,功能比较简单,必须得设置宏才能使用,也没有控制的开关,只是简单的 print
就类似于 pg 社区反对使用 hint 的理由一样,他们相信大多是时候优化器可以根据你给定的参数和配置选择当前较好的执行计划,而无需手工介入,所以他们没有提供类似的 trace 根据, dba 无需知道内部的优化细节。
duckdb
完全没有类似的功能,最总只有一个 profing ,类似 pg 的analyze
总结
相比起来,MySQL 可能由于优化器不能提供较好的执行计划,所以需要给 dba 一个解释。。