tpcds 04 OOM

和这个帖子一样,我也遇到了 TPC-DS query 04 OOM 的问题:环境为 3 个 BE + 1 个 FE,内存 180 GB,执行 query 04 时直接 OOM。

Fragment 3 |
| │ BackendNum: 3 |
| │ InstancePeakMemoryUsage: 7.254 GB, InstanceAllocatedMemoryUsage: 123.383 GB |
| │ PrepareTime: 5.808ms |
| └──DATA_STREAM_SINK (id=16) |
| │ PartitionType: HASH_PARTITIONED |
| │ PartitionExprs: [93: i_category, 91: i_class, 89: i_brand, 102: i_product_name, …] |
| └──AGGREGATION (id=15) [serialize, update] |
| │ Estimates: [row: 440496327, cpu: 51126635986.38, memory: 45840680052.73, network: 0.00, cost: 130662627453.08] |
| │ TotalTime: 3s772ms (33.95%) [CPUTime: 3s772ms] |
| │ OutputRows: 49.224M (49223725) |
| │ PeakMemory: 6.184 MB, AllocatedMemory: 46.176 GB |
| │ AggExprs: [sum(103: coalesce)] |
| │ GroupingExprs: [93: i_category, 91: i_class, 89: i_brand, 102: i_product_name, …] |
| │ Detail Timers: |
| │ AggComputeTime: 1s257ms [min=438.902ms, max=2s889ms] |
| └──REPEAT (id=14) |
| │ Estimates: [row: 440496327, cpu: 0.00, memory: 0.00, network: 0.00, cost: 13417949354.43] |
| │ TotalTime: 718.084ms (6.46%) [CPUTime: 718.084ms] |
| │ OutputRows: 485.911M (485910657) |
| └──PROJECT (id=13)

这内存也太大了吧,直接oom

一个服务器 3个BE+1个FE吗?

set enable_rewrite_groupingsets_to_union_all=on 再试试

对,是的

好的,

这是一个已知的 bad case,目前优化器还不会自动对它做优化,后续会以一种更友好的方式实现 GROUPING SETS。

设置该会话变量(enable_rewrite_groupingsets_to_union_all)后无效,仍然报错:
ERROR 1064 (HY000): Memory of process exceed limit. try consume:0 Used: 28219187144, Limit: 163827471974. Mem usage has exceed the limit of BE

set pipeline_dop=1能跑出来吗

这个查询暂时还跑不出来,当前版本只能开启 spill 了

spill是开的

StarRocks > show variables;
+----------------------------------------------+---------------------------+
| Variable_name | Value |
+----------------------------------------------+---------------------------+
| SQL_AUTO_IS_NULL | false |
| activate_all_roles_on_login | false |
| analyze_mv | sample |
| audit_execute_stmt | false |
| auto_increment_increment | 1 |
| autocommit | true |
| big_query_log_cpu_second_threshold | 480 |
| big_query_log_scan_bytes_threshold | 10737418240 |
| big_query_log_scan_rows_threshold | 1000000000 |
| big_query_profile_threshold | 0s |
| broadcast_row_limit | 15000000 |
| cbo_cte_reuse | true |
| cbo_derive_join_is_null_predicate | true |
| cbo_derive_range_join_predicate | false |
| cbo_enable_low_cardinality_optimize | true |
| cbo_max_reorder_node_use_dp | 10 |
| cbo_max_reorder_node_use_exhaustive | 4 |
| cbo_push_down_distinct_below_window | true |
| cbo_push_down_topn_limit | 1000 |
| cbo_reorder_threshold_use_exhaustive | 6 |
| character_set_client | utf8 |
| character_set_connection | utf8 |
| character_set_database | utf8 |
| character_set_results | utf8 |
| character_set_server | utf8 |
| collation_connection | utf8_general_ci |
| collation_database | utf8_general_ci |
| collation_server | utf8_general_ci |
| connector_io_tasks_per_scan_operator | 16 |
| connector_scan_use_query_mem_ratio | 0.3 |
| connector_sink_compression_codec | uncompressed |
| count_distinct_column_buckets | 1024 |
| default_rowset_type | alpha |
| default_table_compression | lz4_frame |
| disable_colocate_join | false |
| disable_join_reorder | false |
| div_precision_increment | 4 |
| eable_force_rule_based_mv_rewrite | false |
| enable_adaptive_sink_dop | true |
| enable_big_query_log | true |
| enable_cbo_table_prune | false |
| enable_collect_table_level_scan_stats | true |
| enable_connector_adaptive_io_tasks | true |
| enable_distinct_column_bucketization | false |
| enable_file_metacache | false |
| enable_filter_unused_columns_in_scan_stage | true |
| enable_global_runtime_filter | true |
| enable_group_level_query_queue | false |
| enable_groupby_use_output_alias | false |
| enable_hive_column_stats | true |
| enable_hive_metadata_cache_with_insert | false |
| enable_iceberg_column_statistics | false |
| enable_iceberg_identity_column_optimize | true |
| enable_incremental_mv | false |
| enable_insert_strict | true |
| enable_load_profile | false |
| enable_local_shuffle_agg | true |
| enable_materialized_view_for_insert | false |
| enable_materialized_view_rewrite | true |
| enable_materialized_view_rewrite_greedy_mode | false |
| enable_materialized_view_union_rewrite | true |
| enable_materialized_view_view_delta_rewrite | true |
| enable_multicolumn_global_runtime_filter | false |
| enable_mv_planner | false |
| enable_outer_join_reorder | true |
| enable_parallel_merge | true |
| enable_per_bucket_optimize | true |
| enable_pipeline_engine | true |
| enable_plan_serialize_concurrently | true |
| enable_populate_datacache | true |
| enable_predicate_reorder | false |
| enable_prepare_stmt | true |
| enable_profile | false |
| enable_prune_complex_types | true |
| enable_prune_complex_types_in_unnest | true |
| enable_prune_iceberg_manifest | true |
| enable_query_cache | false |
| enable_query_dump | false |
| enable_query_queue_load | false |
| enable_query_queue_select | false |
| enable_query_queue_statistic | false |
| enable_query_tablet_affinity | false |
| enable_rbo_table_prune | false |
| enable_read_iceberg_puffin_ndv | true |
| enable_rewrite_bitmap_union_to_bitamp_agg | true |
| enable_rewrite_groupingsets_to_union_all | true |
| enable_rewrite_simple_agg_to_meta_scan | false |
| enable_rewrite_sum_by_associative_rule | true |
| enable_rule_based_materialized_view_rewrite | true |
| enable_runtime_adaptive_dop | false |
| enable_scan_datacache | false |
| enable_shared_scan | false |
| enable_short_circuit | false |
| enable_sort_aggregate | false |
| enable_spill | true |
| enable_split_scan_predicate_with_date | false |
| enable_strict_order_by | true |
| enable_sync_materialized_view_rewrite | true |
| enable_tablet_internal_parallel | true |
| enable_topn_runtime_filter | true |
| enable_view_based_mv_rewrite | false |
| enable_write_hive_external_table | false |
| event_scheduler | OFF |
| force_schedule_local | false |
| forward_to_leader | false |
| full_sort_late_materialization | true |
| group_concat_max_len | 1024 |
| hash_join_push_down_right_table | true |
| hive_partition_stats_sample_size | 3000 |
| hive_temp_staging_dir | /tmp/starrocks |
| hudi_mor_force_jni_reader | false |
| init_connect | |
| innodb_read_only | true |
| interactive_timeout | 3600 |
| interleaving_group_size | 10 |
| io_tasks_per_scan_operator | 4 |
| join_implementation_mode_v2 | auto |
| language | /starrocks/share/english/ |
| large_decimal_underlying_type | panic |
| license | Apache License 2.0 |
| load_mem_limit | 0 |
| load_transmission_compression_type | NO_COMPRESSION |
| log_rejected_record_num | 0 |
| lower_case_table_names | 0 |
| materialized_view_rewrite_mode | DEFAULT |
| max_allowed_packet | 33554432 |
| max_parallel_scan_instance_num | -1 |
| max_pipeline_dop | 64 |
| max_pushdown_conditions_per_column | -1 |
| max_scan_key_num | -1 |
| nested_mv_rewrite_max_level | 3 |
| net_buffer_length | 16384 |
| net_read_timeout | 60 |
| net_write_timeout | 60 |
| new_planner_agg_stage | 0 |
| new_planner_optimize_timeout | 3000 |
| optimizer_materialized_view_timelimit | 1000 |
| paimon_force_jni_reader | false |
| parallel_exchange_instance_num | -1 |
| parallel_fragment_exec_instance_num | 8 |
| parse_tokens_limit | 3500000 |
| partial_update_mode | auto |
| performance_schema | false |
| pipeline_dop | 0 |
| pipeline_profile_level | 1 |
| pipeline_sink_dop | 0 |
| prefer_compute_node | false |
| query_cache_agg_cardinality_limit | 5000000 |
| query_cache_entry_max_bytes | 4194304 |
| query_cache_entry_max_rows | 409600 |
| query_cache_force_populate | false |
| query_cache_hot_partition_num | 3 |
| query_cache_size | 1048576 |
| query_cache_type | 0 |
| query_delivery_timeout | 300 |
| query_mem_limit | 163827471974 |
| query_queue_concurrency_limit | 0 |
| query_queue_cpu_used_permille_limit | 0 |
| query_queue_driver_high_water | -1 |
| query_queue_driver_low_water | -1 |
| query_queue_fresh_resource_usage_interval_ms | 5000 |
| query_queue_max_queued_queries | 1024 |
| query_queue_mem_used_pct_limit | 0.0 |
| query_queue_pending_timeout_second | 300 |
| query_timeout | 300 |
| range_pruner_max_predicate | 100 |
| resource_group | |
| runtime_filter_on_exchange_node | false |
| runtime_join_filter_push_down_limit | 1024000 |
| runtime_profile_report_interval | 10 |
| scan_use_query_mem_ratio | 0.3 |
| spill_encode_level | 7 |
| spill_mode | force |
| spill_revocable_max_bytes | 0 |
| sql_dialect | StarRocks |
| sql_mode | ONLY_FULL_GROUP_BY |
| sql_quote_show_create | true |
| sql_safe_updates | 0 |
| sql_select_limit | 9223372036854775807 |
| statistic_collect_parallel | 1 |
| storage_engine | olap |
| streaming_preaggregation_mode | auto |
| system_time_zone | UTC |
| thrift_plan_protocol | binary |
| time_zone | UTC |
| transaction_isolation | REPEATABLE-READ |
| transaction_read_only | OFF |
| transmission_compression_type | NO_COMPRESSION |
| transmission_encode_level | 7 |
| tx_isolation | REPEATABLE-READ |
| tx_visible_wait_timeout | 10 |
| use_compute_nodes | 0 |
| use_page_cache | true |
| version | 5.1.0 |
| version_comment | 3.2.3-a40e2f8 |
| wait_timeout | 28800 |
+----------------------------------------------+---------------------------+

pipeline_dop=1试过吗,还有spill改成强制模式试试