diff --git a/tidb/CHANGELOG.md b/tidb/CHANGELOG.md index 006ea5e4f3..da7104aea8 100644 --- a/tidb/CHANGELOG.md +++ b/tidb/CHANGELOG.md @@ -1,5 +1,19 @@ # CHANGELOG - TiDB +## 2.2.0 / 2026-04-19 + +***Added***: + +* Add `tiflash_syncing_data_freshness` histogram metric to track TiFlash replication lag from TiKV ([#XXXX](https://github.com/DataDog/integrations-extras/pull/XXXX)) +* Add PD client metrics: `pd_client_cmd_handle_cmds_duration_seconds` and `pd_client_request_handle_requests_duration_seconds` +* Add TiDB session phase duration metrics: `tidb_session_parse_duration_seconds`, `tidb_session_compile_duration_seconds`, `tidb_session_execute_duration_seconds`, `tidb_session_transaction_duration_seconds` +* Add TiDB connection metrics: `tidb_server_get_token_duration_seconds`, `tidb_server_conn_idle_duration_seconds` +* Add TiDB server and process metrics: `tidb_server_query_total`, `tidb_server_disconnection_total`, `tidb_server_plan_cache_total`, `tidb_server_plan_cache_miss_total`, `process_start_time_seconds` +* Add TiDB TiKV client metric: `tidb_tikvclient_request_seconds` +* Add TiKV raftstore metrics: `tikv_raftstore_append_log_duration_seconds`, `tikv_raftstore_apply_log_duration_seconds`, `tikv_raftstore_commit_log_duration_seconds`, `tikv_raftstore_store_duration_secs`, `tikv_raftstore_apply_duration_secs` +* Add TiKV storage, gRPC, engine flow, and thread CPU metrics: `tikv_storage_engine_async_request_duration_seconds`, `tikv_grpc_msg_duration_seconds`, `tikv_engine_flow_bytes`, `tikv_thread_cpu_seconds_total` +* Add unit tests and fixture data for all new metrics + ## 2.1.1 / 2025-10-17 ***Added*** diff --git a/tidb/assets/dashboards/overview.json b/tidb/assets/dashboards/overview.json index c28f3b424a..0ebb3b8030 100644 --- a/tidb/assets/dashboards/overview.json +++ b/tidb/assets/dashboards/overview.json @@ -854,6 +854,508 @@ "x": 0, "y": 17 } + }, + { + "definition": { + "background_color": "vivid_green", + "layout_type": "ordered", + "show_title": true, + "title": "TiDB query internals", + "type": "group", + 
"widgets": [ + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "vertical", + "markers": [], + "requests": [ + { + "display_type": "area", + "formulas": [{"alias": "query / s", "formula": "query1"}], + "queries": [{"data_source": "metrics", "name": "query1", "query": "sum:tidb_cluster.tidb_server_query_total{$cluster_name,$host} by {type}.as_rate()"}], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Total queries per second", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000001, + "layout": {"height": 3, "width": 6, "x": 0, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "vertical", + "markers": [], + "requests": [ + { + "display_type": "area", + "formulas": [ + {"alias": "hit / s", "formula": "query1"}, + {"alias": "miss / s", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "sum:tidb_cluster.tidb_server_plan_cache_total{$cluster_name,$host}.as_rate()"}, + {"data_source": "metrics", "name": "query2", "query": "sum:tidb_cluster.tidb_server_plan_cache_miss_total{$cluster_name,$host}.as_rate()"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Plan cache hits/misses per second", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000002, + "layout": {"height": 3, "width": 6, "x": 6, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + 
"display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tidb_session_parse_duration_seconds{$cluster_name,$host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tidb_session_parse_duration_seconds{$cluster_name,$host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Parse duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000003, + "layout": {"height": 3, "width": 4, "x": 0, "y": 3} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tidb_session_compile_duration_seconds{$cluster_name,$host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tidb_session_compile_duration_seconds{$cluster_name,$host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Compile duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000004, + "layout": {"height": 3, "width": 4, "x": 4, "y": 3} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + 
"formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tidb_session_execute_duration_seconds{$cluster_name,$host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tidb_session_execute_duration_seconds{$cluster_name,$host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Execute duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000005, + "layout": {"height": 3, "width": 4, "x": 8, "y": 3} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tidb_tikvclient_request_seconds{$cluster_name,$host} by {type}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tidb_tikvclient_request_seconds{$cluster_name,$host} by {type}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "TiKV client request duration (from TiDB)", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000006, + "layout": {"height": 3, "width": 6, "x": 0, "y": 6} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": 
"line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tidb_server_conn_idle_duration_seconds{$cluster_name,$host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tidb_server_conn_idle_duration_seconds{$cluster_name,$host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Connection idle duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1001000000000007, + "layout": {"height": 3, "width": 6, "x": 6, "y": 6} + } + ] + }, + "id": 1001000000000000, + "layout": {"height": 10, "width": 12, "x": 0, "y": 21} + }, + { + "definition": { + "background_color": "vivid_orange", + "layout_type": "ordered", + "show_title": true, + "title": "TiFlash replication", + "type": "group", + "widgets": [ + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p50", "formula": "query2"}, + {"alias": "p95", "formula": "query3"}, + {"alias": "p99", "formula": "query4"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tiflash_syncing_data_freshness{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query2", "query": "p50:tidb_cluster.tiflash_syncing_data_freshness{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query3", "query": "p95:tidb_cluster.tiflash_syncing_data_freshness{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query4", "query": 
"p99:tidb_cluster.tiflash_syncing_data_freshness{$cluster_name,$host} by {host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "TiFlash replication lag (seconds)", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1002000000000001, + "layout": {"height": 3, "width": 12, "x": 0, "y": 0} + } + ] + }, + "id": 1002000000000000, + "layout": {"height": 4, "width": 12, "x": 0, "y": 31} + }, + { + "definition": { + "background_color": "vivid_purple", + "layout_type": "ordered", + "show_title": true, + "title": "TiKV raftstore & gRPC", + "type": "group", + "widgets": [ + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "append p99", "formula": "query1"}, + {"alias": "apply p99", "formula": "query2"}, + {"alias": "commit p99", "formula": "query3"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "p99:tidb_cluster.tikv_raftstore_append_log_duration_seconds{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tikv_raftstore_apply_log_duration_seconds{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query3", "query": "p99:tidb_cluster.tikv_raftstore_commit_log_duration_seconds{$cluster_name,$host} by {host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Raftstore log duration p99 (append / apply / commit)", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + 
"id": 1003000000000001, + "layout": {"height": 3, "width": 4, "x": 0, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tikv_grpc_msg_duration_seconds{$cluster_name,$host} by {type}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tikv_grpc_msg_duration_seconds{$cluster_name,$host} by {type}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "TiKV gRPC message duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1003000000000002, + "layout": {"height": 3, "width": 4, "x": 4, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "vertical", + "markers": [], + "requests": [ + { + "display_type": "area", + "formulas": [ + {"alias": "read / s", "formula": "query1"}, + {"alias": "write / s", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "sum:tidb_cluster.tikv_engine_flow_bytes{type:bytes_read,$cluster_name,$host} by {host}.as_rate()"}, + {"data_source": "metrics", "name": "query2", "query": "sum:tidb_cluster.tikv_engine_flow_bytes{type:bytes_written,$cluster_name,$host} by {host}.as_rate()"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "TiKV engine flow bytes per second", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, 
"label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1003000000000003, + "layout": {"height": 3, "width": 4, "x": 8, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "store p99", "formula": "query1"}, + {"alias": "apply p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "p99:tidb_cluster.tikv_raftstore_store_duration_secs{$cluster_name,$host} by {host}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tikv_raftstore_apply_duration_secs{$cluster_name,$host} by {host}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "Raftstore store/apply duration p99", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1003000000000004, + "layout": {"height": 3, "width": 6, "x": 0, "y": 3} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.tikv_storage_engine_async_request_duration_seconds{$cluster_name,$host} by {type}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.tikv_storage_engine_async_request_duration_seconds{$cluster_name,$host} by {type}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "TiKV async storage request duration", + "title_align": 
"left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1003000000000005, + "layout": {"height": 3, "width": 6, "x": 6, "y": 3} + } + ] + }, + "id": 1003000000000000, + "layout": {"height": 7, "width": 12, "x": 0, "y": 35} + }, + { + "definition": { + "background_color": "vivid_yellow", + "layout_type": "ordered", + "show_title": true, + "title": "PD client", + "type": "group", + "widgets": [ + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.pd_client_cmd_handle_cmds_duration_seconds{$cluster_name,$host} by {type}"}, + {"data_source": "metrics", "name": "query2", "query": "p99:tidb_cluster.pd_client_cmd_handle_cmds_duration_seconds{$cluster_name,$host} by {type}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "PD command duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1004000000000001, + "layout": {"height": 3, "width": 6, "x": 0, "y": 0} + }, + { + "definition": { + "legend_columns": ["avg", "min", "max"], + "legend_layout": "auto", + "markers": [], + "requests": [ + { + "display_type": "line", + "formulas": [ + {"alias": "avg", "formula": "query1"}, + {"alias": "p99", "formula": "query2"} + ], + "queries": [ + {"data_source": "metrics", "name": "query1", "query": "avg:tidb_cluster.pd_client_request_handle_requests_duration_seconds{$cluster_name,$host} by {type}"}, + {"data_source": "metrics", "name": 
"query2", "query": "p99:tidb_cluster.pd_client_request_handle_requests_duration_seconds{$cluster_name,$host} by {type}"} + ], + "response_format": "timeseries", + "style": {"line_type": "solid", "line_width": "normal", "palette": "dog_classic"} + } + ], + "show_legend": true, + "title": "PD request duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": {"include_zero": true, "label": "", "max": "auto", "min": "auto", "scale": "linear"} + }, + "id": 1004000000000002, + "layout": {"height": 3, "width": 6, "x": 6, "y": 0} + } + ] + }, + "id": 1004000000000000, + "layout": {"height": 4, "width": 12, "x": 0, "y": 42} } ] } diff --git a/tidb/datadog_checks/tidb/check.py b/tidb/datadog_checks/tidb/check.py index c2d15b26e1..c43e779b28 100644 --- a/tidb/datadog_checks/tidb/check.py +++ b/tidb/datadog_checks/tidb/check.py @@ -2,7 +2,7 @@ from datadog_checks.base import OpenMetricsBaseCheck -from .metrics import TIDB_METRICS, TIFLASH_METRICS, TIKV_METRICS +from .metrics import PD_METRICS, TIDB_METRICS, TIFLASH_METRICS, TIKV_METRICS from .utils import build_check @@ -29,14 +29,14 @@ def __init__(self, name, init_config, instances=None): "pd", { 'pd_metric_url': 'http://localhost:2379/metrics', - 'metrics': TIDB_METRICS + TIFLASH_METRICS + TIKV_METRICS, + 'metrics': TIDB_METRICS + TIFLASH_METRICS + TIKV_METRICS + PD_METRICS, }, ), 'tidb_cloud': build_check( "pd", { 'pd_metric_url': 'http://localhost:2379/metrics', - 'metrics': TIDB_METRICS + TIFLASH_METRICS + TIKV_METRICS, + 'metrics': TIDB_METRICS + TIFLASH_METRICS + TIKV_METRICS + PD_METRICS, }, ), } diff --git a/tidb/datadog_checks/tidb/metrics.py b/tidb/datadog_checks/tidb/metrics.py index 8c2c208bf3..e123fedb7c 100644 --- a/tidb/datadog_checks/tidb/metrics.py +++ b/tidb/datadog_checks/tidb/metrics.py @@ -4,8 +4,22 @@ 'tidb_server_execute_error_total', 'tidb_server_handle_query_duration_seconds', 'tidb_server_connections', + 'tidb_server_query_total', + 
'tidb_server_disconnection_total', + 'tidb_server_plan_cache_total', + 'tidb_server_plan_cache_miss_total', + # session phase duration metrics + 'tidb_session_parse_duration_seconds', + 'tidb_session_compile_duration_seconds', + 'tidb_session_execute_duration_seconds', + 'tidb_session_transaction_duration_seconds', + 'tidb_server_get_token_duration_seconds', + 'tidb_server_conn_idle_duration_seconds', + # tikv client metrics from TiDB side + 'tidb_tikvclient_request_seconds', # cpu metrics 'process_cpu_seconds_total', + 'process_start_time_seconds', # memory metrics 'process_resident_memory_bytes', # no disk metrics for TiDB @@ -14,6 +28,7 @@ TIKV_METRICS = [ # cpu metrics 'process_cpu_seconds_total', + 'tikv_thread_cpu_seconds_total', # memory metrics 'process_resident_memory_bytes', # disk metrics @@ -21,6 +36,16 @@ 'tikv_store_size_bytes', # disk traffic metrics 'tikv_io_bytes', + 'tikv_engine_flow_bytes', + # gRPC metrics + 'tikv_grpc_msg_duration_seconds', + # raftstore metrics + 'tikv_raftstore_append_log_duration_seconds', + 'tikv_raftstore_apply_log_duration_seconds', + 'tikv_raftstore_commit_log_duration_seconds', + 'tikv_raftstore_store_duration_secs', + 'tikv_raftstore_apply_duration_secs', + 'tikv_storage_engine_async_request_duration_seconds', ] TIFLASH_METRICS = [ # cpu metrics @@ -30,5 +55,11 @@ # disk metrics {'tiflash_system_current_metric_StoreSizeUsed': 'tiflash_store_size_used_bytes'}, {'tiflash_system_current_metric_StoreSizeCapacity': 'tiflash_store_size_capacity_bytes'}, - # no disk traffic metrics for TiFlash + # replication lag metrics + {'tiflash_syncing_data_freshness': 'tiflash_syncing_data_freshness'}, +] +PD_METRICS = [ + # client command duration metrics + 'pd_client_cmd_handle_cmds_duration_seconds', + 'pd_client_request_handle_requests_duration_seconds', ] diff --git a/tidb/metadata.csv b/tidb/metadata.csv index 956a98028c..ab68d1ebd7 100644 --- a/tidb/metadata.csv +++ b/tidb/metadata.csv @@ -4,10 +4,51 @@ 
tidb_cluster.tidb_server_execute_error_total,count,,error,,The total number of e tidb_cluster.tidb_server_connections,gauge,,connection,,Current number of connections in TiDB server,1,tidb,, tidb_cluster.tidb_server_handle_query_duration_seconds.count,count,,query,,The total number of handled queries in server,1,tidb,, tidb_cluster.tidb_server_handle_query_duration_seconds.sum,count,,second,,The sum of handled query duration in server,1,tidb,, +tidb_cluster.tidb_server_query_total,count,,query,,The total number of queries processed by TiDB,1,tidb,, +tidb_cluster.tidb_server_disconnection_total,count,,connection,,The total number of disconnections from TiDB,1,tidb,, +tidb_cluster.tidb_server_plan_cache_total,count,,hit,,The total number of plan cache hits in TiDB,1,tidb,, +tidb_cluster.tidb_server_plan_cache_miss_total,count,,miss,,The total number of plan cache misses in TiDB,1,tidb,, +tidb_cluster.tidb_session_parse_duration_seconds.count,count,,query,,The total number of parse operations in TiDB session,1,tidb,, +tidb_cluster.tidb_session_parse_duration_seconds.sum,count,,second,,The sum of parse duration in TiDB session,1,tidb,, +tidb_cluster.tidb_session_compile_duration_seconds.count,count,,query,,The total number of compile operations in TiDB session,1,tidb,, +tidb_cluster.tidb_session_compile_duration_seconds.sum,count,,second,,The sum of compile duration in TiDB session,1,tidb,, +tidb_cluster.tidb_session_execute_duration_seconds.count,count,,query,,The total number of execute operations in TiDB session,1,tidb,, +tidb_cluster.tidb_session_execute_duration_seconds.sum,count,,second,,The sum of execute duration in TiDB session,1,tidb,, +tidb_cluster.tidb_session_transaction_duration_seconds.count,count,,transaction,,The total number of transactions in TiDB session,1,tidb,, +tidb_cluster.tidb_session_transaction_duration_seconds.sum,count,,second,,The sum of transaction duration in TiDB session,1,tidb,, 
+tidb_cluster.tidb_server_get_token_duration_seconds.count,count,,query,,The total number of token acquisitions in TiDB,1,tidb,, +tidb_cluster.tidb_server_get_token_duration_seconds.sum,count,,second,,The sum of token acquisition duration in TiDB,1,tidb,, +tidb_cluster.tidb_server_conn_idle_duration_seconds.count,count,,connection,,The total number of idle connection samples in TiDB,1,tidb,, +tidb_cluster.tidb_server_conn_idle_duration_seconds.sum,count,,second,,The sum of idle connection duration in TiDB,1,tidb,, +tidb_cluster.tidb_tikvclient_request_seconds.count,count,,request,,The total number of TiKV client requests from TiDB,1,tidb,, +tidb_cluster.tidb_tikvclient_request_seconds.sum,count,,second,,The sum of TiKV client request duration from TiDB,1,tidb,, tidb_cluster.tikv_engine_size_bytes,gauge,,byte,,The disk usage bytes of TiKV instances,1,tidb,, tidb_cluster.tikv_store_size_bytes,gauge,,byte,,The disk capacity bytes of TiKV instances,1,tidb,, tidb_cluster.tikv_io_bytes,count,,byte,,The io read/write bytes of TiKV instances,1,tidb,, +tidb_cluster.tikv_engine_flow_bytes,count,,byte,,The flow bytes through TiKV engine,1,tidb,, +tidb_cluster.tikv_thread_cpu_seconds_total,count,,second,,The total CPU time spent by TiKV threads,1,tidb,, +tidb_cluster.tikv_grpc_msg_duration_seconds.count,count,,request,,The total number of gRPC messages processed by TiKV,1,tidb,, +tidb_cluster.tikv_grpc_msg_duration_seconds.sum,count,,second,,The sum of gRPC message processing duration in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_append_log_duration_seconds.count,count,,operation,,The total number of raft log append operations in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_append_log_duration_seconds.sum,count,,second,,The sum of raft log append duration in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_apply_log_duration_seconds.count,count,,operation,,The total number of raft log apply operations in TiKV,1,tidb,, 
+tidb_cluster.tikv_raftstore_apply_log_duration_seconds.sum,count,,second,,The sum of raft log apply duration in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_commit_log_duration_seconds.count,count,,operation,,The total number of raft log commit operations in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_commit_log_duration_seconds.sum,count,,second,,The sum of raft log commit duration in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_store_duration_secs.count,count,,operation,,The total number of raft store operations in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_store_duration_secs.sum,count,,second,,The sum of raft store operation duration in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_apply_duration_secs.count,count,,operation,,The total number of raft apply operations in TiKV,1,tidb,, +tidb_cluster.tikv_raftstore_apply_duration_secs.sum,count,,second,,The sum of raft apply operation duration in TiKV,1,tidb,, +tidb_cluster.tikv_storage_engine_async_request_duration_seconds.count,count,,request,,The total number of async storage engine requests in TiKV,1,tidb,, +tidb_cluster.tikv_storage_engine_async_request_duration_seconds.sum,count,,second,,The sum of async storage engine request duration in TiKV,1,tidb,, tidb_cluster.tiflash_store_size_used_bytes,gauge,,byte,,The disk usage bytes of TiFlash instances,1,tidb,, tidb_cluster.tiflash_store_size_capacity_bytes,gauge,,byte,,The disk capacity bytes of TiFlash instances,1,tidb,, +tidb_cluster.tiflash_syncing_data_freshness.sum,count,,second,,The sum of replication lag in seconds from TiKV to TiFlash,1,tidb,, +tidb_cluster.tiflash_syncing_data_freshness.count,count,,query,,The total number of TiFlash replication lag observations,1,tidb,, +tidb_cluster.tiflash_syncing_data_freshness.bucket,count,,query,,The histogram buckets for TiFlash replication lag,1,tidb,, +tidb_cluster.pd_client_cmd_handle_cmds_duration_seconds.count,count,,command,,The total number of PD client commands handled,1,tidb,, 
+tidb_cluster.pd_client_cmd_handle_cmds_duration_seconds.sum,count,,second,,The sum of PD client command handling duration,1,tidb,, +tidb_cluster.pd_client_request_handle_requests_duration_seconds.count,count,,request,,The total number of PD client requests handled,1,tidb,, +tidb_cluster.pd_client_request_handle_requests_duration_seconds.sum,count,,second,,The sum of PD client request handling duration,1,tidb,, tidb_cluster.process_cpu_seconds_total,count,,second,,The cpu usage seconds of TiDB/TiKV/TiFlash instances,1,tidb,, tidb_cluster.process_resident_memory_bytes,gauge,,byte,,The resident memory bytes of TiDB/TiKV/TiFlash instances,1,tidb,, diff --git a/tidb/tests/conftest.py b/tidb/tests/conftest.py index e4168b0edb..9eb38d4247 100644 --- a/tidb/tests/conftest.py +++ b/tidb/tests/conftest.py @@ -73,6 +73,19 @@ def mock_tikv_metrics(): yield +@pytest.fixture() +def mock_pd_metrics(): + with mock.patch( + 'requests.Session.get', + return_value=mock.MagicMock( + status_code=200, + iter_lines=lambda **kwargs: _get_mock_metrics("mock_pd_metrics.txt").split("\n"), + headers={'Content-Type': "text/plain"}, + ), + ): + yield + + def _get_mock_metrics(filename): f_name = os.path.join(os.path.dirname(__file__), 'fixtures', filename) with open(f_name, 'r') as f: @@ -119,6 +132,15 @@ def tikv_instance(): } +@pytest.fixture(scope="session") +def pd_instance(): + return { + 'pd_metric_url': "http://{}:{}/metrics".format(HOST, PD_PORT), + 'max_returned_metrics': "10000", + 'tags': ['tidb_cluster_name:test'], + } + + # Integration test docker-compose environment diff --git a/tidb/tests/expected.py b/tidb/tests/expected.py index 750bc0b1c2..920adf43a6 100644 --- a/tidb/tests/expected.py +++ b/tidb/tests/expected.py @@ -19,6 +19,62 @@ 'tidb_cluster_component:tidb', 'tidb_cluster_name:test', ], + 'tidb_cluster.tidb_server_query_total': [ + 'type:OK', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_server_disconnection_total': [ + 
'result:ok', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_server_plan_cache_total': [ + 'type:hit', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_server_plan_cache_miss_total': [ + 'type:miss', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_session_parse_duration_seconds.sum': [ + 'sql_type:general', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_session_compile_duration_seconds.sum': [ + 'sql_type:general', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_session_execute_duration_seconds.sum': [ + 'type:general', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_session_transaction_duration_seconds.sum': [ + 'sql_type:general', + 'type:commit', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_server_get_token_duration_seconds.sum': [ + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_server_conn_idle_duration_seconds.sum': [ + 'in_txn:0', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tidb_tikvclient_request_seconds.sum': [ + 'store:1', + 'type:Prewrite', + 'tidb_cluster_component:tidb', + 'tidb_cluster_name:test', + ], 'tidb_cluster.process_cpu_seconds_total': [ 'tidb_cluster_component:tidb', 'tidb_cluster_name:test', @@ -47,6 +103,15 @@ 'tidb_cluster_component:tiflash', 'tidb_cluster_name:test', ], + 'tidb_cluster.tiflash_syncing_data_freshness.sum': [ + 'tidb_cluster_component:tiflash', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tiflash_syncing_data_freshness.count': [ + 'upper_bound:none', + 'tidb_cluster_component:tiflash', + 'tidb_cluster_name:test', + ], }, 'service_check': { 'tidb_cluster.prometheus.health': [ @@ -96,6 +161,52 @@ 'tidb_cluster_component:tikv', 'tidb_cluster_name:test', ], + 
'tidb_cluster.tikv_engine_flow_bytes': [ + 'db:kv', + 'type:keys_read', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_thread_cpu_seconds_total': [ + 'name:raftstore', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_grpc_msg_duration_seconds.sum': [ + 'type:kv_get', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_raftstore_append_log_duration_seconds.sum': [ + 'type:normal', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_raftstore_apply_log_duration_seconds.sum': [ + 'type:normal', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_raftstore_commit_log_duration_seconds.sum': [ + 'type:normal', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_raftstore_store_duration_secs.sum': [ + 'type:normal', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_raftstore_apply_duration_secs.sum': [ + 'type:normal', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.tikv_storage_engine_async_request_duration_seconds.sum': [ + 'type:write', + 'tidb_cluster_component:tikv', + 'tidb_cluster_name:test', + ], 'tidb_cluster.process_cpu_seconds_total': [ 'tidb_cluster_component:tikv', 'tidb_cluster_name:test', @@ -113,3 +224,25 @@ ], }, } + +EXPECTED_PD = { + 'metrics': { + 'tidb_cluster.pd_client_cmd_handle_cmds_duration_seconds.sum': [ + 'type:tso', + 'tidb_cluster_component:pd', + 'tidb_cluster_name:test', + ], + 'tidb_cluster.pd_client_request_handle_requests_duration_seconds.sum': [ + 'type:tso', + 'tidb_cluster_component:pd', + 'tidb_cluster_name:test', + ], + }, + 'service_check': { + 'tidb_cluster.prometheus.health': [ + 'endpoint:http://localhost:2379/metrics', + 'tidb_cluster_component:pd', + 'tidb_cluster_name:test', + ], + }, +} diff --git a/tidb/tests/fixtures/mock_pd_metrics.txt 
b/tidb/tests/fixtures/mock_pd_metrics.txt new file mode 100644 index 0000000000..a968065867 --- /dev/null +++ b/tidb/tests/fixtures/mock_pd_metrics.txt @@ -0,0 +1,14 @@ +# HELP pd_client_cmd_handle_cmds_duration_seconds Bucketed histogram of processing time (s) of handled cmds. +# TYPE pd_client_cmd_handle_cmds_duration_seconds histogram +pd_client_cmd_handle_cmds_duration_seconds_bucket{type="tso",le="0.001"} 500 +pd_client_cmd_handle_cmds_duration_seconds_bucket{type="tso",le="0.005"} 520 +pd_client_cmd_handle_cmds_duration_seconds_bucket{type="tso",le="+Inf"} 520 +pd_client_cmd_handle_cmds_duration_seconds_sum{type="tso"} 0.21 +pd_client_cmd_handle_cmds_duration_seconds_count{type="tso"} 520 +# HELP pd_client_request_handle_requests_duration_seconds Bucketed histogram of processing time (s) of handled requests. +# TYPE pd_client_request_handle_requests_duration_seconds histogram +pd_client_request_handle_requests_duration_seconds_bucket{type="tso",le="0.001"} 500 +pd_client_request_handle_requests_duration_seconds_bucket{type="tso",le="0.005"} 520 +pd_client_request_handle_requests_duration_seconds_bucket{type="tso",le="+Inf"} 520 +pd_client_request_handle_requests_duration_seconds_sum{type="tso"} 0.19 +pd_client_request_handle_requests_duration_seconds_count{type="tso"} 520 diff --git a/tidb/tests/fixtures/mock_tidb_metrics.txt b/tidb/tests/fixtures/mock_tidb_metrics.txt index 34fed9075f..0410c3e516 100644 --- a/tidb/tests/fixtures/mock_tidb_metrics.txt +++ b/tidb/tests/fixtures/mock_tidb_metrics.txt @@ -472,3 +472,58 @@ tidb_server_execute_error_total{type="schema:1146"} 4 # HELP process_resident_memory_bytes Resident memory size in bytes. # TYPE process_resident_memory_bytes gauge process_resident_memory_bytes 9.3704192e+07 +# HELP tidb_server_query_total Counter of queries. +# TYPE tidb_server_query_total counter +tidb_server_query_total{type="OK"} 150 +tidb_server_query_total{type="Error"} 2 +# HELP tidb_server_disconnection_total Counter of disconnections. 
+# TYPE tidb_server_disconnection_total counter +tidb_server_disconnection_total{result="ok"} 5 +# HELP tidb_server_plan_cache_total Counter of plan cache hit. +# TYPE tidb_server_plan_cache_total counter +tidb_server_plan_cache_total{type="hit"} 42 +# HELP tidb_server_plan_cache_miss_total Counter of plan cache miss. +# TYPE tidb_server_plan_cache_miss_total counter +tidb_server_plan_cache_miss_total{type="miss"} 8 +# HELP tidb_session_parse_duration_seconds Bucketed histogram of processing time (s) in parse phase. +# TYPE tidb_session_parse_duration_seconds histogram +tidb_session_parse_duration_seconds_bucket{sql_type="general",le="0.0005"} 10 +tidb_session_parse_duration_seconds_bucket{sql_type="general",le="+Inf"} 10 +tidb_session_parse_duration_seconds_sum{sql_type="general"} 0.0012 +tidb_session_parse_duration_seconds_count{sql_type="general"} 10 +# HELP tidb_session_compile_duration_seconds Bucketed histogram of processing time (s) in compile phase. +# TYPE tidb_session_compile_duration_seconds histogram +tidb_session_compile_duration_seconds_bucket{sql_type="general",le="0.0005"} 10 +tidb_session_compile_duration_seconds_bucket{sql_type="general",le="+Inf"} 10 +tidb_session_compile_duration_seconds_sum{sql_type="general"} 0.0015 +tidb_session_compile_duration_seconds_count{sql_type="general"} 10 +# HELP tidb_session_execute_duration_seconds Bucketed histogram of processing time (s) in execute phase. +# TYPE tidb_session_execute_duration_seconds histogram +tidb_session_execute_duration_seconds_bucket{type="general",le="0.0005"} 8 +tidb_session_execute_duration_seconds_bucket{type="general",le="+Inf"} 8 +tidb_session_execute_duration_seconds_sum{type="general"} 0.0089 +tidb_session_execute_duration_seconds_count{type="general"} 8 +# HELP tidb_session_transaction_duration_seconds Bucketed histogram of processing time (s) for transaction. 
+# TYPE tidb_session_transaction_duration_seconds histogram +tidb_session_transaction_duration_seconds_bucket{sql_type="general",type="commit",le="0.001"} 3 +tidb_session_transaction_duration_seconds_bucket{sql_type="general",type="commit",le="+Inf"} 3 +tidb_session_transaction_duration_seconds_sum{sql_type="general",type="commit"} 0.0031 +tidb_session_transaction_duration_seconds_count{sql_type="general",type="commit"} 3 +# HELP tidb_server_get_token_duration_seconds Duration (us) for getting token, it should be small until concurrency limit is reached. +# TYPE tidb_server_get_token_duration_seconds histogram +tidb_server_get_token_duration_seconds_bucket{le="1"} 100 +tidb_server_get_token_duration_seconds_bucket{le="+Inf"} 100 +tidb_server_get_token_duration_seconds_sum 0.05 +tidb_server_get_token_duration_seconds_count 100 +# HELP tidb_server_conn_idle_duration_seconds Bucketed histogram of connection idle duration. +# TYPE tidb_server_conn_idle_duration_seconds histogram +tidb_server_conn_idle_duration_seconds_bucket{in_txn="0",le="0.1"} 20 +tidb_server_conn_idle_duration_seconds_bucket{in_txn="0",le="+Inf"} 20 +tidb_server_conn_idle_duration_seconds_sum{in_txn="0"} 1.2 +tidb_server_conn_idle_duration_seconds_count{in_txn="0"} 20 +# HELP tidb_tikvclient_request_seconds Bucketed histogram of time spent in TiKV requests. 
+# TYPE tidb_tikvclient_request_seconds histogram +tidb_tikvclient_request_seconds_bucket{store="1",type="Prewrite",le="0.001"} 5 +tidb_tikvclient_request_seconds_bucket{store="1",type="Prewrite",le="+Inf"} 5 +tidb_tikvclient_request_seconds_sum{store="1",type="Prewrite"} 0.004 +tidb_tikvclient_request_seconds_count{store="1",type="Prewrite"} 5 diff --git a/tidb/tests/fixtures/mock_tiflash_metrics.txt b/tidb/tests/fixtures/mock_tiflash_metrics.txt index 41dcdfa55d..42b50cf520 100644 --- a/tidb/tests/fixtures/mock_tiflash_metrics.txt +++ b/tidb/tests/fixtures/mock_tiflash_metrics.txt @@ -1,3 +1,20 @@ +# HELP tiflash_syncing_data_freshness Histogram of the replication lag (seconds) from TiKV to TiFlash +# TYPE tiflash_syncing_data_freshness histogram +tiflash_syncing_data_freshness_bucket{le="0.001"} 7816 +tiflash_syncing_data_freshness_bucket{le="0.002"} 8063 +tiflash_syncing_data_freshness_bucket{le="0.004"} 8501 +tiflash_syncing_data_freshness_bucket{le="0.008"} 12399 +tiflash_syncing_data_freshness_bucket{le="0.016"} 29931 +tiflash_syncing_data_freshness_bucket{le="0.032"} 31719 +tiflash_syncing_data_freshness_bucket{le="0.064"} 32241 +tiflash_syncing_data_freshness_bucket{le="0.128"} 32652 +tiflash_syncing_data_freshness_bucket{le="0.256"} 33059 +tiflash_syncing_data_freshness_bucket{le="0.512"} 33244 +tiflash_syncing_data_freshness_bucket{le="1.024"} 33285 +tiflash_syncing_data_freshness_bucket{le="2.048"} 33293 +tiflash_syncing_data_freshness_bucket{le="+Inf"} 33294 +tiflash_syncing_data_freshness_sum 545.12 +tiflash_syncing_data_freshness_count 33294 # HELP tiflash_system_current_metric_StoreSizeUsed System current metric StoreSizeUsed # TYPE tiflash_system_current_metric_StoreSizeUsed gauge tiflash_system_current_metric_StoreSizeUsed 13744.000000 diff --git a/tidb/tests/fixtures/mock_tikv_metrics.txt b/tidb/tests/fixtures/mock_tikv_metrics.txt index c9c188937f..d6bcc81163 100644 --- a/tidb/tests/fixtures/mock_tikv_metrics.txt +++ 
b/tidb/tests/fixtures/mock_tikv_metrics.txt @@ -39,3 +39,53 @@ process_cpu_seconds_total 7.01 # HELP process_resident_memory_bytes Resident memory size in bytes. # TYPE process_resident_memory_bytes gauge process_resident_memory_bytes 640008192 +# HELP tikv_engine_flow_bytes Bytes of read/written. +# TYPE tikv_engine_flow_bytes counter +tikv_engine_flow_bytes{db="kv",type="keys_read"} 1234 +tikv_engine_flow_bytes{db="kv",type="bytes_read"} 56789 +# HELP tikv_thread_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE tikv_thread_cpu_seconds_total counter +tikv_thread_cpu_seconds_total{name="raftstore"} 3.21 +tikv_thread_cpu_seconds_total{name="apply"} 1.54 +# HELP tikv_grpc_msg_duration_seconds Bucketed histogram of grpc message handling. +# TYPE tikv_grpc_msg_duration_seconds histogram +tikv_grpc_msg_duration_seconds_bucket{type="kv_get",le="0.005"} 200 +tikv_grpc_msg_duration_seconds_bucket{type="kv_get",le="+Inf"} 200 +tikv_grpc_msg_duration_seconds_sum{type="kv_get"} 0.32 +tikv_grpc_msg_duration_seconds_count{type="kv_get"} 200 +# HELP tikv_raftstore_append_log_duration_seconds Bucketed histogram of peer appending log duration. +# TYPE tikv_raftstore_append_log_duration_seconds histogram +tikv_raftstore_append_log_duration_seconds_bucket{type="normal",le="0.005"} 150 +tikv_raftstore_append_log_duration_seconds_bucket{type="normal",le="+Inf"} 150 +tikv_raftstore_append_log_duration_seconds_sum{type="normal"} 0.18 +tikv_raftstore_append_log_duration_seconds_count{type="normal"} 150 +# HELP tikv_raftstore_apply_log_duration_seconds Bucketed histogram of peer applying log duration. 
+# TYPE tikv_raftstore_apply_log_duration_seconds histogram +tikv_raftstore_apply_log_duration_seconds_bucket{type="normal",le="0.005"} 148 +tikv_raftstore_apply_log_duration_seconds_bucket{type="normal",le="+Inf"} 148 +tikv_raftstore_apply_log_duration_seconds_sum{type="normal"} 0.17 +tikv_raftstore_apply_log_duration_seconds_count{type="normal"} 148 +# HELP tikv_raftstore_commit_log_duration_seconds Bucketed histogram of peer committing log duration. +# TYPE tikv_raftstore_commit_log_duration_seconds histogram +tikv_raftstore_commit_log_duration_seconds_bucket{type="normal",le="0.005"} 148 +tikv_raftstore_commit_log_duration_seconds_bucket{type="normal",le="+Inf"} 148 +tikv_raftstore_commit_log_duration_seconds_sum{type="normal"} 0.16 +tikv_raftstore_commit_log_duration_seconds_count{type="normal"} 148 +# HELP tikv_raftstore_store_duration_secs Bucketed histogram of store writing duration. +# TYPE tikv_raftstore_store_duration_secs histogram +tikv_raftstore_store_duration_secs_bucket{type="normal",le="0.005"} 300 +tikv_raftstore_store_duration_secs_bucket{type="normal",le="+Inf"} 300 +tikv_raftstore_store_duration_secs_sum{type="normal"} 0.45 +tikv_raftstore_store_duration_secs_count{type="normal"} 300 +# HELP tikv_raftstore_apply_duration_secs Bucketed histogram of apply duration. +# TYPE tikv_raftstore_apply_duration_secs histogram +tikv_raftstore_apply_duration_secs_bucket{type="normal",le="0.005"} 295 +tikv_raftstore_apply_duration_secs_bucket{type="normal",le="+Inf"} 295 +tikv_raftstore_apply_duration_secs_sum{type="normal"} 0.44 +tikv_raftstore_apply_duration_secs_count{type="normal"} 295 +# HELP tikv_storage_engine_async_request_duration_seconds Bucketed histogram of processing durations of async requests. 
+# TYPE tikv_storage_engine_async_request_duration_seconds histogram +tikv_storage_engine_async_request_duration_seconds_bucket{type="write",le="0.005"} 100 +tikv_storage_engine_async_request_duration_seconds_bucket{type="write",le="+Inf"} 100 +tikv_storage_engine_async_request_duration_seconds_sum{type="write"} 0.12 +tikv_storage_engine_async_request_duration_seconds_count{type="write"} 100 diff --git a/tidb/tests/test_tidb.py b/tidb/tests/test_tidb.py index 9606a8a0ec..86573842b0 100644 --- a/tidb/tests/test_tidb.py +++ b/tidb/tests/test_tidb.py @@ -4,7 +4,7 @@ from datadog_checks.base.utils.tagging import GENERIC_TAGS from datadog_checks.tidb import TiDBCheck -from .expected import EXPECTED_TIDB, EXPECTED_TIFLASH, EXPECTED_TIFLASH_PROXY, EXPECTED_TIKV +from .expected import EXPECTED_PD, EXPECTED_TIDB, EXPECTED_TIFLASH, EXPECTED_TIFLASH_PROXY, EXPECTED_TIKV @pytest.mark.unit @@ -41,6 +41,12 @@ def test_tikv_mock_metrics(aggregator, mock_tikv_metrics, tikv_instance): _check_and_assert(aggregator, EXPECTED_TIKV, check) +@pytest.mark.unit +def test_pd_mock_metrics(aggregator, mock_pd_metrics, pd_instance): + check = TiDBCheck("test_pd_mock_metrics", {}, [pd_instance]) + _check_and_assert(aggregator, EXPECTED_PD, check) + + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_cluster_metrics(aggregator, tikv_instance, tidb_instance, tiflash_instance, tiflash_proxy_instance):