Skip to content

Commit e737657

Browse files
Added invalid-index and bloat reporting (H001, F005, F004)
1 parent 950cf25 commit e737657

File tree

2 files changed

+238
-10
lines changed

2 files changed

+238
-10
lines changed

docker-compose.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ services:
2020
2121
# Target Database - The PostgreSQL database being monitored
2222
target-db:
23-
image: postgres:17
23+
image: postgres:15
2424
container_name: target-db
2525
environment:
2626
POSTGRES_DB: target_database
@@ -35,7 +35,7 @@ services:
3535

3636
# Postgres Sink - Storage for metrics in PostgreSQL format
3737
sink-postgres:
38-
image: postgres:17
38+
image: postgres:15
3939
container_name: sink-postgres
4040
environment:
4141
POSTGRES_DB: postgres

reporter/postgres_reports.py

Lines changed: 236 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""
33
PostgreSQL Reports Generator using PromQL
44
5-
This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007)
5+
This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004)
66
by querying Prometheus metrics using PromQL queries.
77
"""
88

@@ -236,6 +236,219 @@ def generate_a007_altered_settings_report(self, cluster: str = "local", node_nam
236236

237237
return self.format_report_data("A007", altered_settings, node_name)
238238

239+
def generate_h001_invalid_indexes_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]:
    """
    Generate H001 Invalid Indexes report.

    Queries the pgwatch_pg_invalid_indexes metric from Prometheus and turns
    every returned series into one invalid-index record, plus count/size
    totals across all of them.

    Args:
        cluster: Cluster name used in the PromQL label selector.
        node_name: Node name used in the PromQL label selector.

    Returns:
        Report dict produced by format_report_data() containing the list of
        invalid indexes, total count, and total size (raw bytes and pretty).
    """
    print("Generating H001 Invalid Indexes report...")

    # All invalid-index series for the target cluster/node.
    invalid_indexes_query = f'pgwatch_pg_invalid_indexes{{cluster="{cluster}", node_name="{node_name}"}}'
    result = self.query_instant(invalid_indexes_query)

    invalid_indexes = []
    total_size = 0

    if result.get('status') == 'success' and result.get('data', {}).get('result'):
        for item in result['data']['result']:
            # Index identity comes from the series labels.
            schema_name = item['metric'].get('schema_name', 'unknown')
            table_name = item['metric'].get('table_name', 'unknown')
            index_name = item['metric'].get('index_name', 'unknown')
            relation_name = item['metric'].get('relation_name', f"{schema_name}.{table_name}")

            # The sample value is the index size in bytes.
            index_size_bytes = float(item['value'][1]) if item.get('value') else 0
            supports_fk = item['metric'].get('supports_fk', '0')

            # Labels are strings and some exporters emit "true"/"false"
            # instead of "1"/"0"; bool(int(...)) alone would raise
            # ValueError on non-numeric text. Numeric labels keep the
            # original semantics exactly.
            try:
                supports_fk_flag = bool(int(supports_fk))
            except (TypeError, ValueError):
                supports_fk_flag = str(supports_fk).strip().lower() in ('true', 't', 'yes')

            invalid_index = {
                "schema_name": schema_name,
                "table_name": table_name,
                "index_name": index_name,
                "relation_name": relation_name,
                "index_size_bytes": index_size_bytes,
                "index_size_pretty": self.format_bytes(index_size_bytes),
                "supports_fk": supports_fk_flag
            }

            invalid_indexes.append(invalid_index)
            total_size += index_size_bytes

    return self.format_report_data("H001", {
        "invalid_indexes": invalid_indexes,
        "total_count": len(invalid_indexes),
        "total_size_bytes": total_size,
        "total_size_pretty": self.format_bytes(total_size)
    }, node_name)
290+
291+
def generate_f005_btree_bloat_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]:
    """
    Generate F005 Autovacuum: Btree Index Bloat (Estimated) report.

    Combines several pgwatch_pg_btree_bloat_* gauges into one record per
    index, keeps only indexes with >= 10% estimated bloat, and returns them
    sorted by bloat percentage (descending).

    Args:
        cluster: Cluster name used in the PromQL label selector.
        node_name: Node name used in the PromQL label selector.

    Returns:
        Report dict produced by format_report_data() with the bloated-index
        list plus count and total bloat size (raw bytes and pretty).
    """
    print("Generating F005 Autovacuum: Btree Index Bloat (Estimated) report...")

    # One PromQL query per bloat component; all share the same label filter.
    bloat_queries = {
        'extra_size': f'pgwatch_pg_btree_bloat_extra_size{{cluster="{cluster}", node_name="{node_name}"}}',
        'extra_pct': f'pgwatch_pg_btree_bloat_extra_pct{{cluster="{cluster}", node_name="{node_name}"}}',
        'bloat_size': f'pgwatch_pg_btree_bloat_bloat_size{{cluster="{cluster}", node_name="{node_name}"}}',
        'bloat_pct': f'pgwatch_pg_btree_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}"}}',
    }

    bloated_indexes = {}

    for metric_type, query in bloat_queries.items():
        result = self.query_instant(query)
        if result.get('status') == 'success' and result.get('data', {}).get('result'):
            # Fix: removed a leftover debug print(item) that dumped every
            # raw series to stdout on each run.
            for item in result['data']['result']:
                schema_name = item['metric'].get('schemaname', 'unknown')
                table_name = item['metric'].get('tblname', 'unknown')
                index_name = item['metric'].get('idxname', 'unknown')

                index_key = f"{schema_name}.{table_name}.{index_name}"

                if index_key not in bloated_indexes:
                    bloated_indexes[index_key] = {
                        "schema_name": schema_name,
                        "table_name": table_name,
                        "index_name": index_name,
                        "extra_size": 0,
                        "extra_pct": 0,
                        "bloat_size": 0,
                        "bloat_pct": 0,
                    }

                value = float(item['value'][1]) if item.get('value') else 0
                bloated_indexes[index_key][metric_type] = value

    # Convert to list and add pretty formatting.
    bloated_indexes_list = []
    total_bloat_size = 0

    for index_data in bloated_indexes.values():
        # Skip indexes with minimal bloat (< 10%).
        if index_data['bloat_pct'] >= 10:
            index_data['extra_size_pretty'] = self.format_bytes(index_data['extra_size'])
            index_data['bloat_size_pretty'] = self.format_bytes(index_data['bloat_size'])

            bloated_indexes_list.append(index_data)
            total_bloat_size += index_data['bloat_size']

    # Sort by bloat percentage descending (worst offenders first).
    bloated_indexes_list.sort(key=lambda x: x['bloat_pct'], reverse=True)

    return self.format_report_data("F005", {
        "bloated_indexes": bloated_indexes_list,
        "total_count": len(bloated_indexes_list),
        "total_bloat_size_bytes": total_bloat_size,
        "total_bloat_size_pretty": self.format_bytes(total_bloat_size)
    }, node_name)
361+
362+
def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]:
    """
    Generate F004 Autovacuum: Heap Bloat (Estimated) report.

    Merges the pgwatch_pg_table_bloat_* gauges into one record per table,
    keeps only tables with at least 10% estimated bloat, and returns them
    sorted by bloat percentage (descending).

    Args:
        cluster: Cluster name used in the PromQL label selector.
        node_name: Node name used in the PromQL label selector.

    Returns:
        Report dict produced by format_report_data() with the bloated-table
        list plus count and total bloat size (raw bytes and pretty).
    """
    print("Generating F004 Autovacuum: Heap Bloat (Estimated) report...")

    # One PromQL query per bloat component; all share the same label filter.
    label_filter = f'{{cluster="{cluster}", node_name="{node_name}"}}'
    components = ('real_size', 'extra_size', 'extra_pct', 'bloat_size', 'bloat_pct')
    bloat_queries = {
        name: f'pgwatch_pg_table_bloat_{name}' + label_filter
        for name in components
    }

    per_table = {}

    for component, promql in bloat_queries.items():
        response = self.query_instant(promql)
        if response.get('status') != 'success':
            continue
        for series in response.get('data', {}).get('result', []):
            schema = series['metric'].get('schemaname', 'unknown')
            table = series['metric'].get('tblname', 'unknown')

            # Accumulate all components for the same table into one record.
            record = per_table.setdefault(f"{schema}.{table}", {
                "schema_name": schema,
                "table_name": table,
                "real_size": 0,
                "extra_size": 0,
                "extra_pct": 0,
                "bloat_size": 0,
                "bloat_pct": 0,
            })

            sample = series.get('value')
            record[component] = float(sample[1]) if sample else 0

    # Keep only tables with at least 10% estimated bloat; attach pretty sizes.
    reported = []
    total_bloat_size = 0

    for record in per_table.values():
        if record['bloat_pct'] < 10:
            continue
        record['real_size_pretty'] = self.format_bytes(record['real_size'])
        record['extra_size_pretty'] = self.format_bytes(record['extra_size'])
        record['bloat_size_pretty'] = self.format_bytes(record['bloat_size'])
        reported.append(record)
        total_bloat_size += record['bloat_size']

    # Worst offenders first.
    reported.sort(key=lambda rec: rec['bloat_pct'], reverse=True)

    return self.format_report_data("F004", {
        "bloated_tables": reported,
        "total_count": len(reported),
        "total_bloat_size_bytes": total_bloat_size,
        "total_bloat_size_pretty": self.format_bytes(total_bloat_size)
    }, node_name)
431+
432+
def format_bytes(self, bytes_value: float) -> str:
    """Return *bytes_value* as a human-readable size string.

    Scales through B/KB/MB/GB/TB and picks 0, 1, or 2 decimal places so
    the number keeps roughly three significant digits
    (e.g. "118 MB", "11.8 MB", "1.18 MB").
    """
    if bytes_value == 0:
        return "0 B"

    units = ['B', 'KB', 'MB', 'GB', 'TB']
    magnitude = float(bytes_value)
    exponent = 0

    while magnitude >= 1024 and exponent < len(units) - 1:
        magnitude /= 1024
        exponent += 1

    # Fewer decimals for bigger numbers.
    if magnitude >= 100:
        precision = 0
    elif magnitude >= 10:
        precision = 1
    else:
        precision = 2
    return f"{magnitude:.{precision}f} {units[exponent]}"
451+
239452
def format_report_data(self, check_id: str, data: Dict[str, Any], host: str = "target-database") -> Dict[str, Any]:
240453
"""
241454
Format data to match template structure.
@@ -388,7 +601,7 @@ def get_cluster_metric_description(self, metric_name: str) -> str:
388601

389602
def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]:
390603
"""
391-
Generate all four reports.
604+
Generate all reports.
392605
393606
Args:
394607
cluster: Cluster name
@@ -404,6 +617,9 @@ def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01
404617
reports['A003'] = self.generate_a003_settings_report(cluster, node_name)
405618
reports['A004'] = self.generate_a004_cluster_report(cluster, node_name)
406619
reports['A007'] = self.generate_a007_altered_settings_report(cluster, node_name)
620+
reports['H001'] = self.generate_h001_invalid_indexes_report(cluster, node_name)
621+
reports['F005'] = self.generate_f005_btree_bloat_report(cluster, node_name)
622+
reports['F004'] = self.generate_f004_heap_bloat_report(cluster, node_name)
407623

408624
return reports
409625
def create_report(self, api_url, token, project, epoch):
@@ -458,14 +674,16 @@ def main():
458674
help='Cluster name (default: local)')
459675
parser.add_argument('--node-name', default='node-01',
460676
help='Node name (default: node-01)')
461-
parser.add_argument('--check-id', choices=['A002', 'A003', 'A004', 'A007', 'ALL'],
677+
parser.add_argument('--check-id', choices=['A002', 'A003', 'A004', 'A007', 'H001', 'F005', 'F004', 'ALL'],
462678
help='Specific check ID to generate (default: ALL)')
463679
parser.add_argument('--output', default='-',
464680
help='Output file (default: stdout)')
465681
parser.add_argument('--api-url', default='https://postgres.ai/api/general')
466682
parser.add_argument('--token', default='')
467683
parser.add_argument('--project', default='project-name')
468684
parser.add_argument('--epoch', default='1')
685+
parser.add_argument('--no-upload', action='store_true', default=False,
686+
help='Do not upload reports to the API')
469687

470688
args = parser.parse_args()
471689

@@ -479,13 +697,16 @@ def main():
479697
try:
480698
if args.check_id == 'ALL' or args.check_id is None:
481699
# Generate all reports
700+
if not args.no_upload:
701+
report_id = generator.create_report(args.api_url, args.token, args.project, args.epoch)
482702
reports = generator.generate_all_reports(args.cluster, args.node_name)
483-
report_id = generator.create_report(args.api_url, args.token, args.project, args.epoch)
484703
for report in reports:
485-
json_report = json.dump(reports[report], open(f"{report}.json", "w"))
486-
generator.upload_report_file(args.api_url, args.token, report_id, f"{report}.json")
704+
json.dump(reports[report], open(f"{report}.json", "w"))
705+
if not args.no_upload:
706+
generator.upload_report_file(args.api_url, args.token, report_id, f"{report}.json")
487707
if args.output == '-':
488-
print(json.dumps(reports, indent=2))
708+
709+
pass
489710
else:
490711
with open(args.output, 'w') as f:
491712
json.dump(reports, f, indent=2)
@@ -500,13 +721,20 @@ def main():
500721
report = generator.generate_a004_cluster_report(args.cluster, args.node_name)
501722
elif args.check_id == 'A007':
502723
report = generator.generate_a007_altered_settings_report(args.cluster, args.node_name)
724+
elif args.check_id == 'H001':
725+
report = generator.generate_h001_invalid_indexes_report(args.cluster, args.node_name)
726+
elif args.check_id == 'F005':
727+
report = generator.generate_f005_btree_bloat_report(args.cluster, args.node_name)
728+
elif args.check_id == 'F004':
729+
report = generator.generate_f004_heap_bloat_report(args.cluster, args.node_name)
503730

504731
if args.output == '-':
505732
print(json.dumps(report, indent=2))
506733
else:
507734
with open(args.output, 'w') as f:
508735
json.dump(report, f, indent=2)
509-
generator.upload_report_file(args.api_url, args.token, args.project, args.epoch, args.output)
736+
if not args.no_upload:
737+
generator.upload_report_file(args.api_url, args.token, args.project, args.epoch, args.output)
510738
except Exception as e:
511739
print(f"Error generating reports: {e}")
512740
raise e

0 commit comments

Comments
 (0)