22"""
33PostgreSQL Reports Generator using PromQL
44
5- This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007)
5+ This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004)
66by querying Prometheus metrics using PromQL queries.
77"""
88
@@ -236,6 +236,219 @@ def generate_a007_altered_settings_report(self, cluster: str = "local", node_nam
236236
237237 return self .format_report_data ("A007" , altered_settings , node_name )
238238
def generate_h001_invalid_indexes_report(
    self, cluster: str = "local", node_name: str = "node-01"
) -> Dict[str, Any]:
    """
    Generate H001 Invalid Indexes report.

    Runs one instant query against the ``pgwatch_pg_invalid_indexes``
    metric, turns every returned sample into an index entry (identity
    taken from the sample labels, size taken from the sample value) and
    passes the collected entries plus totals to ``format_report_data``.

    Args:
        cluster: Cluster name
        node_name: Node name

    Returns:
        Dictionary containing invalid indexes information
    """
    print("Generating H001 Invalid Indexes report...")

    # Query invalid indexes using the pgwatch_pg_invalid_indexes metric
    invalid_indexes_query = (
        f'pgwatch_pg_invalid_indexes{{cluster="{cluster}", node_name="{node_name}"}}'
    )
    result = self.query_instant(invalid_indexes_query)

    invalid_indexes = []
    total_size = 0

    if result.get('status') == 'success' and result.get('data', {}).get('result'):
        for item in result['data']['result']:
            labels = item['metric']

            # Extract index information from labels
            schema_name = labels.get('schema_name', 'unknown')
            table_name = labels.get('table_name', 'unknown')
            index_name = labels.get('index_name', 'unknown')
            relation_name = labels.get('relation_name', f"{schema_name}.{table_name}")

            # Get index size from the metric value
            index_size_bytes = float(item['value'][1]) if item.get('value') else 0

            # Label values are strings. Accept numeric spellings ("1", "1.0")
            # as well as textual booleans, so that one unexpected label value
            # does not abort the whole report with a ValueError.
            raw_supports_fk = str(labels.get('supports_fk', '0')).strip().lower()
            try:
                supports_fk = bool(int(float(raw_supports_fk)))
            except (ValueError, OverflowError):
                supports_fk = raw_supports_fk in ('true', 't', 'yes', 'on')

            invalid_indexes.append({
                "schema_name": schema_name,
                "table_name": table_name,
                "index_name": index_name,
                "relation_name": relation_name,
                "index_size_bytes": index_size_bytes,
                "index_size_pretty": self.format_bytes(index_size_bytes),
                "supports_fk": supports_fk,
            })
            total_size += index_size_bytes

    return self.format_report_data("H001", {
        "invalid_indexes": invalid_indexes,
        "total_count": len(invalid_indexes),
        "total_size_bytes": total_size,
        "total_size_pretty": self.format_bytes(total_size),
    }, node_name)
def generate_f005_btree_bloat_report(
    self,
    cluster: str = "local",
    node_name: str = "node-01",
    min_bloat_pct: float = 10,
) -> Dict[str, Any]:
    """
    Generate F005 Autovacuum: Btree Index Bloat (Estimated) report.

    Issues one instant query per ``pgwatch_pg_btree_bloat_*`` metric and
    merges the samples into one record per index, keyed by the
    ``schemaname``/``tblname``/``idxname`` labels. Only indexes whose
    ``bloat_pct`` reaches ``min_bloat_pct`` are reported, sorted by bloat
    percentage in descending order.

    Args:
        cluster: Cluster name
        node_name: Node name
        min_bloat_pct: Minimum bloat percentage an index must have to be
            included in the report (default: 10)

    Returns:
        Dictionary containing btree index bloat information
    """
    print("Generating F005 Autovacuum: Btree Index Bloat (Estimated) report...")

    # Query btree bloat using multiple metrics
    metric_types = ('extra_size', 'extra_pct', 'bloat_size', 'bloat_pct')
    bloat_queries = {
        metric_type: (
            f'pgwatch_pg_btree_bloat_{metric_type}'
            f'{{cluster="{cluster}", node_name="{node_name}"}}'
        )
        for metric_type in metric_types
    }

    bloated_indexes = {}

    for metric_type, query in bloat_queries.items():
        result = self.query_instant(query)
        if not (result.get('status') == 'success' and result.get('data', {}).get('result')):
            continue

        # NOTE: samples are deliberately not printed here; stdout may carry
        # the JSON report itself, so debug output would corrupt it.
        for item in result['data']['result']:
            labels = item['metric']
            schema_name = labels.get('schemaname', 'unknown')
            table_name = labels.get('tblname', 'unknown')
            index_name = labels.get('idxname', 'unknown')

            index_key = f"{schema_name}.{table_name}.{index_name}"

            # First sample seen for this index: start with zeroed metrics so
            # that a metric missing from Prometheus simply stays at 0.
            if index_key not in bloated_indexes:
                bloated_indexes[index_key] = {
                    "schema_name": schema_name,
                    "table_name": table_name,
                    "index_name": index_name,
                    "extra_size": 0,
                    "extra_pct": 0,
                    "bloat_size": 0,
                    "bloat_pct": 0,
                }

            value = float(item['value'][1]) if item.get('value') else 0
            bloated_indexes[index_key][metric_type] = value

    # Convert to list and add pretty formatting
    bloated_indexes_list = []
    total_bloat_size = 0

    for index_data in bloated_indexes.values():
        # Skip indexes with minimal bloat
        if index_data['bloat_pct'] >= min_bloat_pct:
            index_data['extra_size_pretty'] = self.format_bytes(index_data['extra_size'])
            index_data['bloat_size_pretty'] = self.format_bytes(index_data['bloat_size'])

            bloated_indexes_list.append(index_data)
            total_bloat_size += index_data['bloat_size']

    # Sort by bloat percentage descending
    bloated_indexes_list.sort(key=lambda x: x['bloat_pct'], reverse=True)

    return self.format_report_data("F005", {
        "bloated_indexes": bloated_indexes_list,
        "total_count": len(bloated_indexes_list),
        "total_bloat_size_bytes": total_bloat_size,
        "total_bloat_size_pretty": self.format_bytes(total_bloat_size),
    }, node_name)
def generate_f004_heap_bloat_report(
    self,
    cluster: str = "local",
    node_name: str = "node-01",
    min_bloat_pct: float = 10,
) -> Dict[str, Any]:
    """
    Generate F004 Autovacuum: Heap Bloat (Estimated) report.

    Issues one instant query per ``pgwatch_pg_table_bloat_*`` metric and
    merges the samples into one record per table, keyed by the
    ``schemaname``/``tblname`` labels. Only tables whose ``bloat_pct``
    reaches ``min_bloat_pct`` are reported, sorted by bloat percentage in
    descending order.

    Args:
        cluster: Cluster name
        node_name: Node name
        min_bloat_pct: Minimum bloat percentage a table must have to be
            included in the report (default: 10)

    Returns:
        Dictionary containing heap bloat information
    """
    print("Generating F004 Autovacuum: Heap Bloat (Estimated) report...")

    # Query table bloat using multiple metrics
    metric_types = ('real_size', 'extra_size', 'extra_pct', 'bloat_size', 'bloat_pct')
    bloat_queries = {
        metric_type: (
            f'pgwatch_pg_table_bloat_{metric_type}'
            f'{{cluster="{cluster}", node_name="{node_name}"}}'
        )
        for metric_type in metric_types
    }

    bloated_tables = {}

    for metric_type, query in bloat_queries.items():
        result = self.query_instant(query)
        if not (result.get('status') == 'success' and result.get('data', {}).get('result')):
            continue

        for item in result['data']['result']:
            labels = item['metric']
            schema_name = labels.get('schemaname', 'unknown')
            table_name = labels.get('tblname', 'unknown')

            table_key = f"{schema_name}.{table_name}"

            # First sample seen for this table: start with zeroed metrics so
            # that a metric missing from Prometheus simply stays at 0.
            if table_key not in bloated_tables:
                bloated_tables[table_key] = {
                    "schema_name": schema_name,
                    "table_name": table_name,
                    "real_size": 0,
                    "extra_size": 0,
                    "extra_pct": 0,
                    "bloat_size": 0,
                    "bloat_pct": 0,
                }

            value = float(item['value'][1]) if item.get('value') else 0
            bloated_tables[table_key][metric_type] = value

    # Convert to list and add pretty formatting
    bloated_tables_list = []
    total_bloat_size = 0

    for table_data in bloated_tables.values():
        # Skip tables with minimal bloat
        if table_data['bloat_pct'] >= min_bloat_pct:
            table_data['real_size_pretty'] = self.format_bytes(table_data['real_size'])
            table_data['extra_size_pretty'] = self.format_bytes(table_data['extra_size'])
            table_data['bloat_size_pretty'] = self.format_bytes(table_data['bloat_size'])

            bloated_tables_list.append(table_data)
            total_bloat_size += table_data['bloat_size']

    # Sort by bloat percentage descending
    bloated_tables_list.sort(key=lambda x: x['bloat_pct'], reverse=True)

    return self.format_report_data("F004", {
        "bloated_tables": bloated_tables_list,
        "total_count": len(bloated_tables_list),
        "total_bloat_size_bytes": total_bloat_size,
        "total_bloat_size_pretty": self.format_bytes(total_bloat_size),
    }, node_name)
def format_bytes(self, bytes_value: float) -> str:
    """
    Format bytes value for human readable display.

    The value is scaled by powers of 1024 through B, KB, MB, GB and TB
    (values beyond TB stay expressed in TB). The number of decimals shrinks
    as the scaled number grows: two decimals below 10, one below 100, none
    from 100 upwards.

    Negative inputs are scaled by their magnitude and keep their sign, so
    they use the same units and precision as the matching positive value
    instead of always being rendered in plain bytes.

    Args:
        bytes_value: Size in bytes

    Returns:
        Human readable size string such as "1.50 KB"; exactly "0 B" for zero
    """
    if bytes_value == 0:
        return "0 B"

    units = ['B', 'KB', 'MB', 'GB', 'TB']
    value = float(bytes_value)

    # Work on the magnitude so negative sizes pick a unit the same way
    # positive ones do; the sign is re-applied when formatting.
    sign = "-" if value < 0 else ""
    magnitude = abs(value)

    unit_index = 0
    while magnitude >= 1024 and unit_index < len(units) - 1:
        magnitude /= 1024
        unit_index += 1

    if magnitude >= 100:
        precision = 0
    elif magnitude >= 10:
        precision = 1
    else:
        precision = 2

    return f"{sign}{magnitude:.{precision}f} {units[unit_index]}"
239452 def format_report_data (self , check_id : str , data : Dict [str , Any ], host : str = "target-database" ) -> Dict [str , Any ]:
240453 """
241454 Format data to match template structure.
@@ -388,7 +601,7 @@ def get_cluster_metric_description(self, metric_name: str) -> str:
388601
389602 def generate_all_reports (self , cluster : str = "local" , node_name : str = "node-01" ) -> Dict [str , Any ]:
390603 """
391- Generate all four reports.
604+ Generate all reports.
392605
393606 Args:
394607 cluster: Cluster name
@@ -404,6 +617,9 @@ def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01
404617 reports ['A003' ] = self .generate_a003_settings_report (cluster , node_name )
405618 reports ['A004' ] = self .generate_a004_cluster_report (cluster , node_name )
406619 reports ['A007' ] = self .generate_a007_altered_settings_report (cluster , node_name )
620+ reports ['H001' ] = self .generate_h001_invalid_indexes_report (cluster , node_name )
621+ reports ['F005' ] = self .generate_f005_btree_bloat_report (cluster , node_name )
622+ reports ['F004' ] = self .generate_f004_heap_bloat_report (cluster , node_name )
407623
408624 return reports
409625 def create_report (self , api_url , token , project , epoch ):
@@ -458,14 +674,16 @@ def main():
458674 help = 'Cluster name (default: local)' )
459675 parser .add_argument ('--node-name' , default = 'node-01' ,
460676 help = 'Node name (default: node-01)' )
461- parser .add_argument ('--check-id' , choices = ['A002' , 'A003' , 'A004' , 'A007' , 'ALL' ],
677+ parser .add_argument ('--check-id' , choices = ['A002' , 'A003' , 'A004' , 'A007' , 'H001' , 'F005' , 'F004' , ' ALL' ],
462678 help = 'Specific check ID to generate (default: ALL)' )
463679 parser .add_argument ('--output' , default = '-' ,
464680 help = 'Output file (default: stdout)' )
465681 parser .add_argument ('--api-url' , default = 'https://postgres.ai/api/general' )
466682 parser .add_argument ('--token' , default = '' )
467683 parser .add_argument ('--project' , default = 'project-name' )
468684 parser .add_argument ('--epoch' , default = '1' )
685+ parser .add_argument ('--no-upload' , action = 'store_true' , default = False ,
686+ help = 'Do not upload reports to the API' )
469687
470688 args = parser .parse_args ()
471689
@@ -479,13 +697,16 @@ def main():
479697 try :
480698 if args .check_id == 'ALL' or args .check_id is None :
481699 # Generate all reports
700+ if not args .no_upload :
701+ report_id = generator .create_report (args .api_url , args .token , args .project , args .epoch )
482702 reports = generator .generate_all_reports (args .cluster , args .node_name )
483- report_id = generator .create_report (args .api_url , args .token , args .project , args .epoch )
484703 for report in reports :
485- json_report = json .dump (reports [report ], open (f"{ report } .json" , "w" ))
486- generator .upload_report_file (args .api_url , args .token , report_id , f"{ report } .json" )
704+ json .dump (reports [report ], open (f"{ report } .json" , "w" ))
705+ if not args .no_upload :
706+ generator .upload_report_file (args .api_url , args .token , report_id , f"{ report } .json" )
487707 if args .output == '-' :
488- print (json .dumps (reports , indent = 2 ))
708+
709+ pass
489710 else :
490711 with open (args .output , 'w' ) as f :
491712 json .dump (reports , f , indent = 2 )
@@ -500,13 +721,20 @@ def main():
500721 report = generator .generate_a004_cluster_report (args .cluster , args .node_name )
501722 elif args .check_id == 'A007' :
502723 report = generator .generate_a007_altered_settings_report (args .cluster , args .node_name )
724+ elif args .check_id == 'H001' :
725+ report = generator .generate_h001_invalid_indexes_report (args .cluster , args .node_name )
726+ elif args .check_id == 'F005' :
727+ report = generator .generate_f005_btree_bloat_report (args .cluster , args .node_name )
728+ elif args .check_id == 'F004' :
729+ report = generator .generate_f004_heap_bloat_report (args .cluster , args .node_name )
503730
504731 if args .output == '-' :
505732 print (json .dumps (report , indent = 2 ))
506733 else :
507734 with open (args .output , 'w' ) as f :
508735 json .dump (report , f , indent = 2 )
509- generator .upload_report_file (args .api_url , args .token , args .project , args .epoch , args .output )
736+ if not args .no_upload :
737+ generator .upload_report_file (args .api_url , args .token , args .project , args .epoch , args .output )
510738 except Exception as e :
511739 print (f"Error generating reports: { e } " )
512740 raise e
0 commit comments