@@ -472,8 +472,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
472472 ForkNumber forkNum ,
473473 BlockNumber blockNum ,
474474 BufferAccessStrategy strategy ,
475- bool * foundPtr );
476- static void FlushBuffer (BufferDesc * buf , SMgrRelation reln );
475+ bool * foundPtr , IOContext * io_context );
476+ static void FlushBuffer (BufferDesc * buf , SMgrRelation reln ,
477+ IOObject io_object , IOContext io_context );
477478static void FindAndDropRelationBuffers (RelFileLocator rlocator ,
478479 ForkNumber forkNum ,
479480 BlockNumber nForkBlock ,
@@ -814,6 +815,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
814815 BufferDesc * bufHdr ;
815816 Block bufBlock ;
816817 bool found ;
818+ IOContext io_context ;
819+ IOObject io_object ;
817820 bool isExtend ;
818821 bool isLocalBuf = SmgrIsTemp (smgr );
819822
@@ -846,7 +849,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
846849
847850 if (isLocalBuf )
848851 {
849- bufHdr = LocalBufferAlloc (smgr , forkNum , blockNum , & found );
852+ /*
853+ * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
854+ * do not use a BufferAccessStrategy for I/O of temporary tables.
855+ * However, in some cases, the "strategy" may not be NULL, so we can't
856+ * rely on IOContextForStrategy() to set the right IOContext for us.
857+ * This may happen in cases like CREATE TEMPORARY TABLE AS...
858+ */
859+ bufHdr = LocalBufferAlloc (smgr , forkNum , blockNum , & found , & io_context );
850860 if (found )
851861 pgBufferUsage .local_blks_hit ++ ;
852862 else if (isExtend )
@@ -862,7 +872,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
862872 * not currently in memory.
863873 */
864874 bufHdr = BufferAlloc (smgr , relpersistence , forkNum , blockNum ,
865- strategy , & found );
875+ strategy , & found , & io_context );
866876 if (found )
867877 pgBufferUsage .shared_blks_hit ++ ;
868878 else if (isExtend )
@@ -977,7 +987,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
977987 */
978988 Assert (!(pg_atomic_read_u32 (& bufHdr -> state ) & BM_VALID )); /* spinlock not needed */
979989
980- bufBlock = isLocalBuf ? LocalBufHdrGetBlock (bufHdr ) : BufHdrGetBlock (bufHdr );
990+ if (isLocalBuf )
991+ {
992+ bufBlock = LocalBufHdrGetBlock (bufHdr );
993+ io_object = IOOBJECT_TEMP_RELATION ;
994+ }
995+ else
996+ {
997+ bufBlock = BufHdrGetBlock (bufHdr );
998+ io_object = IOOBJECT_RELATION ;
999+ }
9811000
9821001 if (isExtend )
9831002 {
@@ -986,6 +1005,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
9861005 /* don't set checksum for all-zero page */
9871006 smgrextend (smgr , forkNum , blockNum , (char * ) bufBlock , false);
9881007
1008+ pgstat_count_io_op (io_object , io_context , IOOP_EXTEND );
1009+
9891010 /*
9901011 * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
9911012 * although we're essentially performing a write. At least on linux
@@ -1013,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
10131034
10141035 smgrread (smgr , forkNum , blockNum , (char * ) bufBlock );
10151036
1037+ pgstat_count_io_op (io_object , io_context , IOOP_READ );
1038+
10161039 if (track_io_timing )
10171040 {
10181041 INSTR_TIME_SET_CURRENT (io_time );
@@ -1106,14 +1129,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11061129 * *foundPtr is actually redundant with the buffer's BM_VALID flag, but
11071130 * we keep it for simplicity in ReadBuffer.
11081131 *
1132+ * io_context is passed as an output parameter to avoid calling
1133+ * IOContextForStrategy() when there is a shared buffers hit and no IO
1134+ * statistics need be captured.
1135+ *
11091136 * No locks are held either at entry or exit.
11101137 */
11111138static BufferDesc *
11121139BufferAlloc (SMgrRelation smgr , char relpersistence , ForkNumber forkNum ,
11131140 BlockNumber blockNum ,
11141141 BufferAccessStrategy strategy ,
1115- bool * foundPtr )
1142+ bool * foundPtr , IOContext * io_context )
11161143{
1144+ bool from_ring ;
11171145 BufferTag newTag ; /* identity of requested block */
11181146 uint32 newHash ; /* hash value for newTag */
11191147 LWLock * newPartitionLock ; /* buffer partition lock for it */
@@ -1165,8 +1193,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11651193 {
11661194 /*
11671195 * If we get here, previous attempts to read the buffer must
1168- * have failed ... but we shall bravely try again.
1196+ * have failed ... but we shall bravely try again. Set
1197+ * io_context since we will in fact need to count an IO
1198+ * operation.
11691199 */
1200+ * io_context = IOContextForStrategy (strategy );
11701201 * foundPtr = false;
11711202 }
11721203 }
@@ -1180,6 +1211,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11801211 */
11811212 LWLockRelease (newPartitionLock );
11821213
1214+ * io_context = IOContextForStrategy (strategy );
1215+
11831216 /* Loop here in case we have to try another victim buffer */
11841217 for (;;)
11851218 {
@@ -1193,7 +1226,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11931226 * Select a victim buffer. The buffer is returned with its header
11941227 * spinlock still held!
11951228 */
1196- buf = StrategyGetBuffer (strategy , & buf_state );
1229+ buf = StrategyGetBuffer (strategy , & buf_state , & from_ring );
11971230
11981231 Assert (BUF_STATE_GET_REFCOUNT (buf_state ) == 0 );
11991232
@@ -1247,7 +1280,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12471280 UnlockBufHdr (buf , buf_state );
12481281
12491282 if (XLogNeedsFlush (lsn ) &&
1250- StrategyRejectBuffer (strategy , buf ))
1283+ StrategyRejectBuffer (strategy , buf , from_ring ))
12511284 {
12521285 /* Drop lock/pin and loop around for another buffer */
12531286 LWLockRelease (BufferDescriptorGetContentLock (buf ));
@@ -1262,7 +1295,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12621295 smgr -> smgr_rlocator .locator .dbOid ,
12631296 smgr -> smgr_rlocator .locator .relNumber );
12641297
1265- FlushBuffer (buf , NULL );
1298+ FlushBuffer (buf , NULL , IOOBJECT_RELATION , * io_context );
12661299 LWLockRelease (BufferDescriptorGetContentLock (buf ));
12671300
12681301 ScheduleBufferTagForWriteback (& BackendWritebackContext ,
@@ -1443,6 +1476,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
14431476
14441477 LWLockRelease (newPartitionLock );
14451478
1479+ if (oldFlags & BM_VALID )
1480+ {
1481+ /*
1482+ * When a BufferAccessStrategy is in use, blocks evicted from shared
1483+ * buffers are counted as IOOP_EVICT in the corresponding context
1484+ * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
1485+ * strategy in two cases: 1) while initially claiming buffers for the
1486+ * strategy ring, and 2) to replace an existing strategy ring buffer
1487+ * because it is pinned or in use and cannot be reused.
1488+ *
1489+ * Blocks evicted from buffers already in the strategy ring are
1490+ * counted as IOOP_REUSE in the corresponding strategy context.
1491+ *
1492+ * At this point, we can accurately count evictions and reuses,
1493+ * because we have successfully claimed the valid buffer. Previously,
1494+ * we may have been forced to release the buffer due to concurrent
1495+ * pinners or erroring out.
1496+ */
1497+ pgstat_count_io_op (IOOBJECT_RELATION , * io_context ,
1498+ from_ring ? IOOP_REUSE : IOOP_EVICT );
1499+ }
1500+
14461501 /*
14471502 * Buffer contents are currently invalid. Try to obtain the right to
14481503 * start I/O. If StartBufferIO returns false, then someone else managed
@@ -2563,7 +2618,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
25632618 PinBuffer_Locked (bufHdr );
25642619 LWLockAcquire (BufferDescriptorGetContentLock (bufHdr ), LW_SHARED );
25652620
2566- FlushBuffer (bufHdr , NULL );
2621+ FlushBuffer (bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL );
25672622
25682623 LWLockRelease (BufferDescriptorGetContentLock (bufHdr ));
25692624
@@ -2813,7 +2868,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
28132868 * as the second parameter. If not, pass NULL.
28142869 */
28152870static void
2816- FlushBuffer (BufferDesc * buf , SMgrRelation reln )
2871+ FlushBuffer (BufferDesc * buf , SMgrRelation reln , IOObject io_object ,
2872+ IOContext io_context )
28172873{
28182874 XLogRecPtr recptr ;
28192875 ErrorContextCallback errcallback ;
@@ -2907,6 +2963,26 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
29072963 bufToWrite ,
29082964 false);
29092965
2966+ /*
2967+ * When a strategy is in use, only flushes of dirty buffers already in the
2968+ * strategy ring are counted as strategy writes (IOCONTEXT
2969+ * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
2970+ * statistics tracking.
2971+ *
2972+ * If a shared buffer initially added to the ring must be flushed before
2973+ * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
2974+ *
2975+ * If a shared buffer which was added to the ring later -- because the
2976+ * current strategy buffer is pinned or in use, or because all strategy
2977+ * buffers were dirty and rejected (for BAS_BULKREAD operations only) --
2978+ * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
2979+ * (the from_ring flag that BufferAlloc() obtains from StrategyGetBuffer()
2979+ * will be false).
2980+ *
2981+ * When a strategy is not in use, the write can only be a "regular" write
2982+ * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
2983+ */
2984+ pgstat_count_io_op (IOOBJECT_RELATION , io_context , IOOP_WRITE );
2985+
29102986 if (track_io_timing )
29112987 {
29122988 INSTR_TIME_SET_CURRENT (io_time );
@@ -3549,6 +3625,8 @@ FlushRelationBuffers(Relation rel)
35493625 buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED );
35503626 pg_atomic_unlocked_write_u32 (& bufHdr -> state , buf_state );
35513627
3628+ pgstat_count_io_op (IOOBJECT_TEMP_RELATION , IOCONTEXT_NORMAL , IOOP_WRITE );
3629+
35523630 /* Pop the error context stack */
35533631 error_context_stack = errcallback .previous ;
35543632 }
@@ -3581,7 +3659,7 @@ FlushRelationBuffers(Relation rel)
35813659 {
35823660 PinBuffer_Locked (bufHdr );
35833661 LWLockAcquire (BufferDescriptorGetContentLock (bufHdr ), LW_SHARED );
3584- FlushBuffer (bufHdr , RelationGetSmgr (rel ));
3662+ FlushBuffer (bufHdr , RelationGetSmgr (rel ), IOOBJECT_RELATION , IOCONTEXT_NORMAL );
35853663 LWLockRelease (BufferDescriptorGetContentLock (bufHdr ));
35863664 UnpinBuffer (bufHdr );
35873665 }
@@ -3679,7 +3757,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
36793757 {
36803758 PinBuffer_Locked (bufHdr );
36813759 LWLockAcquire (BufferDescriptorGetContentLock (bufHdr ), LW_SHARED );
3682- FlushBuffer (bufHdr , srelent -> srel );
3760+ FlushBuffer (bufHdr , srelent -> srel , IOOBJECT_RELATION , IOCONTEXT_NORMAL );
36833761 LWLockRelease (BufferDescriptorGetContentLock (bufHdr ));
36843762 UnpinBuffer (bufHdr );
36853763 }
@@ -3889,7 +3967,7 @@ FlushDatabaseBuffers(Oid dbid)
38893967 {
38903968 PinBuffer_Locked (bufHdr );
38913969 LWLockAcquire (BufferDescriptorGetContentLock (bufHdr ), LW_SHARED );
3892- FlushBuffer (bufHdr , NULL );
3970+ FlushBuffer (bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL );
38933971 LWLockRelease (BufferDescriptorGetContentLock (bufHdr ));
38943972 UnpinBuffer (bufHdr );
38953973 }
@@ -3916,7 +3994,7 @@ FlushOneBuffer(Buffer buffer)
39163994
39173995 Assert (LWLockHeldByMe (BufferDescriptorGetContentLock (bufHdr )));
39183996
3919- FlushBuffer (bufHdr , NULL );
3997+ FlushBuffer (bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL );
39203998}
39213999
39224000/*
0 commit comments