/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *    display some contents of the buffer cache
 *
 * contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"


#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
#define NUM_BUFFERCACHE_PAGES_ELEM 9
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
#define NUM_BUFFERCACHE_EVICT_ELEM 2
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
#define NUM_BUFFERCACHE_MARK_DIRTY_ELEM 2
#define NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM 3
#define NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM 3

#define NUM_BUFFERCACHE_OS_PAGES_ELEM 3

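/*
 * Each NUM_* constant above is the number of output columns in the
 * corresponding SQL function's result row; the get_call_result_type()
 * checks below compare against these to verify that this C code is paired
 * with a matching SQL-level function definition.
 */
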
PG_MODULE_MAGIC_EXT(
                    .name = "pg_buffercache",
                    .version = PG_VERSION
);

/*
 * Record structure holding the cache data to be exposed.
 */
typedef struct
{
    uint32 bufferid;
    RelFileNumber relfilenumber;
    Oid reltablespace;
    Oid reldatabase;
    ForkNumber forknum;
    BlockNumber blocknum;
    bool isvalid;
    bool isdirty;
    uint16 usagecount;

    /*
     * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
     * being pinned by too many backends and each backend will only pin once
     * because of bufmgr.c's PrivateRefCount infrastructure.
     */
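    /*
     * (For scale: MAX_BACKENDS is currently capped at 2^18 - 1, far below
     * what an int32 can represent.)
     */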
    int32 pinning_backends;
} BufferCachePagesRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc tupdesc;
    BufferCachePagesRec *record;
} BufferCachePagesContext;

/*
 * Record structure holding the cache data to be exposed for OS pages. This
 * structure is used by pg_buffercache_os_pages(), where NUMA information may
 * or may not be included.
 */
typedef struct
{
    uint32 bufferid;
    int64 page_num;
    int32 numa_node;
} BufferCacheOsPagesRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc tupdesc;
    bool include_numa;
    BufferCacheOsPagesRec *record;
} BufferCacheOsPagesContext;

/*
 * Function returning data from the shared buffer cache - buffer number,
 * relation node/tablespace/database/blocknum and dirty indicator.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_os_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
PG_FUNCTION_INFO_V1(pg_buffercache_evict);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty);
PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all);


/* Only need to touch memory once per backend process lifetime */
static bool firstNumaTouch = true;

Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx; /* User function context. */
    TupleDesc tupledesc;
    TupleDesc expected_tupledesc;
    HeapTuple tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension,
         * transparently handle the (non-)existence of the pinning_backends
         * column. Unfortunately, we have to look up the expected result
         * type for that: we cannot simply trust the result type implied by
         * the function definition, or we might crash when somebody uses the
         * old (or even a wrong) function definition.
         */
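
        /*
         * Concretely: a version 1.0 SQL definition has
         * NUM_BUFFERCACHE_PAGES_MIN_ELEM (8) output columns, while later
         * versions add pinning_backends for NUM_BUFFERCACHE_PAGES_ELEM (9);
         * anything outside that range is rejected below.
         */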
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr;
            uint32 buf_state;

            CHECK_FOR_INTERRUPTS();

            bufHdr = GetBufferDescriptor(i);
            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
            fctx->record[i].reltablespace = bufHdr->tag.spcOid;
            fctx->record[i].reldatabase = bufHdr->tag.dbOid;
            fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr);
        }
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32 i = funcctx->call_cntr;
        Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
            /* unused for v1.0 callers, but the array is always long enough */
            nulls[8] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = Int16GetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}

/*
 * Inquire about OS page mappings for shared buffers, optionally with NUMA
 * information.
 *
 * When "include_numa" is false, this routine ignores everything related to
 * NUMA (the corresponding values are returned as NULL), returning only the
 * mapping information between shared buffers and OS pages.
 *
 * When "include_numa" is true, NUMA is initialized and numa_node values are
 * generated. In order to get reliable results we also need to touch memory
 * pages, so that the inquiry about the NUMA memory node does not return -2,
 * which indicates unmapped/unallocated pages.
 *
 * Buffers may be smaller or larger than OS memory pages. For each buffer we
 * return one entry for each memory page used by the buffer (if the buffer is
 * smaller, it only uses a part of one memory page).
 *
 * We expect both sizes (for buffers and memory pages) to be powers of two,
 * so one is always a multiple of the other.
 */
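
/*
 * For example (assuming BLCKSZ = 8192 on a platform with 4KB OS pages),
 * each page-aligned buffer spans two OS pages and produces two result rows;
 * with 2MB huge pages, a single OS page holds many buffers and each buffer
 * produces a single row.
 */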
static Datum
pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa)
{
    FuncCallContext *funcctx;
    MemoryContext oldcontext;
    BufferCacheOsPagesContext *fctx; /* User function context. */
    TupleDesc tupledesc;
    TupleDesc expected_tupledesc;
    HeapTuple tuple;
    Datum result;

    if (SRF_IS_FIRSTCALL())
    {
        int i,
            idx;
        Size os_page_size;
        int pages_per_buffer;
        int *os_page_status = NULL;
        uint64 os_page_count = 0;
        int max_entries;
        char *startptr,
             *endptr;

        /* If NUMA information is requested, initialize NUMA support. */
        if (include_numa && pg_numa_init() == -1)
            elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

        /*
         * The database block size and OS memory page size are unlikely to be
         * the same. The block size is 1-32KB, while the memory page size
         * depends on the platform. On x86 it's usually 4KB, on ARM it's 4KB
         * or 64KB, and there are also features like THP etc. Moreover, we
         * don't quite know how the pages and buffers "align" in memory - the
         * buffers may be shifted in some way, using more memory pages than
         * necessary.
         *
         * So we need to be careful about mapping buffers to memory pages. We
         * calculate the maximum number of pages a buffer might use, so that
         * we allocate enough space for the entries. And then we count the
         * actual number of entries as we scan the buffers.
         *
         * This information is needed before calling move_pages() for NUMA
         * node id inquiry.
         */
        os_page_size = pg_get_shmem_pagesize();

        /*
         * The page size and block size are expected to be 2^k, so one
         * divides the other (we don't know in which direction). This does
         * not say anything about the relative alignment of pages/buffers.
         */
        Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
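
        /*
         * E.g. with BLCKSZ = 8192: 4KB pages give BLCKSZ % os_page_size == 0,
         * while 2MB huge pages give os_page_size % BLCKSZ == 0. Both sizes
         * being powers of two is what makes one of the divisions exact.
         */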

        if (include_numa)
        {
            void **os_page_ptrs = NULL;

            /*
             * How many addresses are we going to query? Simply get the page
             * for the first buffer, and the first page after the last
             * buffer, and count the pages from that.
             */
            startptr = (char *) TYPEALIGN_DOWN(os_page_size,
                                               BufferGetBlock(1));
            endptr = (char *) TYPEALIGN(os_page_size,
                                        (char *) BufferGetBlock(NBuffers) + BLCKSZ);
            os_page_count = (endptr - startptr) / os_page_size;

            /* Used to determine the NUMA node for all OS pages at once */
            os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
            os_page_status = palloc(sizeof(uint64) * os_page_count);

            /*
             * Fill pointers for all the memory pages. This loop stores and
             * touches (if needed) addresses into os_page_ptrs[] as input to
             * one big move_pages(2) inquiry system call, as done in
             * pg_numa_query_pages().
             */
            idx = 0;
            for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
            {
                os_page_ptrs[idx++] = ptr;

                /* Only need to touch memory once per backend process lifetime */
                if (firstNumaTouch)
                    pg_numa_touch_mem_if_required(ptr);
            }

            Assert(idx == os_page_count);

            elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
                 "os_page_size=%zu", NBuffers, os_page_count, os_page_size);

            /*
             * If we ever get 0xff back from the kernel inquiry, then we
             * probably have a bug in our buffer to OS page mapping code
             * here.
             */
            memset(os_page_status, 0xff, sizeof(int) * os_page_count);

            /* Query NUMA status for all the pointers */
            if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
                elog(ERROR, "failed NUMA pages inquiry: %m");
        }

        /* Initialize the multi-call context, load entries about buffers */

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCacheOsPagesContext *) palloc(sizeof(BufferCacheOsPagesContext));

        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts != NUM_BUFFERCACHE_OS_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
                           INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);
        fctx->include_numa = include_numa;

        /*
         * Each buffer needs at least one entry, but it might be offset in
         * some way, and use one extra entry. So we allocate space for the
         * maximum number of entries we might need, and then count the exact
         * number as we're walking buffers. That way we can do it in one
         * pass, without reallocating memory.
         */
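        /*
         * For instance, with BLCKSZ = 8192 and 4KB pages this reserves
         * Max(1, 2) + 1 = 3 entries per buffer; with 2MB huge pages,
         * Max(1, 0) + 1 = 2 entries per buffer.
         */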
        pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
        max_entries = NBuffers * pages_per_buffer;

        /* Allocate entries for BufferCacheOsPagesRec records. */
        fctx->record = (BufferCacheOsPagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCacheOsPagesRec) * max_entries);

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        if (include_numa && firstNumaTouch)
            elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
        idx = 0;
        for (i = 0; i < NBuffers; i++)
        {
            char *buffptr = (char *) BufferGetBlock(i + 1);
            BufferDesc *bufHdr;
            uint32 bufferid;
            int32 page_num;
            char *startptr_buff,
                 *endptr_buff;

            CHECK_FOR_INTERRUPTS();

            bufHdr = GetBufferDescriptor(i);

            /* Lock each buffer header before inspecting. */
            LockBufHdr(bufHdr);
            bufferid = BufferDescriptorGetBuffer(bufHdr);
            UnlockBufHdr(bufHdr);

            /* start of the first page of this buffer */
            startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);

            /* end of the buffer (no need to align to memory page) */
            endptr_buff = buffptr + BLCKSZ;

            Assert(startptr_buff < endptr_buff);

            /* calculate ID of the first page for this buffer */
            page_num = (startptr_buff - startptr) / os_page_size;

            /* Add an entry for each OS page overlapping with this buffer. */
            for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
            {
                fctx->record[idx].bufferid = bufferid;
                fctx->record[idx].page_num = page_num;
                fctx->record[idx].numa_node = include_numa ? os_page_status[page_num] : -1;

                /* advance to the next entry/page */
                ++idx;
                ++page_num;
            }
        }

        Assert(idx <= max_entries);

        if (include_numa)
            Assert(idx >= os_page_count);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = idx;
        funcctx->user_fctx = fctx;

        /* Remember this backend touched the pages (only relevant for NUMA) */
        if (include_numa)
            firstNumaTouch = false;
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32 i = funcctx->call_cntr;
        Datum values[NUM_BUFFERCACHE_OS_PAGES_ELEM];
        bool nulls[NUM_BUFFERCACHE_OS_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        values[1] = Int64GetDatum(fctx->record[i].page_num);
        nulls[1] = false;

        if (fctx->include_numa)
        {
            values[2] = Int32GetDatum(fctx->record[i].numa_node);
            nulls[2] = false;
        }
        else
        {
            values[2] = (Datum) 0;
            nulls[2] = true;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}

/*
 * pg_buffercache_os_pages
 *
 * Retrieve information about OS pages, with or without NUMA information.
 */
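/*
 * For instance, assuming the extension's SQL wrapper passes its boolean
 * argument straight through:
 *
 *     SELECT * FROM pg_buffercache_os_pages(false); -- mapping only
 *     SELECT * FROM pg_buffercache_os_pages(true);  -- also query numa_node
 */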
Datum
pg_buffercache_os_pages(PG_FUNCTION_ARGS)
{
    bool include_numa;

    /* Get the boolean parameter that controls the NUMA behavior. */
    include_numa = PG_GETARG_BOOL(0);

    return pg_buffercache_os_pages_internal(fcinfo, include_numa);
}

/* Backward-compatible wrapper for v1.6. */
Datum
pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
{
    /* Call the internal function with include_numa=true */
    return pg_buffercache_os_pages_internal(fcinfo, true);
}

Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
    bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];

    int32 buffers_used = 0;
    int32 buffers_unused = 0;
    int32 buffers_dirty = 0;
    int32 buffers_pinned = 0;
    int64 usagecount_total = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr;
        uint32 buf_state;

        CHECK_FOR_INTERRUPTS();

        /*
         * This function summarizes the state of all headers. Locking the
         * buffer headers wouldn't provide an improved result, as the state
         * of a buffer can still change after we release the lock, and it
         * would noticeably increase the cost of the function.
         */
        bufHdr = GetBufferDescriptor(i);
        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if (buf_state & BM_VALID)
        {
            buffers_used++;
            usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                buffers_dirty++;
        }
        else
            buffers_unused++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            buffers_pinned++;
    }

    memset(nulls, 0, sizeof(nulls));
    values[0] = Int32GetDatum(buffers_used);
    values[1] = Int32GetDatum(buffers_unused);
    values[2] = Int32GetDatum(buffers_dirty);
    values[3] = Int32GetDatum(buffers_pinned);

    if (buffers_used != 0)
        values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
    else
        nulls[4] = true;

    /* Build and return the tuple. */
    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
    int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
    int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
    Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
    bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};

    InitMaterializedSRF(fcinfo, 0);

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(i);
        uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
        int usage_count;

        CHECK_FOR_INTERRUPTS();

        usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
        usage_counts[usage_count]++;

        if (buf_state & BM_DIRTY)
            dirty[usage_count]++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            pinned[usage_count]++;
    }

    for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
    {
        values[0] = Int32GetDatum(i);
        values[1] = Int32GetDatum(usage_counts[i]);
        values[2] = Int32GetDatum(dirty[i]);
        values[3] = Int32GetDatum(pinned[i]);

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    return (Datum) 0;
}

/*
 * Helper function to check if the user has superuser privileges.
 */
static void
pg_buffercache_superuser_check(char *func_name)
{
    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to use %s()",
                        func_name)));
}

/*
 * Try to evict a shared buffer.
 */
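/*
 * For instance, with the SQL function shipped by this extension
 * (example usage; the SQL-level signature is defined in the extension
 * scripts, not here):
 *
 *     SELECT * FROM pg_buffercache_evict(1);
 *
 * which reports whether buffer 1 was evicted and whether it had to be
 * flushed first.
 */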
Datum
pg_buffercache_evict(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_EVICT_ELEM];
    bool nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};

    Buffer buf = PG_GETARG_INT32(0);
    bool buffer_flushed;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict");

    if (buf < 1 || buf > NBuffers)
        elog(ERROR, "bad buffer ID: %d", buf);

    values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
    values[1] = BoolGetDatum(buffer_flushed);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

/*
 * Try to evict all shared buffers of the specified relation.
 */
Datum
pg_buffercache_evict_relation(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
    bool nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};

    Oid relOid;
    Relation rel;

    int32 buffers_evicted = 0;
    int32 buffers_flushed = 0;
    int32 buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict_relation");

    relOid = PG_GETARG_OID(0);

    rel = relation_open(relOid, AccessShareLock);

    if (RelationUsesLocalBuffers(rel))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
                        "pg_buffercache_evict_relation")));

    EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
                            &buffers_skipped);

    relation_close(rel, AccessShareLock);

    values[0] = Int32GetDatum(buffers_evicted);
    values[1] = Int32GetDatum(buffers_flushed);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

/*
 * Try to evict all shared buffers.
 */
Datum
pg_buffercache_evict_all(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
    bool nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};

    int32 buffers_evicted = 0;
    int32 buffers_flushed = 0;
    int32 buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_evict_all");

    EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
                            &buffers_skipped);

    values[0] = Int32GetDatum(buffers_evicted);
    values[1] = Int32GetDatum(buffers_flushed);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

/*
 * Try to mark a shared buffer as dirty.
 */
Datum
pg_buffercache_mark_dirty(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_MARK_DIRTY_ELEM];
    bool nulls[NUM_BUFFERCACHE_MARK_DIRTY_ELEM] = {0};

    Buffer buf = PG_GETARG_INT32(0);
    bool buffer_already_dirty;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_mark_dirty");

    if (buf < 1 || buf > NBuffers)
        elog(ERROR, "bad buffer ID: %d", buf);

    values[0] = BoolGetDatum(MarkDirtyUnpinnedBuffer(buf, &buffer_already_dirty));
    values[1] = BoolGetDatum(buffer_already_dirty);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

/*
 * Try to mark all the shared buffers of a relation as dirty.
 */
Datum
pg_buffercache_mark_dirty_relation(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM];
    bool nulls[NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM] = {0};

    Oid relOid;
    Relation rel;

    int32 buffers_already_dirty = 0;
    int32 buffers_dirtied = 0;
    int32 buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_mark_dirty_relation");

    relOid = PG_GETARG_OID(0);

    rel = relation_open(relOid, AccessShareLock);

    if (RelationUsesLocalBuffers(rel))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
                        "pg_buffercache_mark_dirty_relation")));

    MarkDirtyRelUnpinnedBuffers(rel, &buffers_dirtied, &buffers_already_dirty,
                                &buffers_skipped);

    relation_close(rel, AccessShareLock);

    values[0] = Int32GetDatum(buffers_dirtied);
    values[1] = Int32GetDatum(buffers_already_dirty);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

/*
 * Try to mark all the shared buffers as dirty.
 */
Datum
pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS)
{
    Datum result;
    TupleDesc tupledesc;
    HeapTuple tuple;
    Datum values[NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM];
    bool nulls[NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM] = {0};

    int32 buffers_already_dirty = 0;
    int32 buffers_dirtied = 0;
    int32 buffers_skipped = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    pg_buffercache_superuser_check("pg_buffercache_mark_dirty_all");

    MarkDirtyAllUnpinnedBuffers(&buffers_dirtied, &buffers_already_dirty,
                                &buffers_skipped);

    values[0] = Int32GetDatum(buffers_dirtied);
    values[1] = Int32GetDatum(buffers_already_dirty);
    values[2] = Int32GetDatum(buffers_skipped);

    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}