@@ -172,7 +172,7 @@ GlobalSnapshotShmemInit()
172172/*
173173 * GlobalSnapshotStartup
174174 *
175- * Set gsXidMap etnries to oldestActiveXID during startup.
175+ * Set gsXidMap entries to oldestActiveXID during startup.
176176 */
177177void
178178GlobalSnapshotStartup (TransactionId oldestActiveXID )
@@ -200,24 +200,33 @@ GlobalSnapshotStartup(TransactionId oldestActiveXID)
200200 * global transaction. Otherwise old versions of tuples that were needed for
201201 * this transaction can be recycled by other processes (vacuum, HOT, etc).
202202 *
203- * Called upon each snapshot creation after ProcArrayLock is released. Such
204- * usage creates a race condition. It is possible that backend who got
205- * glabal_csn called GlobalSnapshotMapXmin() only after other backends managed
206- * to get snapshot and complete GlobalSnapshotMapXmin() call. To address that
207- * race we do two thigs:
203+ * Locking here is not trivial. Called upon each snapshot creation after
204+ * ProcArrayLock is released. Such usage creates several race conditions. It
205+ * is possible that backend who got global_csn called GlobalSnapshotMapXmin()
206+ * only after other backends managed to get snapshot and complete
207+ * GlobalSnapshotMapXmin() call, or even committed. This is safe because
208208 *
209- * * snapshot_global_csn is always rounded up to next second. So that is
210- * okay if call to GlobalSnapshotMapXmin() with later global_csn will
211- * succeed first -- it anyway will be taken into account for a next
209+ * * We already hold our xmin in MyPgXact, so our snapshot will not be
210+ * harmed even though ProcArrayLock is released.
211+ *
212+ * * snapshot_global_csn is always pessmistically rounded up to the next
212213 * second.
213214 *
215+ * * For performance reasons, xmin value for particular second is filled
216+ * only once. Because of that instead of writing to buffer just our
217+ * xmin (which is enough for our snapshot), we bump oldestXmin there --
218+ * it mitigates the possibility of damaging someone else's snapshot by
219+ * writing to the buffer too advanced value in case of slowness of
220+ * another backend who generated csn earlier, but didn't manage to
221+ * insert it before us.
222+ *
214223 * * if GlobalSnapshotMapXmin() founds a gap in several seconds between
215224 * current call and latest completed call then it should fill that gap
216- * with latest known values instead of new one. Otherwise it is possible
217- * (however highly unlikely) that this gap also happend between taking
218- * snapshot and call to GlobalSnapshotMapXmin() for some backend. And we
219- * are at risk to fill circullar buffer with oldestXmin's that are
220- * bigger then they actually were.
225+ * with latest known values instead of new one. Otherwise it is
226+ * possible (however highly unlikely) that this gap also happend
227+ * between taking snapshot and call to GlobalSnapshotMapXmin() for some
228+ * backend. And we are at risk to fill circullar buffer with
229+ * oldestXmin's that are bigger then they actually were.
221230 */
222231void
223232GlobalSnapshotMapXmin (GlobalCSN snapshot_global_csn )
@@ -233,10 +242,7 @@ GlobalSnapshotMapXmin(GlobalCSN snapshot_global_csn)
233242 Assert (gsXidMap != NULL );
234243
235244 /*
236- * We don't have guarantee that process who called us first for this
237- * csn_seconds is actually one who took snapshot firt in this second.
238- * So just round up global_csn to the next second -- snapshots for next
239- * second would have oldestXmin greater or equal then ours anyway.
245+ * Round up global_csn to the next second -- pessimistically and safely.
240246 */
241247 csn_seconds = (snapshot_global_csn / NSECS_PER_SEC + 1 );
242248
@@ -290,16 +296,15 @@ GlobalSnapshotMapXmin(GlobalCSN snapshot_global_csn)
290296
291297 /* Fill new entry with current_oldest_xmin */
292298 gsXidMap -> xmin_by_second [offset ] = current_oldest_xmin ;
293- offset = (offset + gsXidMap -> size - 1 ) % gsXidMap -> size ;
294299
295300 /*
296301 * If we have gap then fill it with previous_oldest_xmin for reasons
297302 * outlined in comment above this function.
298303 */
299304 for (i = 1 ; i < gap ; i ++ )
300305 {
301- gsXidMap -> xmin_by_second [offset ] = previous_oldest_xmin ;
302306 offset = (offset + gsXidMap -> size - 1 ) % gsXidMap -> size ;
307+ gsXidMap -> xmin_by_second [offset ] = previous_oldest_xmin ;
303308 }
304309
305310 oldest_deferred_xmin =
@@ -309,8 +314,11 @@ GlobalSnapshotMapXmin(GlobalCSN snapshot_global_csn)
309314
310315 /*
311316 * Advance procArray->global_snapshot_xmin after we released
312- * GlobalSnapshotXidMapLock.
317+ * GlobalSnapshotXidMapLock. Since we gather not xmin but oldestXmin, it
318+ * never goes backwards regardless of how slow we can do that.
313319 */
320+ Assert (TransactionIdFollowsOrEquals (oldest_deferred_xmin ,
321+ ProcArrayGetGlobalSnapshotXmin ()));
314322 ProcArraySetGlobalSnapshotXmin (oldest_deferred_xmin );
315323}
316324
@@ -554,7 +562,7 @@ XidInvisibleInGlobalSnapshot(TransactionId xid, Snapshot snapshot)
554562 * Functions to handle distributed commit on transaction coordinator:
555563 * GlobalSnapshotPrepareCurrent() / GlobalSnapshotAssignCsnCurrent().
556564 * Correspoding functions for remote nodes are defined in twophase.c:
557- * pg_global_snaphot_prepare/pg_global_snaphot_assign .
565+ * pg_global_snapshot_prepare/pg_global_snapshot_assign .
558566 *****************************************************************************/
559567
560568
@@ -594,7 +602,7 @@ GlobalSnapshotPrepareCurrent()
594602 *
595603 * Asign GlobalCSN for currently active transaction. GlobalCSN is supposedly
596604 * maximal among of values returned by GlobalSnapshotPrepareCurrent and
597- * pg_global_snaphot_prepare .
605+ * pg_global_snapshot_prepare .
598606 */
599607void
600608GlobalSnapshotAssignCsnCurrent (GlobalCSN global_csn )
@@ -609,7 +617,7 @@ GlobalSnapshotAssignCsnCurrent(GlobalCSN global_csn)
609617 if (!GlobalCSNIsNormal (global_csn ))
610618 ereport (ERROR ,
611619 (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
612- errmsg ("pg_global_snaphot_assign expects normal global_csn" )));
620+ errmsg ("pg_global_snapshot_assign expects normal global_csn" )));
613621
614622 /* Skip emtpty transactions */
615623 if (!TransactionIdIsValid (GetCurrentTransactionIdIfAny ()))
@@ -633,7 +641,7 @@ GlobalSnapshotAssignCsnCurrent(GlobalCSN global_csn)
633641 * proc->assignedGlobalCsn to GlobalCSNLog.
634642 *
635643 * Same rules applies to global transaction, except that global_csn is already
636- * assigned by GlobalSnapshotAssignCsnCurrent/pg_global_snaphot_assign and
644+ * assigned by GlobalSnapshotAssignCsnCurrent/pg_global_snapshot_assign and
637645 * GlobalSnapshotPrecommit is basically no-op.
638646 *
639647 * GlobalSnapshotAbort is slightly different comparing to commit because abort
0 commit comments