88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.83 2001/04/02 23:20:24 tgl Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.84 2001/05/10 20:38:49 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -54,10 +54,9 @@ typedef struct _MdfdVec
5454 int mdfd_flags ; /* fd status flags */
5555
5656/* these are the assigned bits in mdfd_flags: */
57- #define MDFD_FREE (1 << 0)/* unused entry */
57+ #define MDFD_FREE (1 << 0) /* unused entry */
5858
59- int mdfd_lstbcnt ; /* most recent block count */
60- int mdfd_nextFree ; /* next free vector */
59+ int mdfd_nextFree ; /* link to next freelist member, if free */
6160#ifndef LET_OS_MANAGE_FILESIZE
6261 struct _MdfdVec * mdfd_chain ;/* for large relations */
6362#endif
@@ -164,7 +163,6 @@ mdcreate(Relation reln)
164163
165164 Md_fdvec [vfd ].mdfd_vfd = fd ;
166165 Md_fdvec [vfd ].mdfd_flags = (uint16 ) 0 ;
167- Md_fdvec [vfd ].mdfd_lstbcnt = 0 ;
168166#ifndef LET_OS_MANAGE_FILESIZE
169167 Md_fdvec [vfd ].mdfd_chain = (MdfdVec * ) NULL ;
170168#endif
@@ -225,52 +223,69 @@ mdunlink(RelFileNode rnode)
225223/*
226224 * mdextend() -- Add a block to the specified relation.
227225 *
226+ * The semantics are basically the same as mdwrite(): write at the
227+ * specified position. However, we are expecting to extend the
228+ * relation (ie, blocknum is the current EOF), and so in case of
229+ * failure we clean up by truncating.
230+ *
228231 * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
229232 * appropriate.
233+ *
234+ * Note: this routine used to call mdnblocks() to get the block position
235+ * to write at, but that's pretty silly since the caller needs to know where
236+ * the block will be written, and accordingly must have done mdnblocks()
237+ * already. Might as well pass in the position and save a seek.
230238 */
231239int
232- mdextend (Relation reln , char * buffer )
240+ mdextend (Relation reln , BlockNumber blocknum , char * buffer )
233241{
234- long pos ,
235- nbytes ;
236- int nblocks ;
242+ long seekpos ;
243+ int nbytes ;
237244 MdfdVec * v ;
238245
239- nblocks = mdnblocks (reln );
240- v = _mdfd_getseg (reln , nblocks );
246+ v = _mdfd_getseg (reln , blocknum );
241247
242- if ((pos = FileSeek (v -> mdfd_vfd , 0L , SEEK_END )) < 0 )
243- return SM_FAIL ;
248+ #ifndef LET_OS_MANAGE_FILESIZE
249+ seekpos = (long ) (BLCKSZ * (blocknum % RELSEG_SIZE ));
250+ #ifdef DIAGNOSTIC
251+ if (seekpos >= BLCKSZ * RELSEG_SIZE )
252+ elog (FATAL , "seekpos too big!" );
253+ #endif
254+ #else
255+ seekpos = (long ) (BLCKSZ * (blocknum ));
256+ #endif
244257
245- if (pos % BLCKSZ != 0 ) /* the last block is incomplete */
246- {
247- pos -= pos % BLCKSZ ;
248- if (FileSeek (v -> mdfd_vfd , pos , SEEK_SET ) < 0 )
249- return SM_FAIL ;
250- }
258+ /*
259+ * Note: because caller obtained blocknum by calling mdnblocks, which
260+ * did a seek(SEEK_END), this seek is often redundant and will be
261+ * optimized away by fd.c. It's not redundant, however, if there is a
262+ * partial page at the end of the file. In that case we want to try to
263+ * overwrite the partial page with a full page. It's also not redundant
264+ * if bufmgr.c had to dump another buffer of the same file to make room
265+ * for the new page's buffer.
266+ */
267+ if (FileSeek (v -> mdfd_vfd , seekpos , SEEK_SET ) != seekpos )
268+ return SM_FAIL ;
251269
252270 if ((nbytes = FileWrite (v -> mdfd_vfd , buffer , BLCKSZ )) != BLCKSZ )
253271 {
254272 if (nbytes > 0 )
255273 {
256- FileTruncate (v -> mdfd_vfd , pos );
257- FileSeek (v -> mdfd_vfd , pos , SEEK_SET );
274+ int save_errno = errno ;
275+
276+ /* Remove the partially-written page */
277+ FileTruncate (v -> mdfd_vfd , seekpos );
278+ FileSeek (v -> mdfd_vfd , seekpos , SEEK_SET );
279+ errno = save_errno ;
258280 }
259281 return SM_FAIL ;
260282 }
261283
262- /* try to keep the last block count current, though it's just a hint */
263284#ifndef LET_OS_MANAGE_FILESIZE
264- if ((v -> mdfd_lstbcnt = (++ nblocks % RELSEG_SIZE )) == 0 )
265- v -> mdfd_lstbcnt = RELSEG_SIZE ;
266-
267285#ifdef DIAGNOSTIC
268- if (_mdnblocks (v -> mdfd_vfd , BLCKSZ ) > RELSEG_SIZE
269- || v -> mdfd_lstbcnt > RELSEG_SIZE )
286+ if (_mdnblocks (v -> mdfd_vfd , BLCKSZ ) > RELSEG_SIZE )
270287 elog (FATAL , "segment too big!" );
271288#endif
272- #else
273- v -> mdfd_lstbcnt = ++ nblocks ;
274289#endif
275290
276291 return SM_SUCCESS ;
@@ -319,12 +334,11 @@ mdopen(Relation reln)
319334
320335 Md_fdvec [vfd ].mdfd_vfd = fd ;
321336 Md_fdvec [vfd ].mdfd_flags = (uint16 ) 0 ;
322- Md_fdvec [vfd ].mdfd_lstbcnt = _mdnblocks (fd , BLCKSZ );
323337#ifndef LET_OS_MANAGE_FILESIZE
324338 Md_fdvec [vfd ].mdfd_chain = (MdfdVec * ) NULL ;
325339
326340#ifdef DIAGNOSTIC
327- if (Md_fdvec [ vfd ]. mdfd_lstbcnt > RELSEG_SIZE )
341+ if (_mdnblocks ( fd , BLCKSZ ) > RELSEG_SIZE )
328342 elog (FATAL , "segment too big on relopen!" );
329343#endif
330344#endif
@@ -440,9 +454,12 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
440454 status = SM_SUCCESS ;
441455 if ((nbytes = FileRead (v -> mdfd_vfd , buffer , BLCKSZ )) != BLCKSZ )
442456 {
443- if (nbytes == 0 )
444- MemSet (buffer , 0 , BLCKSZ );
445- else if (blocknum == 0 && nbytes > 0 && mdnblocks (reln ) == 0 )
457+ /*
458+ * If we are at EOF, return zeroes without complaining.
459+ * (XXX Is this still necessary/a good idea??)
460+ */
461+ if (nbytes == 0 ||
462+ (nbytes > 0 && mdnblocks (reln ) == blocknum ))
446463 MemSet (buffer , 0 , BLCKSZ );
447464 else
448465 status = SM_FAIL ;
@@ -459,7 +476,6 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
459476int
460477mdwrite (Relation reln , BlockNumber blocknum , char * buffer )
461478{
462- int status ;
463479 long seekpos ;
464480 MdfdVec * v ;
465481
@@ -478,11 +494,10 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
478494 if (FileSeek (v -> mdfd_vfd , seekpos , SEEK_SET ) != seekpos )
479495 return SM_FAIL ;
480496
481- status = SM_SUCCESS ;
482497 if (FileWrite (v -> mdfd_vfd , buffer , BLCKSZ ) != BLCKSZ )
483- status = SM_FAIL ;
498+ return SM_FAIL ;
484499
485- return status ;
500+ return SM_SUCCESS ;
486501}
487502
488503/*
@@ -662,31 +677,29 @@ mdnblocks(Relation reln)
662677 nblocks = _mdnblocks (v -> mdfd_vfd , BLCKSZ );
663678 if (nblocks > RELSEG_SIZE )
664679 elog (FATAL , "segment too big in mdnblocks!" );
665- v -> mdfd_lstbcnt = nblocks ;
666- if (nblocks == RELSEG_SIZE )
667- {
668- segno ++ ;
680+ if (nblocks < RELSEG_SIZE )
681+ return (segno * RELSEG_SIZE ) + nblocks ;
682+ /*
683+ * If segment is exactly RELSEG_SIZE, advance to next one.
684+ */
685+ segno ++ ;
669686
687+ if (v -> mdfd_chain == (MdfdVec * ) NULL )
688+ {
689+ /*
690+ * Because we pass O_CREAT, we will create the next
691+ * segment (with zero length) immediately, if the last
692+ * segment is of length RELSEG_SIZE. This is unnecessary
693+ * but harmless, and testing for the case would take more
694+ * cycles than it seems worth.
695+ */
696+ v -> mdfd_chain = _mdfd_openseg (reln , segno , O_CREAT );
670697 if (v -> mdfd_chain == (MdfdVec * ) NULL )
671- {
672-
673- /*
674- * Because we pass O_CREAT, we will create the next
675- * segment (with zero length) immediately, if the last
676- * segment is of length REL_SEGSIZE. This is unnecessary
677- * but harmless, and testing for the case would take more
678- * cycles than it seems worth.
679- */
680- v -> mdfd_chain = _mdfd_openseg (reln , segno , O_CREAT );
681- if (v -> mdfd_chain == (MdfdVec * ) NULL )
682- elog (ERROR , "cannot count blocks for %s -- open failed: %m" ,
683- RelationGetRelationName (reln ));
684- }
685-
686- v = v -> mdfd_chain ;
698+ elog (ERROR , "cannot count blocks for %s -- open failed: %m" ,
699+ RelationGetRelationName (reln ));
687700 }
688- else
689- return ( segno * RELSEG_SIZE ) + nblocks ;
701+
702+ v = v -> mdfd_chain ;
690703 }
691704#else
692705 return _mdnblocks (v -> mdfd_vfd , BLCKSZ );
@@ -761,7 +774,6 @@ mdtruncate(Relation reln, int nblocks)
761774
762775 if (FileTruncate (v -> mdfd_vfd , lastsegblocks * BLCKSZ ) < 0 )
763776 return -1 ;
764- v -> mdfd_lstbcnt = lastsegblocks ;
765777 v = v -> mdfd_chain ;
766778 ov -> mdfd_chain = (MdfdVec * ) NULL ;
767779 }
@@ -779,7 +791,6 @@ mdtruncate(Relation reln, int nblocks)
779791#else
780792 if (FileTruncate (v -> mdfd_vfd , nblocks * BLCKSZ ) < 0 )
781793 return -1 ;
782- v -> mdfd_lstbcnt = nblocks ;
783794#endif
784795
785796 return nblocks ;
@@ -958,13 +969,12 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
958969 /* fill the entry */
959970 v -> mdfd_vfd = fd ;
960971 v -> mdfd_flags = (uint16 ) 0 ;
961- v -> mdfd_lstbcnt = _mdnblocks (fd , BLCKSZ );
962972#ifndef LET_OS_MANAGE_FILESIZE
963973 v -> mdfd_chain = (MdfdVec * ) NULL ;
964974
965975#ifdef DIAGNOSTIC
966- if (v -> mdfd_lstbcnt > RELSEG_SIZE )
967- elog (FATAL , "segment too big on open !" );
976+ if (_mdnblocks ( fd , BLCKSZ ) > RELSEG_SIZE )
977+ elog (FATAL , "segment too big on openseg !" );
968978#endif
969979#endif
970980
0 commit comments