/*-------------------------------------------------------------------------
 *
 * bufpage.h
 *	  Standard POSTGRES buffer page definitions.
 *
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/bufpage.h
 *
 *-------------------------------------------------------------------------
 */
  14. #ifndef BUFPAGE_H
  15. #define BUFPAGE_H
  16. #include "access/xlogdefs.h"
  17. #include "storage/block.h"
  18. #include "storage/item.h"
  19. #include "storage/off.h"
  20. /*
  21. * A postgres disk page is an abstraction layered on top of a postgres
  22. * disk block (which is simply a unit of i/o, see block.h).
  23. *
  24. * specifically, while a disk block can be unformatted, a postgres
  25. * disk page is always a slotted page of the form:
  26. *
  27. * +----------------+---------------------------------+
  28. * | PageHeaderData | linp1 linp2 linp3 ... |
  29. * +-----------+----+---------------------------------+
  30. * | ... linpN | |
  31. * +-----------+--------------------------------------+
  32. * | ^ pd_lower |
  33. * | |
  34. * | v pd_upper |
  35. * +-------------+------------------------------------+
  36. * | | tupleN ... |
  37. * +-------------+------------------+-----------------+
  38. * | ... tuple3 tuple2 tuple1 | "special space" |
  39. * +--------------------------------+-----------------+
  40. * ^ pd_special
  41. *
  42. * a page is full when nothing can be added between pd_lower and
  43. * pd_upper.
  44. *
  45. * all blocks written out by an access method must be disk pages.
  46. *
  47. * EXCEPTIONS:
  48. *
  49. * obviously, a page is not formatted before it is initialized by
  50. * a call to PageInit.
  51. *
  52. * NOTES:
  53. *
  54. * linp1..N form an ItemId array. ItemPointers point into this array
  55. * rather than pointing directly to a tuple. Note that OffsetNumbers
  56. * conventionally start at 1, not 0.
  57. *
  58. * tuple1..N are added "backwards" on the page. because a tuple's
  59. * ItemPointer points to its ItemId entry rather than its actual
  60. * byte-offset position, tuples can be physically shuffled on a page
  61. * whenever the need arises.
  62. *
  63. * AM-generic per-page information is kept in PageHeaderData.
  64. *
  65. * AM-specific per-page data (if any) is kept in the area marked "special
  66. * space"; each AM has an "opaque" structure defined somewhere that is
  67. * stored as the page trailer. an access method should always
  68. * initialize its pages with PageInit and then set its own opaque
  69. * fields.
  70. */
  71. typedef Pointer Page;
  72. /*
  73. * location (byte offset) within a page.
  74. *
  75. * note that this is actually limited to 2^15 because we have limited
  76. * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
  77. */
  78. typedef uint16 LocationIndex;
  79. /*
  80. * For historical reasons, the 64-bit LSN value is stored as two 32-bit
  81. * values.
  82. */
  83. typedef struct
  84. {
  85. uint32 xlogid; /* high bits */
  86. uint32 xrecoff; /* low bits */
  87. } PageXLogRecPtr;
  88. #define PageXLogRecPtrGet(val) \
  89. ((uint64) (val).xlogid << 32 | (val).xrecoff)
  90. #define PageXLogRecPtrSet(ptr, lsn) \
  91. ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
  92. /*
  93. * disk page organization
  94. *
  95. * space management information generic to any page
  96. *
  97. * pd_lsn - identifies xlog record for last change to this page.
  98. * pd_checksum - page checksum, if set.
  99. * pd_flags - flag bits.
  100. * pd_lower - offset to start of free space.
  101. * pd_upper - offset to end of free space.
  102. * pd_special - offset to start of special space.
  103. * pd_pagesize_version - size in bytes and page layout version number.
  104. * pd_prune_xid - oldest XID among potentially prunable tuples on page.
  105. *
  106. * The LSN is used by the buffer manager to enforce the basic rule of WAL:
  107. * "thou shalt write xlog before data". A dirty buffer cannot be dumped
  108. * to disk until xlog has been flushed at least as far as the page's LSN.
  109. *
  110. * pd_checksum stores the page checksum, if it has been set for this page;
  111. * zero is a valid value for a checksum. If a checksum is not in use then
  112. * we leave the field unset. This will typically mean the field is zero
  113. * though non-zero values may also be present if databases have been
  114. * pg_upgraded from releases prior to 9.3, when the same byte offset was
  115. * used to store the current timelineid when the page was last updated.
  116. * Note that there is no indication on a page as to whether the checksum
  117. * is valid or not, a deliberate design choice which avoids the problem
  118. * of relying on the page contents to decide whether to verify it. Hence
  119. * there are no flag bits relating to checksums.
  120. *
  121. * pd_prune_xid is a hint field that helps determine whether pruning will be
  122. * useful. It is currently unused in index pages.
  123. *
  124. * The page version number and page size are packed together into a single
  125. * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
  126. * there was no concept of a page version number, and doing it this way
  127. * lets us pretend that pre-7.3 databases have page version number zero.
  128. * We constrain page sizes to be multiples of 256, leaving the low eight
  129. * bits available for a version number.
  130. *
  131. * Minimum possible page size is perhaps 64B to fit page header, opaque space
  132. * and a minimal tuple; of course, in reality you want it much bigger, so
  133. * the constraint on pagesize mod 256 is not an important restriction.
  134. * On the high end, we can only support pages up to 32KB because lp_off/lp_len
  135. * are 15 bits.
  136. */
  137. typedef struct PageHeaderData
  138. {
  139. /* XXX LSN is member of *any* block, not only page-organized ones */
  140. PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
  141. * record for last change to this page */
  142. uint16 pd_checksum; /* checksum */
  143. uint16 pd_flags; /* flag bits, see below */
  144. LocationIndex pd_lower; /* offset to start of free space */
  145. LocationIndex pd_upper; /* offset to end of free space */
  146. LocationIndex pd_special; /* offset to start of special space */
  147. uint16 pd_pagesize_version;
  148. TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  149. ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
  150. } PageHeaderData;
  151. typedef PageHeaderData *PageHeader;
  152. /*
  153. * pd_flags contains the following flag bits. Undefined bits are initialized
  154. * to zero and may be used in the future.
  155. *
  156. * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
  157. * pd_lower. This should be considered a hint rather than the truth, since
  158. * changes to it are not WAL-logged.
  159. *
  160. * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
  161. * page for its new tuple version; this suggests that a prune is needed.
  162. * Again, this is just a hint.
  163. */
  164. #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
  165. #define PD_PAGE_FULL 0x0002 /* not enough free space for new
  166. * tuple? */
  167. #define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
  168. * everyone */
  169. #define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
  170. /*
  171. * Page layout version number 0 is for pre-7.3 Postgres releases.
  172. * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
  173. * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
  174. * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
  175. * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
  176. * added the pd_flags field (by stealing some bits from pd_tli),
  177. * as well as adding the pd_prune_xid field (which enlarges the header).
  178. *
  179. * As of Release 9.3, the checksum version must also be considered when
  180. * handling pages.
  181. */
  182. #define PG_PAGE_LAYOUT_VERSION 4
  183. #define PG_DATA_CHECKSUM_VERSION 1
  184. /* ----------------------------------------------------------------
  185. * page support macros
  186. * ----------------------------------------------------------------
  187. */
  188. /*
  189. * PageIsValid
  190. * True iff page is valid.
  191. */
  192. #define PageIsValid(page) PointerIsValid(page)
  193. /*
  194. * line pointer(s) do not count as part of header
  195. */
  196. #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
  197. /*
  198. * PageIsEmpty
  199. * returns true iff no itemid has been allocated on the page
  200. */
  201. #define PageIsEmpty(page) \
  202. (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)
  203. /*
  204. * PageIsNew
  205. * returns true iff page has not been initialized (by PageInit)
  206. */
  207. #define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)
  208. /*
  209. * PageGetItemId
  210. * Returns an item identifier of a page.
  211. */
  212. #define PageGetItemId(page, offsetNumber) \
  213. ((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))
  214. /*
  215. * PageGetContents
  216. * To be used in case the page does not contain item pointers.
  217. *
  218. * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
  219. * Now it is. Beware of old code that might think the offset to the contents
  220. * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
  221. */
  222. #define PageGetContents(page) \
  223. ((char *) (page) + MAXALIGN(SizeOfPageHeaderData))
  224. /* ----------------
  225. * macros to access page size info
  226. * ----------------
  227. */
  228. /*
  229. * PageSizeIsValid
  230. * True iff the page size is valid.
  231. */
  232. #define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)
  233. /*
  234. * PageGetPageSize
  235. * Returns the page size of a page.
  236. *
  237. * this can only be called on a formatted page (unlike
  238. * BufferGetPageSize, which can be called on an unformatted page).
  239. * however, it can be called on a page that is not stored in a buffer.
  240. */
  241. #define PageGetPageSize(page) \
  242. ((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
  243. /*
  244. * PageGetPageLayoutVersion
  245. * Returns the page layout version of a page.
  246. */
  247. #define PageGetPageLayoutVersion(page) \
  248. (((PageHeader) (page))->pd_pagesize_version & 0x00FF)
  249. /*
  250. * PageSetPageSizeAndVersion
  251. * Sets the page size and page layout version number of a page.
  252. *
  253. * We could support setting these two values separately, but there's
  254. * no real need for it at the moment.
  255. */
  256. #define PageSetPageSizeAndVersion(page, size, version) \
  257. ( \
  258. AssertMacro(((size) & 0xFF00) == (size)), \
  259. AssertMacro(((version) & 0x00FF) == (version)), \
  260. ((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
  261. )
  262. /* ----------------
  263. * page special data macros
  264. * ----------------
  265. */
  266. /*
  267. * PageGetSpecialSize
  268. * Returns size of special space on a page.
  269. */
  270. #define PageGetSpecialSize(page) \
  271. ((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))
  272. /*
  273. * Using assertions, validate that the page special pointer is OK.
  274. *
  275. * This is intended to catch use of the pointer before page initialization.
  276. * It is implemented as a function due to the limitations of the MSVC
  277. * compiler, which choked on doing all these tests within another macro. We
  278. * return true so that MacroAssert() can be used while still getting the
  279. * specifics from the macro failure within this function.
  280. */
  281. static inline bool
  282. PageValidateSpecialPointer(Page page)
  283. {
  284. Assert(PageIsValid(page));
  285. Assert(((PageHeader) (page))->pd_special <= BLCKSZ);
  286. Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData);
  287. return true;
  288. }
  289. /*
  290. * PageGetSpecialPointer
  291. * Returns pointer to special space on a page.
  292. */
  293. #define PageGetSpecialPointer(page) \
  294. ( \
  295. AssertMacro(PageValidateSpecialPointer(page)), \
  296. (char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
  297. )
  298. /*
  299. * PageGetItem
  300. * Retrieves an item on the given page.
  301. *
  302. * Note:
  303. * This does not change the status of any of the resources passed.
  304. * The semantics may change in the future.
  305. */
  306. #define PageGetItem(page, itemId) \
  307. ( \
  308. AssertMacro(PageIsValid(page)), \
  309. AssertMacro(ItemIdHasStorage(itemId)), \
  310. (Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
  311. )
  312. /*
  313. * PageGetMaxOffsetNumber
  314. * Returns the maximum offset number used by the given page.
  315. * Since offset numbers are 1-based, this is also the number
  316. * of items on the page.
  317. *
  318. * NOTE: if the page is not initialized (pd_lower == 0), we must
  319. * return zero to ensure sane behavior. Accept double evaluation
  320. * of the argument so that we can ensure this.
  321. */
  322. #define PageGetMaxOffsetNumber(page) \
  323. (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
  324. ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
  325. / sizeof(ItemIdData)))
  326. /*
  327. * Additional macros for access to page headers. (Beware multiple evaluation
  328. * of the arguments!)
  329. */
  330. #define PageGetLSN(page) \
  331. PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
  332. #define PageSetLSN(page, lsn) \
  333. PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)
  334. #define PageHasFreeLinePointers(page) \
  335. (((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
  336. #define PageSetHasFreeLinePointers(page) \
  337. (((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
  338. #define PageClearHasFreeLinePointers(page) \
  339. (((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)
  340. #define PageIsFull(page) \
  341. (((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
  342. #define PageSetFull(page) \
  343. (((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
  344. #define PageClearFull(page) \
  345. (((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)
  346. #define PageIsAllVisible(page) \
  347. (((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
  348. #define PageSetAllVisible(page) \
  349. (((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
  350. #define PageClearAllVisible(page) \
  351. (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
  352. #define PageIsPrunable(page, oldestxmin) \
  353. ( \
  354. AssertMacro(TransactionIdIsNormal(oldestxmin)), \
  355. TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
  356. TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
  357. )
  358. #define PageSetPrunable(page, xid) \
  359. do { \
  360. Assert(TransactionIdIsNormal(xid)); \
  361. if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
  362. TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
  363. ((PageHeader) (page))->pd_prune_xid = (xid); \
  364. } while (0)
  365. #define PageClearPrunable(page) \
  366. (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
  367. /* ----------------------------------------------------------------
  368. * extern declarations
  369. * ----------------------------------------------------------------
  370. */
  371. #define PAI_OVERWRITE (1 << 0)
  372. #define PAI_IS_HEAP (1 << 1)
  373. #define PAI_ALLOW_FAR_OFFSET (1 << 2)
  374. extern void PageInit(Page page, Size pageSize, Size specialSize);
  375. extern bool PageIsVerified(Page page, BlockNumber blkno);
  376. extern OffsetNumber PageAddItem(Page page, Item item, Size size,
  377. OffsetNumber offsetNumber, bool overwrite, bool is_heap);
  378. extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
  379. OffsetNumber offsetNumber, int flags);
  380. extern Page PageGetTempPage(Page page);
  381. extern Page PageGetTempPageCopy(Page page);
  382. extern Page PageGetTempPageCopySpecial(Page page);
  383. extern void PageRestoreTempPage(Page tempPage, Page oldPage);
  384. extern void PageRepairFragmentation(Page page);
  385. extern Size PageGetFreeSpace(Page page);
  386. extern Size PageGetExactFreeSpace(Page page);
  387. extern Size PageGetHeapFreeSpace(Page page);
  388. extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
  389. extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
  390. extern void PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos,
  391. int nitems);
  392. extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
  393. extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
  394. #endif /* BUFPAGE_H */