gin_private.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. /*--------------------------------------------------------------------------
  2. * gin_private.h
  3. * header file for postgres inverted index access method implementation.
  4. *
  5. * Copyright (c) 2006-2016, PostgreSQL Global Development Group
  6. *
  7. * src/include/access/gin_private.h
  8. *--------------------------------------------------------------------------
  9. */
  10. #ifndef GIN_PRIVATE_H
  11. #define GIN_PRIVATE_H
  12. #include "access/amapi.h"
  13. #include "access/gin.h"
  14. #include "access/itup.h"
  15. #include "fmgr.h"
  16. #include "storage/bufmgr.h"
  17. #include "lib/rbtree.h"
  18. /*
  19. * Page opaque data in an inverted index page.
  20. *
  21. * Note: GIN does not include a page ID word as do the other index types.
  22. * This is OK because the opaque data is only 8 bytes and so can be reliably
  23. * distinguished by size. Revisit this if the size ever increases.
  24. * Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
  25. * BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the
  26. * high-order bits in its flags word, because that way the flags word cannot
  27. * match the page IDs used by SP-GiST and BRIN.
  28. */
  29. typedef struct GinPageOpaqueData
  30. {
  31. BlockNumber rightlink; /* next page if any */
  32. OffsetNumber maxoff; /* number of PostingItems on GIN_DATA &
  33. * ~GIN_LEAF page. On GIN_LIST page, number of
  34. * heap tuples. */
  35. uint16 flags; /* see bit definitions below */
  36. } GinPageOpaqueData;
  37. typedef GinPageOpaqueData *GinPageOpaque;
  38. #define GIN_DATA (1 << 0)
  39. #define GIN_LEAF (1 << 1)
  40. #define GIN_DELETED (1 << 2)
  41. #define GIN_META (1 << 3)
  42. #define GIN_LIST (1 << 4)
  43. #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
  44. #define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not
  45. * updated */
  46. #define GIN_COMPRESSED (1 << 7)
  47. /* Page numbers of fixed-location pages */
  48. #define GIN_METAPAGE_BLKNO (0)
  49. #define GIN_ROOT_BLKNO (1)
  50. typedef struct GinMetaPageData
  51. {
  52. /*
  53. * Pointers to head and tail of pending list, which consists of GIN_LIST
  54. * pages. These store fast-inserted entries that haven't yet been moved
  55. * into the regular GIN structure.
  56. */
  57. BlockNumber head;
  58. BlockNumber tail;
  59. /*
  60. * Free space in bytes in the pending list's tail page.
  61. */
  62. uint32 tailFreeSize;
  63. /*
  64. * We store both number of pages and number of heap tuples that are in the
  65. * pending list.
  66. */
  67. BlockNumber nPendingPages;
  68. int64 nPendingHeapTuples;
  69. /*
  70. * Statistics for planner use (accurate as of last VACUUM)
  71. */
  72. BlockNumber nTotalPages;
  73. BlockNumber nEntryPages;
  74. BlockNumber nDataPages;
  75. int64 nEntries;
  76. /*
  77. * GIN version number (ideally this should have been at the front, but too
  78. * late now. Don't move it!)
  79. *
  80. * Currently 2 (for indexes initialized in 9.4 or later)
  81. *
  82. * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
  83. * compatible, but may contain uncompressed posting tree (leaf) pages and
  84. * posting lists. They will be converted to compressed format when
  85. * modified.
  86. *
  87. * Version 0 (indexes initialized in 9.0 or before) is compatible but may
  88. * be missing null entries, including both null keys and placeholders.
  89. * Reject full-index-scan attempts on such indexes.
  90. */
  91. int32 ginVersion;
  92. } GinMetaPageData;
  93. #define GIN_CURRENT_VERSION 2
  94. #define GinPageGetMeta(p) \
  95. ((GinMetaPageData *) PageGetContents(p))
  96. /*
  97. * Macros for accessing a GIN index page's opaque data
  98. */
  99. #define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
  100. #define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
  101. #define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
  102. #define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
  103. #define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
  104. #define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
  105. #define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
  106. #define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
  107. #define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
  108. #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
  109. #define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
  110. #define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
  111. #define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
  112. #define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
  113. #define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
  114. #define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
  115. #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
  116. /*
  117. * We use our own ItemPointerGet(BlockNumber|OffsetNumber)
  118. * to avoid Asserts, since sometimes the ip_posid isn't "valid"
  119. */
  120. #define GinItemPointerGetBlockNumber(pointer) \
  121. BlockIdGetBlockNumber(&(pointer)->ip_blkid)
  122. #define GinItemPointerGetOffsetNumber(pointer) \
  123. ((pointer)->ip_posid)
  124. /*
  125. * Special-case item pointer values needed by the GIN search logic.
  126. * MIN: sorts less than any valid item pointer
  127. * MAX: sorts greater than any valid item pointer
  128. * LOSSY PAGE: indicates a whole heap page, sorts after normal item
  129. * pointers for that page
  130. * Note that these are all distinguishable from an "invalid" item pointer
  131. * (which is InvalidBlockNumber/0) as well as from all normal item
  132. * pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
  133. */
  134. #define ItemPointerSetMin(p) \
  135. ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
  136. #define ItemPointerIsMin(p) \
  137. (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
  138. GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
  139. #define ItemPointerSetMax(p) \
  140. ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
  141. #define ItemPointerIsMax(p) \
  142. (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
  143. GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
  144. #define ItemPointerSetLossyPage(p, b) \
  145. ItemPointerSet((p), (b), (OffsetNumber)0xffff)
  146. #define ItemPointerIsLossyPage(p) \
  147. (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
  148. GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
  149. /*
  150. * Posting item in a non-leaf posting-tree page
  151. */
  152. typedef struct
  153. {
  154. /* We use BlockIdData not BlockNumber to avoid padding space wastage */
  155. BlockIdData child_blkno;
  156. ItemPointerData key;
  157. } PostingItem;
  158. #define PostingItemGetBlockNumber(pointer) \
  159. BlockIdGetBlockNumber(&(pointer)->child_blkno)
  160. #define PostingItemSetBlockNumber(pointer, blockNumber) \
  161. BlockIdSet(&((pointer)->child_blkno), (blockNumber))
  162. /*
  163. * Category codes to distinguish placeholder nulls from ordinary NULL keys.
  164. * Note that the datatype size and the first two code values are chosen to be
  165. * compatible with the usual usage of bool isNull flags.
  166. *
  167. * GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
  168. * chosen to sort before not after regular key values.
  169. */
  170. typedef signed char GinNullCategory;
  171. #define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */
  172. #define GIN_CAT_NULL_KEY 1 /* null key value */
  173. #define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */
  174. #define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */
  175. #define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */
  176. /*
  177. * Access macros for null category byte in entry tuples
  178. */
  179. #define GinCategoryOffset(itup,ginstate) \
  180. (IndexInfoFindDataOffset((itup)->t_info) + \
  181. ((ginstate)->oneCol ? 0 : sizeof(int16)))
  182. #define GinGetNullCategory(itup,ginstate) \
  183. (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
  184. #define GinSetNullCategory(itup,ginstate,c) \
  185. (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
  186. /*
  187. * Access macros for leaf-page entry tuples (see discussion in README)
  188. */
  189. #define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid)
  190. #define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
  191. #define GIN_TREE_POSTING ((OffsetNumber)0xffff)
  192. #define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING)
  193. #define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
  194. #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
  195. #define GIN_ITUP_COMPRESSED (1U << 31)
  196. #define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
  197. #define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
  198. #define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
  199. #define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
  200. /*
  201. * Maximum size of an item on entry tree page. Make sure that we fit at least
  202. * three items on each page. (On regular B-tree indexes, we must fit at least
  203. * three items: two data items and the "high key". In GIN entry tree, we don't
  204. * currently store the high key explicitly, we just use the rightmost item on
  205. * the page, so it would actually be enough to fit two items.)
  206. */
  207. #define GinMaxItemSize \
  208. Min(INDEX_SIZE_MASK, \
  209. MAXALIGN_DOWN(((BLCKSZ - \
  210. MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
  211. MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
  212. /*
  213. * Access macros for non-leaf entry tuples
  214. */
  215. #define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
  216. #define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
  217. /*
  218. * Data (posting tree) pages
  219. *
  220. * Posting tree pages don't store regular tuples. Non-leaf pages contain
  221. * PostingItems, which are pairs of ItemPointers and child block numbers.
  222. * Leaf pages contain GinPostingLists and an uncompressed array of item
  223. * pointers.
  224. *
  225. * In a leaf page, the compressed posting lists are stored after the regular
  226. * page header, one after each other. Although we don't store regular tuples,
  227. * pd_lower is used to indicate the end of the posting lists. After that, free
  228. * space follows. This layout is compatible with the "standard" heap and
  229. * index page layout described in bufpage.h, so that we can e.g set buffer_std
  230. * when writing WAL records.
  231. *
  232. * In the special space is the GinPageOpaque struct.
  233. */
  234. #define GinDataLeafPageGetPostingList(page) \
  235. (GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
  236. #define GinDataLeafPageGetPostingListSize(page) \
  237. (((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
  238. #define GinDataLeafPageIsEmpty(page) \
  239. (GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
  240. #define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
  241. #define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
  242. /*
  243. * Pointer to the data portion of a posting tree page. For internal pages,
  244. * that's the beginning of the array of PostingItems. For compressed leaf
  245. * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
  246. * pages, it's the beginning of the ItemPointer array.
  247. */
  248. #define GinDataPageGetData(page) \
  249. (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
  250. /* non-leaf pages contain PostingItems */
  251. #define GinDataPageGetPostingItem(page, i) \
  252. ((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
  253. /*
  254. * Note: there is no GinDataPageGetDataSize macro, because before version
  255. * 9.4, we didn't set pd_lower on data pages. There can be pages in the index
  256. * that were binary-upgraded from earlier versions and still have an invalid
  257. * pd_lower, so we cannot trust it in general. Compressed posting tree leaf
  258. * pages are new in 9.4, however, so we can trust them; see
  259. * GinDataLeafPageGetPostingListSize.
  260. */
  261. #define GinDataPageSetDataSize(page, size) \
  262. { \
  263. Assert(size <= GinDataPageMaxDataSize); \
  264. ((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
  265. }
  266. #define GinNonLeafDataPageGetFreeSpace(page) \
  267. (GinDataPageMaxDataSize - \
  268. GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
  269. #define GinDataPageMaxDataSize \
  270. (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
  271. - MAXALIGN(sizeof(ItemPointerData)) \
  272. - MAXALIGN(sizeof(GinPageOpaqueData)))
  273. /*
  274. * List pages
  275. */
  276. #define GinListPageSize \
  277. ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
  278. /*
  279. * Storage type for GIN's reloptions
  280. */
  281. typedef struct GinOptions
  282. {
  283. int32 vl_len_; /* varlena header (do not touch directly!) */
  284. bool useFastUpdate; /* use fast updates? */
  285. int pendingListCleanupSize; /* maximum size of pending list */
  286. } GinOptions;
  287. #define GIN_DEFAULT_USE_FASTUPDATE true
  288. #define GinGetUseFastUpdate(relation) \
  289. ((relation)->rd_options ? \
  290. ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
  291. #define GinGetPendingListCleanupSize(relation) \
  292. ((relation)->rd_options && \
  293. ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize != -1 ? \
  294. ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize : \
  295. gin_pending_list_limit)
  296. /* Macros for buffer lock/unlock operations */
  297. #define GIN_UNLOCK BUFFER_LOCK_UNLOCK
  298. #define GIN_SHARE BUFFER_LOCK_SHARE
  299. #define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
  300. /*
  301. * GinState: working data structure describing the index being worked on
  302. */
  303. typedef struct GinState
  304. {
  305. Relation index;
  306. bool oneCol; /* true if single-column index */
  307. /*
  308. * origTupDesc is the nominal tuple descriptor of the index, ie, the i'th
  309. * attribute shows the key type (not the input data type!) of the i'th
  310. * index column. In a single-column index this describes the actual leaf
  311. * index tuples. In a multi-column index, the actual leaf tuples contain
  312. * a smallint column number followed by a key datum of the appropriate
  313. * type for that column. We set up tupdesc[i] to describe the actual
  314. * rowtype of the index tuples for the i'th column, ie, (int2, keytype).
  315. * Note that in any case, leaf tuples contain more data than is known to
  316. * the TupleDesc; see access/gin/README for details.
  317. */
  318. TupleDesc origTupdesc;
  319. TupleDesc tupdesc[INDEX_MAX_KEYS];
  320. /*
  321. * Per-index-column opclass support functions
  322. */
  323. FmgrInfo compareFn[INDEX_MAX_KEYS];
  324. FmgrInfo extractValueFn[INDEX_MAX_KEYS];
  325. FmgrInfo extractQueryFn[INDEX_MAX_KEYS];
  326. FmgrInfo consistentFn[INDEX_MAX_KEYS];
  327. FmgrInfo triConsistentFn[INDEX_MAX_KEYS];
  328. FmgrInfo comparePartialFn[INDEX_MAX_KEYS]; /* optional method */
  329. /* canPartialMatch[i] is true if comparePartialFn[i] is valid */
  330. bool canPartialMatch[INDEX_MAX_KEYS];
  331. /* Collations to pass to the support functions */
  332. Oid supportCollation[INDEX_MAX_KEYS];
  333. } GinState;
  334. /*
  335. * A compressed posting list.
  336. *
  337. * Note: This requires 2-byte alignment.
  338. */
  339. typedef struct
  340. {
  341. ItemPointerData first; /* first item in this posting list (unpacked) */
  342. uint16 nbytes; /* number of bytes that follow */
  343. unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
  344. } GinPostingList;
  345. #define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
  346. #define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
  347. /* XLog stuff */
  348. #define XLOG_GIN_CREATE_INDEX 0x00
  349. #define XLOG_GIN_CREATE_PTREE 0x10
  350. typedef struct ginxlogCreatePostingTree
  351. {
  352. uint32 size;
  353. /* A compressed posting list follows */
  354. } ginxlogCreatePostingTree;
  355. /*
  356. * The format of the insertion record varies depending on the page type.
  357. * ginxlogInsert is the common part between all variants.
  358. *
  359. * Backup Blk 0: target page
  360. * Backup Blk 1: left child, if this insertion finishes an incomplete split
  361. */
  362. #define XLOG_GIN_INSERT 0x20
  363. typedef struct
  364. {
  365. uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
  366. /*
  367. * FOLLOWS:
  368. *
  369. * 1. if not leaf page, block numbers of the left and right child pages
  370. * whose split this insertion finishes, as BlockIdData[2] (beware of
  371. * adding fields in this struct that would make them not 16-bit aligned)
  372. *
  373. * 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
  374. * on tree type.
  375. *
  376. * NB: the below structs are only 16-bit aligned when appended to a
  377. * ginxlogInsert struct! Beware of adding fields to them that require
  378. * stricter alignment.
  379. */
  380. } ginxlogInsert;
  381. typedef struct
  382. {
  383. OffsetNumber offset;
  384. bool isDelete;
  385. IndexTupleData tuple; /* variable length */
  386. } ginxlogInsertEntry;
  387. typedef struct
  388. {
  389. uint16 nactions;
  390. /* Variable number of 'actions' follow */
  391. } ginxlogRecompressDataLeaf;
  392. /*
  393. * Note: this struct is currently not used in code, and only acts as
  394. * documentation. The WAL record format is as specified here, but the code
  395. * uses straight access through a Pointer and memcpy to read/write these.
  396. */
  397. typedef struct
  398. {
  399. uint8 segno; /* segment this action applies to */
  400. char type; /* action type (see below) */
  401. /*
  402. * Action-specific data follows. For INSERT and REPLACE actions that is a
  403. * GinPostingList struct. For ADDITEMS, a uint16 for the number of items
  404. * added, followed by the items themselves as ItemPointers. DELETE actions
  405. * have no further data.
  406. */
  407. } ginxlogSegmentAction;
  408. /* Action types */
  409. #define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */
  410. #define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */
  411. #define GIN_SEGMENT_INSERT 2 /* a whole segment is added */
  412. #define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */
  413. #define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */
  414. typedef struct
  415. {
  416. OffsetNumber offset;
  417. PostingItem newitem;
  418. } ginxlogInsertDataInternal;
  419. /*
  420. * Backup Blk 0: new left page (= original page, if not root split)
  421. * Backup Blk 1: new right page
  422. * Backup Blk 2: original page / new root page, if root split
  423. * Backup Blk 3: left child, if this insertion completes an earlier split
  424. */
  425. #define XLOG_GIN_SPLIT 0x30
  426. typedef struct ginxlogSplit
  427. {
  428. RelFileNode node;
  429. BlockNumber rrlink; /* right link, or root's blocknumber if root
  430. * split */
  431. BlockNumber leftChildBlkno; /* valid on a non-leaf split */
  432. BlockNumber rightChildBlkno;
  433. uint16 flags; /* see below */
  434. } ginxlogSplit;
  435. /*
  436. * Flags used in ginxlogInsert and ginxlogSplit records
  437. */
  438. #define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
  439. #define GIN_INSERT_ISLEAF 0x02 /* ditto */
  440. #define GIN_SPLIT_ROOT 0x04 /* only for split records */
  441. /*
  442. * Vacuum simply WAL-logs the whole page, when anything is modified. This
  443. * is functionally identical to heap_newpage records, but is kept separate for
  444. * debugging purposes. (When inspecting the WAL stream, it's easier to see
  445. * what's going on when GIN vacuum records are marked as such, not as heap
  446. * records.) This is currently only used for entry tree leaf pages.
  447. */
  448. #define XLOG_GIN_VACUUM_PAGE 0x40
  449. /*
  450. * Vacuuming posting tree leaf page is WAL-logged like recompression caused
  451. * by insertion.
  452. */
  453. #define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
  454. typedef struct ginxlogVacuumDataLeafPage
  455. {
  456. ginxlogRecompressDataLeaf data;
  457. } ginxlogVacuumDataLeafPage;
  458. /*
  459. * Backup Blk 0: deleted page
  460. * Backup Blk 1: parent
  461. * Backup Blk 2: left sibling
  462. */
  463. #define XLOG_GIN_DELETE_PAGE 0x50
  464. typedef struct ginxlogDeletePage
  465. {
  466. OffsetNumber parentOffset;
  467. BlockNumber rightLink;
  468. } ginxlogDeletePage;
  469. #define XLOG_GIN_UPDATE_META_PAGE 0x60
  470. /*
  471. * Backup Blk 0: metapage
  472. * Backup Blk 1: tail page
  473. */
  474. typedef struct ginxlogUpdateMeta
  475. {
  476. RelFileNode node;
  477. GinMetaPageData metadata;
  478. BlockNumber prevTail;
  479. BlockNumber newRightlink;
  480. int32 ntuples; /* if ntuples > 0 then metadata.tail was
  481. * updated with that many tuples; else new sub
  482. * list was inserted */
  483. /* array of inserted tuples follows */
  484. } ginxlogUpdateMeta;
  485. #define XLOG_GIN_INSERT_LISTPAGE 0x70
  486. typedef struct ginxlogInsertListPage
  487. {
  488. BlockNumber rightlink;
  489. int32 ntuples;
  490. /* array of inserted tuples follows */
  491. } ginxlogInsertListPage;
  492. /*
  493. * Backup Blk 0: metapage
  494. * Backup Blk 1 to (ndeleted + 1): deleted pages
  495. */
  496. #define XLOG_GIN_DELETE_LISTPAGE 0x80
  497. /*
  498. * The WAL record for deleting list pages must contain a block reference to
  499. * all the deleted pages, so the number of pages that can be deleted in one
  500. * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
  501. * metapage.)
  502. */
  503. #define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
  504. typedef struct ginxlogDeleteListPages
  505. {
  506. GinMetaPageData metadata;
  507. int32 ndeleted;
  508. } ginxlogDeleteListPages;
  509. /* ginutil.c */
  510. extern Datum ginhandler(PG_FUNCTION_ARGS);
  511. extern bytea *ginoptions(Datum reloptions, bool validate);
  512. extern void initGinState(GinState *state, Relation index);
  513. extern Buffer GinNewBuffer(Relation index);
  514. extern void GinInitBuffer(Buffer b, uint32 f);
  515. extern void GinInitPage(Page page, uint32 f, Size pageSize);
  516. extern void GinInitMetabuffer(Buffer b);
  517. extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
  518. Datum a, GinNullCategory categorya,
  519. Datum b, GinNullCategory categoryb);
  520. extern int ginCompareAttEntries(GinState *ginstate,
  521. OffsetNumber attnuma, Datum a, GinNullCategory categorya,
  522. OffsetNumber attnumb, Datum b, GinNullCategory categoryb);
  523. extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
  524. Datum value, bool isNull,
  525. int32 *nentries, GinNullCategory **categories);
  526. extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
  527. extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
  528. GinNullCategory *category);
  529. /* gininsert.c */
  530. extern IndexBuildResult *ginbuild(Relation heap, Relation index,
  531. struct IndexInfo *indexInfo);
  532. extern void ginbuildempty(Relation index);
  533. extern bool gininsert(Relation index, Datum *values, bool *isnull,
  534. ItemPointer ht_ctid, Relation heapRel,
  535. IndexUniqueCheck checkUnique);
  536. extern void ginEntryInsert(GinState *ginstate,
  537. OffsetNumber attnum, Datum key, GinNullCategory category,
  538. ItemPointerData *items, uint32 nitem,
  539. GinStatsData *buildStats);
  540. /* ginbtree.c */
  541. typedef struct GinBtreeStack
  542. {
  543. BlockNumber blkno;
  544. Buffer buffer;
  545. OffsetNumber off;
  546. ItemPointerData iptr;
  547. /* predictNumber contains predicted number of pages on current level */
  548. uint32 predictNumber;
  549. struct GinBtreeStack *parent;
  550. } GinBtreeStack;
  551. typedef struct GinBtreeData *GinBtree;
  552. /* Return codes for GinBtreeData.beginPlaceToPage method */
  553. typedef enum
  554. {
  555. GPTP_NO_WORK,
  556. GPTP_INSERT,
  557. GPTP_SPLIT
  558. } GinPlaceToPageRC;
  559. typedef struct GinBtreeData
  560. {
  561. /* search methods */
  562. BlockNumber (*findChildPage) (GinBtree, GinBtreeStack *);
  563. BlockNumber (*getLeftMostChild) (GinBtree, Page);
  564. bool (*isMoveRight) (GinBtree, Page);
  565. bool (*findItem) (GinBtree, GinBtreeStack *);
  566. /* insert methods */
  567. OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
  568. GinPlaceToPageRC (*beginPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *);
  569. void (*execPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *);
  570. void *(*prepareDownlink) (GinBtree, Buffer);
  571. void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
  572. bool isData;
  573. Relation index;
  574. BlockNumber rootBlkno;
  575. GinState *ginstate; /* not valid in a data scan */
  576. bool fullScan;
  577. bool isBuild;
  578. /* Search key for Entry tree */
  579. OffsetNumber entryAttnum;
  580. Datum entryKey;
  581. GinNullCategory entryCategory;
  582. /* Search key for data tree (posting tree) */
  583. ItemPointerData itemptr;
  584. } GinBtreeData;
  585. /* This represents a tuple to be inserted to entry tree. */
  586. typedef struct
  587. {
  588. IndexTuple entry; /* tuple to insert */
  589. bool isDelete; /* delete old tuple at same offset? */
  590. } GinBtreeEntryInsertData;
  591. /*
  592. * This represents an itempointer, or many itempointers, to be inserted to
  593. * a data (posting tree) leaf page
  594. */
  595. typedef struct
  596. {
  597. ItemPointerData *items;
  598. uint32 nitem;
  599. uint32 curitem;
  600. } GinBtreeDataLeafInsertData;
  601. /*
  602. * For internal data (posting tree) pages, the insertion payload is a
  603. * PostingItem
  604. */
  605. extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot);
  606. extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
  607. extern void freeGinBtreeStack(GinBtreeStack *stack);
  608. extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
  609. void *insertdata, GinStatsData *buildStats);
  610. /* ginentrypage.c */
  611. extern IndexTuple GinFormTuple(GinState *ginstate,
  612. OffsetNumber attnum, Datum key, GinNullCategory category,
  613. Pointer data, Size dataSize, int nipd, bool errorTooBig);
  614. extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
  615. Datum key, GinNullCategory category,
  616. GinState *ginstate);
  617. extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
  618. extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum,
  619. IndexTuple itup, int *nitems);
  620. /* gindatapage.c */
  621. extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast);
  622. extern int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm);
  623. extern BlockNumber createPostingTree(Relation index,
  624. ItemPointerData *items, uint32 nitems,
  625. GinStatsData *buildStats);
  626. extern void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset);
  627. extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
  628. extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
  629. ItemPointerData *items, uint32 nitem,
  630. GinStatsData *buildStats);
  631. extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
  632. extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
  633. extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
  634. /*
  635. * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
  636. * and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward
  637. * declaration for it.
  638. */
  639. typedef struct GinVacuumState GinVacuumState;
  640. extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs);
  641. /* ginscan.c */
  642. /*
  643. * GinScanKeyData describes a single GIN index qualifier expression.
  644. *
  645. * From each qual expression, we extract one or more specific index search
  646. * conditions, which are represented by GinScanEntryData. It's quite
  647. * possible for identical search conditions to be requested by more than
  648. * one qual expression, in which case we merge such conditions to have just
  649. * one unique GinScanEntry --- this is particularly important for efficiency
  650. * when dealing with full-index-scan entries. So there can be multiple
  651. * GinScanKeyData.scanEntry pointers to the same GinScanEntryData.
  652. *
  653. * In each GinScanKeyData, nentries is the true number of entries, while
  654. * nuserentries is the number that extractQueryFn returned (which is what
  655. * we report to consistentFn). The "user" entries must come first.
  656. */
  657. typedef struct GinScanKeyData *GinScanKey;
  658. typedef struct GinScanEntryData *GinScanEntry;
  659. typedef struct GinScanKeyData
  660. {
  661. /* Real number of entries in scanEntry[] (always > 0) */
  662. uint32 nentries;
  663. /* Number of entries that extractQueryFn and consistentFn know about */
  664. uint32 nuserentries;
  665. /* array of GinScanEntry pointers, one per extracted search condition */
  666. GinScanEntry *scanEntry;
  667. /*
  668. * At least one of the entries in requiredEntries must be present for a
  669. * tuple to match the overall qual.
  670. *
  671. * additionalEntries contains entries that are needed by the consistent
  672. * function to decide if an item matches, but are not sufficient to
  673. * satisfy the qual without entries from requiredEntries.
  674. */
  675. GinScanEntry *requiredEntries;
  676. int nrequired;
  677. GinScanEntry *additionalEntries;
  678. int nadditional;
  679. /* array of check flags, reported to consistentFn */
  680. bool *entryRes;
  681. bool (*boolConsistentFn) (GinScanKey key);
  682. GinTernaryValue (*triConsistentFn) (GinScanKey key);
  683. FmgrInfo *consistentFmgrInfo;
  684. FmgrInfo *triConsistentFmgrInfo;
  685. Oid collation;
  686. /* other data needed for calling consistentFn */
  687. Datum query;
  688. /* NB: these three arrays have only nuserentries elements! */
  689. Datum *queryValues;
  690. GinNullCategory *queryCategories;
  691. Pointer *extra_data;
  692. StrategyNumber strategy;
  693. int32 searchMode;
  694. OffsetNumber attnum;
  695. /*
  696. * Match status data. curItem is the TID most recently tested (could be a
  697. * lossy-page pointer). curItemMatches is TRUE if it passes the
  698. * consistentFn test; if so, recheckCurItem is the recheck flag.
  699. * isFinished means that all the input entry streams are finished, so this
  700. * key cannot succeed for any later TIDs.
  701. */
  702. ItemPointerData curItem;
  703. bool curItemMatches;
  704. bool recheckCurItem;
  705. bool isFinished;
  706. } GinScanKeyData;
  707. typedef struct GinScanEntryData
  708. {
  709. /* query key and other information from extractQueryFn */
  710. Datum queryKey;
  711. GinNullCategory queryCategory;
  712. bool isPartialMatch;
  713. Pointer extra_data;
  714. StrategyNumber strategy;
  715. int32 searchMode;
  716. OffsetNumber attnum;
  717. /* Current page in posting tree */
  718. Buffer buffer;
  719. /* current ItemPointer to heap */
  720. ItemPointerData curItem;
  721. /* for a partial-match or full-scan query, we accumulate all TIDs here */
  722. TIDBitmap *matchBitmap;
  723. TBMIterator *matchIterator;
  724. TBMIterateResult *matchResult;
  725. /* used for Posting list and one page in Posting tree */
  726. ItemPointerData *list;
  727. int nlist;
  728. OffsetNumber offset;
  729. bool isFinished;
  730. bool reduceResult;
  731. uint32 predictNumberResult;
  732. GinBtreeData btree;
  733. } GinScanEntryData;
  734. typedef struct GinScanOpaqueData
  735. {
  736. MemoryContext tempCtx;
  737. GinState ginstate;
  738. GinScanKey keys; /* one per scan qualifier expr */
  739. uint32 nkeys;
  740. GinScanEntry *entries; /* one per index search condition */
  741. uint32 totalentries;
  742. uint32 allocentries; /* allocated length of entries[] */
  743. MemoryContext keyCtx; /* used to hold key and entry data */
  744. bool isVoidRes; /* true if query is unsatisfiable */
  745. } GinScanOpaqueData;
  746. typedef GinScanOpaqueData *GinScanOpaque;
  747. extern IndexScanDesc ginbeginscan(Relation rel, int nkeys, int norderbys);
  748. extern void ginendscan(IndexScanDesc scan);
  749. extern void ginrescan(IndexScanDesc scan, ScanKey key, int nscankeys,
  750. ScanKey orderbys, int norderbys);
  751. extern void ginNewScanKey(IndexScanDesc scan);
  752. extern void ginFreeScanKeys(GinScanOpaque so);
  753. /* ginget.c */
  754. extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
  755. /* ginfast.c */
  756. extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
  757. /* ginlogic.c */
  758. extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);
  759. /* ginvacuum.c */
  760. extern IndexBulkDeleteResult *ginbulkdelete(IndexVacuumInfo *info,
  761. IndexBulkDeleteResult *stats,
  762. IndexBulkDeleteCallback callback,
  763. void *callback_state);
  764. extern IndexBulkDeleteResult *ginvacuumcleanup(IndexVacuumInfo *info,
  765. IndexBulkDeleteResult *stats);
  766. extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs,
  767. ItemPointerData *items, int nitem, int *nremaining);
  768. /* ginvalidate.c */
  769. extern bool ginvalidate(Oid opclassoid);
  770. /* ginbulk.c */
  771. typedef struct GinEntryAccumulator
  772. {
  773. RBNode rbnode;
  774. Datum key;
  775. GinNullCategory category;
  776. OffsetNumber attnum;
  777. bool shouldSort;
  778. ItemPointerData *list;
  779. uint32 maxcount; /* allocated size of list[] */
  780. uint32 count; /* current number of list[] entries */
  781. } GinEntryAccumulator;
  782. typedef struct
  783. {
  784. GinState *ginstate;
  785. Size allocatedMemory;
  786. GinEntryAccumulator *entryallocator;
  787. uint32 eas_used;
  788. RBTree *tree;
  789. } BuildAccumulator;
  790. extern void ginInitBA(BuildAccumulator *accum);
  791. extern void ginInsertBAEntries(BuildAccumulator *accum,
  792. ItemPointer heapptr, OffsetNumber attnum,
  793. Datum *entries, GinNullCategory *categories,
  794. int32 nentries);
  795. extern void ginBeginBAScan(BuildAccumulator *accum);
  796. extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum,
  797. OffsetNumber *attnum, Datum *key, GinNullCategory *category,
  798. uint32 *n);
  799. /* ginfast.c */
  800. typedef struct GinTupleCollector
  801. {
  802. IndexTuple *tuples;
  803. uint32 ntuples;
  804. uint32 lentuples;
  805. uint32 sumsize;
  806. } GinTupleCollector;
  807. extern void ginHeapTupleFastInsert(GinState *ginstate,
  808. GinTupleCollector *collector);
  809. extern void ginHeapTupleFastCollect(GinState *ginstate,
  810. GinTupleCollector *collector,
  811. OffsetNumber attnum, Datum value, bool isNull,
  812. ItemPointer ht_ctid);
  813. extern void ginInsertCleanup(GinState *ginstate, bool full_clean,
  814. bool fill_fsm, IndexBulkDeleteResult *stats);
  815. /* ginpostinglist.c */
  816. extern GinPostingList *ginCompressPostingList(const ItemPointer ptrs, int nptrs,
  817. int maxsize, int *nwritten);
  818. extern int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm);
  819. extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded);
  820. extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded);
  821. extern ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na,
  822. ItemPointerData *b, uint32 nb,
  823. int *nmerged);
  824. /*
  825. * Merging the results of several gin scans compares item pointers a lot,
  826. * so we want this to be inlined.
  827. */
  828. static inline int
  829. ginCompareItemPointers(ItemPointer a, ItemPointer b)
  830. {
  831. uint64 ia = (uint64) a->ip_blkid.bi_hi << 32 | (uint64) a->ip_blkid.bi_lo << 16 | a->ip_posid;
  832. uint64 ib = (uint64) b->ip_blkid.bi_hi << 32 | (uint64) b->ip_blkid.bi_lo << 16 | b->ip_posid;
  833. if (ia == ib)
  834. return 0;
  835. else if (ia > ib)
  836. return 1;
  837. else
  838. return -1;
  839. }
  840. #endif /* GIN_PRIVATE_H */