predicate_internals.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. /*-------------------------------------------------------------------------
  2. *
  3. * predicate_internals.h
  4. * POSTGRES internal predicate locking definitions.
  5. *
  6. *
  7. * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  8. * Portions Copyright (c) 1994, Regents of the University of California
  9. *
  10. * src/include/storage/predicate_internals.h
  11. *
  12. *-------------------------------------------------------------------------
  13. */
  14. #ifndef PREDICATE_INTERNALS_H
  15. #define PREDICATE_INTERNALS_H
  16. #include "storage/lock.h"
  17. /*
  18. * Commit number.
  19. */
  20. typedef uint64 SerCommitSeqNo;
  21. /*
  22. * Reserved commit sequence numbers:
  23. * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
  24. * used as a SerCommitSeqNo, even an invalid one
  25. * - InvalidSerCommitSeqNo is used to indicate a transaction that
  26. * hasn't committed yet, so use a number greater than all valid
  27. * ones to make comparison do the expected thing
  28. * - RecoverySerCommitSeqNo is used to refer to transactions that
  29. * happened before a crash/recovery, since we restart the sequence
  30. * at that point. It's earlier than all normal sequence numbers,
  31. * and is only used by recovered prepared transactions
  32. */
  33. #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
  34. #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
  35. #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
  36. /*
  37. * The SERIALIZABLEXACT struct contains information needed for each
  38. * serializable database transaction to support SSI techniques.
  39. *
  40. * A home-grown list is maintained in shared memory to manage these.
  41. * An entry is used when the serializable transaction acquires a snapshot.
  42. * Unless the transaction is rolled back, this entry must generally remain
  43. * until all concurrent transactions have completed. (There are special
  44. * optimizations for READ ONLY transactions which often allow them to be
  45. * cleaned up earlier.) A transaction which is rolled back is cleaned up
  46. * as soon as possible.
  47. *
  48. * Eligibility for cleanup of committed transactions is generally determined
  49. * by comparing the transaction's finishedBefore field to
  50. * SerializableGlobalXmin.
  51. */
  52. typedef struct SERIALIZABLEXACT
  53. {
  54. VirtualTransactionId vxid; /* The executing process always has one of
  55. * these. */
  56. /*
  57. * We use two numbers to track the order that transactions commit. Before
  58. * commit, a transaction is marked as prepared, and prepareSeqNo is set.
  59. * Shortly after commit, it's marked as committed, and commitSeqNo is set.
  60. * This doesn't give a strict commit order, but these two values together
  61. * are good enough for us, as we can always err on the safe side and
  62. * assume that there's a conflict, if we can't be sure of the exact
  63. * ordering of two commits.
  64. *
  65. * Note that a transaction is marked as prepared for a short period during
  66. * commit processing, even if two-phase commit is not used. But with
  67. * two-phase commit, a transaction can stay in prepared state for some
  68. * time.
  69. */
  70. SerCommitSeqNo prepareSeqNo;
  71. SerCommitSeqNo commitSeqNo;
  72. /* these values are not both interesting at the same time */
  73. union
  74. {
  75. SerCommitSeqNo earliestOutConflictCommit; /* when committed with
  76. * conflict out */
  77. SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
  78. * no conflict out */
  79. } SeqNo;
  80. SHM_QUEUE outConflicts; /* list of write transactions whose data we
  81. * couldn't read. */
  82. SHM_QUEUE inConflicts; /* list of read transactions which couldn't
  83. * see our write. */
  84. SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
  85. SHM_QUEUE finishedLink; /* list link in
  86. * FinishedSerializableTransactions */
  87. /*
  88. * for r/o transactions: list of concurrent r/w transactions that we could
  89. * potentially have conflicts with, and vice versa for r/w transactions
  90. */
  91. SHM_QUEUE possibleUnsafeConflicts;
  92. TransactionId topXid; /* top level xid for the transaction, if one
  93. * exists; else invalid */
  94. TransactionId finishedBefore; /* invalid means still running; else
  95. * the struct expires when no
  96. * serializable xids are before this. */
  97. TransactionId xmin; /* the transaction's snapshot xmin */
  98. uint32 flags; /* OR'd combination of values defined below */
  99. int pid; /* pid of associated process */
  100. } SERIALIZABLEXACT;
  101. #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
  102. #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
  103. #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
  104. #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
  105. /*
  106. * The following flag actually means that the flagged transaction has a
  107. * conflict out *to a transaction which committed ahead of it*. It's hard
  108. * to get that into a name of a reasonable length.
  109. */
  110. #define SXACT_FLAG_CONFLICT_OUT 0x00000010
  111. #define SXACT_FLAG_READ_ONLY 0x00000020
  112. #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
  113. #define SXACT_FLAG_RO_SAFE 0x00000080
  114. #define SXACT_FLAG_RO_UNSAFE 0x00000100
  115. #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
  116. #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
  117. /*
  118. * The following types are used to provide an ad hoc list for holding
  119. * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
  120. * access these by key -- there are direct pointers to these objects where
  121. * needed. If a shared memory list is created, these types can probably be
  122. * eliminated in favor of using the general solution.
  123. */
  124. typedef struct PredXactListElementData
  125. {
  126. SHM_QUEUE link;
  127. SERIALIZABLEXACT sxact;
  128. } PredXactListElementData;
  129. typedef struct PredXactListElementData *PredXactListElement;
  130. #define PredXactListElementDataSize \
  131. ((Size)MAXALIGN(sizeof(PredXactListElementData)))
  132. typedef struct PredXactListData
  133. {
  134. SHM_QUEUE availableList;
  135. SHM_QUEUE activeList;
  136. /*
  137. * These global variables are maintained when registering and cleaning up
  138. * serializable transactions. They must be global across all backends,
  139. * but are not needed outside the predicate.c source file. Protected by
  140. * SerializableXactHashLock.
  141. */
  142. TransactionId SxactGlobalXmin; /* global xmin for active serializable
  143. * transactions */
  144. int SxactGlobalXminCount; /* how many active serializable
  145. * transactions have this xmin */
  146. int WritableSxactCount; /* how many non-read-only serializable
  147. * transactions are active */
  148. SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
  149. * increasing number for
  150. * commits of serializable
  151. * transactions */
  152. /* Protected by SerializableXactHashLock. */
  153. SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks
  154. * and inConflicts for
  155. * committed transactions
  156. * through this seq no */
  157. /* Protected by SerializableFinishedListLock. */
  158. SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
  159. * seq no */
  160. SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
  161. PredXactListElement element;
  162. } PredXactListData;
  163. typedef struct PredXactListData *PredXactList;
  164. #define PredXactListDataSize \
  165. ((Size)MAXALIGN(sizeof(PredXactListData)))
  166. /*
  167. * The following types are used to provide lists of rw-conflicts between
  168. * pairs of transactions. Since exactly the same information is needed,
  169. * they are also used to record possible unsafe transaction relationships
  170. * for purposes of identifying safe snapshots for read-only transactions.
  171. *
  172. * When a RWConflictData is not in use to record either type of relationship
  173. * between a pair of transactions, it is kept on an "available" list. The
  174. * outLink field is used for maintaining that list.
  175. */
  176. typedef struct RWConflictData
  177. {
  178. SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
  179. SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
  180. SERIALIZABLEXACT *sxactOut;
  181. SERIALIZABLEXACT *sxactIn;
  182. } RWConflictData;
  183. typedef struct RWConflictData *RWConflict;
  184. #define RWConflictDataSize \
  185. ((Size)MAXALIGN(sizeof(RWConflictData)))
  186. typedef struct RWConflictPoolHeaderData
  187. {
  188. SHM_QUEUE availableList;
  189. RWConflict element;
  190. } RWConflictPoolHeaderData;
  191. typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
  192. #define RWConflictPoolHeaderDataSize \
  193. ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
  194. /*
  195. * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
  196. * transaction or any of its subtransactions.
  197. */
  198. typedef struct SERIALIZABLEXIDTAG
  199. {
  200. TransactionId xid;
  201. } SERIALIZABLEXIDTAG;
  202. /*
  203. * The SERIALIZABLEXID struct provides a link from a TransactionId for a
  204. * serializable transaction to the related SERIALIZABLEXACT record, even if
  205. * the transaction has completed and its connection has been closed.
  206. *
  207. * These are created as new top level transaction IDs are first assigned to
  208. * transactions which are participating in predicate locking. This may
  209. * never happen for a particular transaction if it doesn't write anything.
  210. * They are removed with their related serializable transaction objects.
  211. *
  212. * The SubTransGetTopmostTransaction method is used where necessary to get
  213. * from an XID which might be from a subtransaction to the top level XID.
  214. */
  215. typedef struct SERIALIZABLEXID
  216. {
  217. /* hash key */
  218. SERIALIZABLEXIDTAG tag;
  219. /* data */
  220. SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
  221. } SERIALIZABLEXID;
  222. /*
  223. * The PREDICATELOCKTARGETTAG struct identifies a database object which can
  224. * be the target of predicate locks.
  225. *
  226. * Note that the hash function being used doesn't properly respect tag
  227. * length -- if the length of the structure isn't a multiple of four bytes it
  228. * will go to a four byte boundary past the end of the tag. If you change
  229. * this struct, make sure any slack space is initialized, so that any random
  230. * bytes in the middle or at the end are not included in the hash.
  231. *
  232. * TODO SSI: If we always use the same fields for the same type of value, we
  233. * should rename these. Holding off until it's clear there are no exceptions.
  234. * Since indexes are relations with blocks and tuples, it's looking likely that
  235. * the rename will be possible. If not, we may need to divide the last field
  236. * and use part of it for a target type, so that we know how to interpret the
  237. * data..
  238. */
  239. typedef struct PREDICATELOCKTARGETTAG
  240. {
  241. uint32 locktag_field1; /* a 32-bit ID field */
  242. uint32 locktag_field2; /* a 32-bit ID field */
  243. uint32 locktag_field3; /* a 32-bit ID field */
  244. uint32 locktag_field4; /* a 32-bit ID field */
  245. } PREDICATELOCKTARGETTAG;
  246. /*
  247. * The PREDICATELOCKTARGET struct represents a database object on which there
  248. * are predicate locks.
  249. *
  250. * A hash list of these objects is maintained in shared memory. An entry is
  251. * added when a predicate lock is requested on an object which doesn't
  252. * already have one. An entry is removed when the last lock is removed from
  253. * its list.
  254. */
  255. typedef struct PREDICATELOCKTARGET
  256. {
  257. /* hash key */
  258. PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
  259. /* data */
  260. SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
  261. * predicate lock target */
  262. } PREDICATELOCKTARGET;
  263. /*
  264. * The PREDICATELOCKTAG struct identifies an individual predicate lock.
  265. *
  266. * It is the combination of predicate lock target (which is a lockable
  267. * object) and a serializable transaction which has acquired a lock on that
  268. * target.
  269. */
  270. typedef struct PREDICATELOCKTAG
  271. {
  272. PREDICATELOCKTARGET *myTarget;
  273. SERIALIZABLEXACT *myXact;
  274. } PREDICATELOCKTAG;
  275. /*
  276. * The PREDICATELOCK struct represents an individual lock.
  277. *
  278. * An entry can be created here when the related database object is read, or
  279. * by promotion of multiple finer-grained targets. All entries related to a
  280. * serializable transaction are removed when that serializable transaction is
  281. * cleaned up. Entries can also be removed when they are combined into a
  282. * single coarser-grained lock entry.
  283. */
  284. typedef struct PREDICATELOCK
  285. {
  286. /* hash key */
  287. PREDICATELOCKTAG tag; /* unique identifier of lock */
  288. /* data */
  289. SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
  290. * predicate locks */
  291. SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
  292. * predicate locks */
  293. SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
  294. } PREDICATELOCK;
  295. /*
  296. * The LOCALPREDICATELOCK struct represents a local copy of data which is
  297. * also present in the PREDICATELOCK table, organized for fast access without
  298. * needing to acquire a LWLock. It is strictly for optimization.
  299. *
  300. * Each serializable transaction creates its own local hash table to hold a
  301. * collection of these. This information is used to determine when a number
  302. * of fine-grained locks should be promoted to a single coarser-grained lock.
  303. * The information is maintained more-or-less in parallel to the
  304. * PREDICATELOCK data, but because this data is not protected by locks and is
  305. * only used in an optimization heuristic, it is allowed to drift in a few
  306. * corner cases where maintaining exact data would be expensive.
  307. *
  308. * The hash table is created when the serializable transaction acquires its
  309. * snapshot, and its memory is released upon completion of the transaction.
  310. */
  311. typedef struct LOCALPREDICATELOCK
  312. {
  313. /* hash key */
  314. PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
  315. /* data */
  316. bool held; /* is lock held, or just its children? */
  317. int childLocks; /* number of child locks currently held */
  318. } LOCALPREDICATELOCK;
  319. /*
  320. * The types of predicate locks which can be acquired.
  321. */
  322. typedef enum PredicateLockTargetType
  323. {
  324. PREDLOCKTAG_RELATION,
  325. PREDLOCKTAG_PAGE,
  326. PREDLOCKTAG_TUPLE
  327. /* TODO SSI: Other types may be needed for index locking */
  328. } PredicateLockTargetType;
  329. /*
  330. * This structure is used to quickly capture a copy of all predicate
  331. * locks. This is currently used only by the pg_lock_status function,
  332. * which in turn is used by the pg_locks view.
  333. */
  334. typedef struct PredicateLockData
  335. {
  336. int nelements;
  337. PREDICATELOCKTARGETTAG *locktags;
  338. SERIALIZABLEXACT *xacts;
  339. } PredicateLockData;
  340. /*
  341. * These macros define how we map logical IDs of lockable objects into the
  342. * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
  343. * rather than accessing the fields directly. Note multiple eval of target!
  344. */
  345. #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
  346. ((locktag).locktag_field1 = (dboid), \
  347. (locktag).locktag_field2 = (reloid), \
  348. (locktag).locktag_field3 = InvalidBlockNumber, \
  349. (locktag).locktag_field4 = InvalidOffsetNumber)
  350. #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
  351. ((locktag).locktag_field1 = (dboid), \
  352. (locktag).locktag_field2 = (reloid), \
  353. (locktag).locktag_field3 = (blocknum), \
  354. (locktag).locktag_field4 = InvalidOffsetNumber)
  355. #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
  356. ((locktag).locktag_field1 = (dboid), \
  357. (locktag).locktag_field2 = (reloid), \
  358. (locktag).locktag_field3 = (blocknum), \
  359. (locktag).locktag_field4 = (offnum))
  360. #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
  361. ((Oid) (locktag).locktag_field1)
  362. #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
  363. ((Oid) (locktag).locktag_field2)
  364. #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
  365. ((BlockNumber) (locktag).locktag_field3)
  366. #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
  367. ((OffsetNumber) (locktag).locktag_field4)
  368. #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
  369. (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
  370. (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
  371. PREDLOCKTAG_RELATION))
  372. /*
  373. * Two-phase commit statefile records. There are two types: for each
  374. * transaction, we generate one per-transaction record and a variable
  375. * number of per-predicate-lock records.
  376. */
  377. typedef enum TwoPhasePredicateRecordType
  378. {
  379. TWOPHASEPREDICATERECORD_XACT,
  380. TWOPHASEPREDICATERECORD_LOCK
  381. } TwoPhasePredicateRecordType;
  382. /*
  383. * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
  384. * much is needed because most of it not meaningful for a recovered
  385. * prepared transaction.
  386. *
  387. * In particular, we do not record the in and out conflict lists for a
  388. * prepared transaction because the associated SERIALIZABLEXACTs will
  389. * not be available after recovery. Instead, we simply record the
  390. * existence of each type of conflict by setting the transaction's
  391. * summary conflict in/out flag.
  392. */
  393. typedef struct TwoPhasePredicateXactRecord
  394. {
  395. TransactionId xmin;
  396. uint32 flags;
  397. } TwoPhasePredicateXactRecord;
  398. /* Per-lock state */
  399. typedef struct TwoPhasePredicateLockRecord
  400. {
  401. PREDICATELOCKTARGETTAG target;
  402. uint32 filler; /* to avoid length change in back-patched fix */
  403. } TwoPhasePredicateLockRecord;
  404. typedef struct TwoPhasePredicateRecord
  405. {
  406. TwoPhasePredicateRecordType type;
  407. union
  408. {
  409. TwoPhasePredicateXactRecord xactRecord;
  410. TwoPhasePredicateLockRecord lockRecord;
  411. } data;
  412. } TwoPhasePredicateRecord;
  413. /*
  414. * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
  415. */
  416. #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
  417. /*
  418. * Function definitions for functions needing awareness of predicate
  419. * locking internals.
  420. */
  421. extern PredicateLockData *GetPredicateLockStatusData(void);
  422. #endif /* PREDICATE_INTERNALS_H */