  /*-------------------------------------------------------------------------
   *
   * jsonb.h
   *    Declarations for jsonb data type support.
   *
   * Copyright (c) 1996-2016, PostgreSQL Global Development Group
   *
   * src/include/utils/jsonb.h
   *
   *-------------------------------------------------------------------------
   */
  #ifndef __JSONB_H__
  #define __JSONB_H__

  #include "lib/stringinfo.h"
  #include "utils/array.h"
  #include "utils/numeric.h"
  /*
   * Tokens used when sequentially processing a jsonb value.
   *
   * The enumerators rely on implicit numbering, so WJB_DONE is 0 and the
   * remaining tokens count upward in declaration order.
   */
  typedef enum
  {
      WJB_DONE,
      WJB_KEY,
      WJB_VALUE,
      WJB_ELEM,
      WJB_BEGIN_ARRAY,
      WJB_END_ARRAY,
      WJB_BEGIN_OBJECT,
      WJB_END_OBJECT
  } JsonbIteratorToken;
  /* Strategy numbers for GIN index opclasses */
  #define JsonbContainsStrategyNumber     7
  #define JsonbExistsStrategyNumber       9
  #define JsonbExistsAnyStrategyNumber    10
  #define JsonbExistsAllStrategyNumber    11
  /*
   * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
   * keys and values.  The storage format is text.  The first byte of the text
   * string distinguishes whether this is a key (always a string), null value,
   * boolean value, numeric value, or string value.  However, array elements
   * that are strings are marked as though they were keys; this imprecision
   * supports the definition of the "exists" operator, which treats array
   * elements like keys.  The remainder of the text string is empty for a null
   * value, "t" or "f" for a boolean value, a normalized print representation of
   * a numeric value, or the text of a string value.  However, if the length of
   * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
   * the text representation and store an 8-hex-digit representation of the
   * uint32 hash value, marking the prefix byte with an additional bit to
   * distinguish that this has happened.  Hashing long strings saves space and
   * ensures that we won't overrun the maximum entry length for a GIN index.
   * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit.  It's chosen
   * to ensure that the on-disk text datum will have a short varlena header.)
   * Note that when any hashed item appears in a query, we must recheck index
   * matches against the heap tuple; currently, this costs nothing because we
   * must always recheck for other reasons.
   */
  /*
   * Prefix-byte values for jsonb_ops GIN index entries.  The five type codes
   * are mutually exclusive; JGINFLAG_HASHED is a separate bit that is OR'd
   * into one of them.
   */
  #define JGINFLAG_KEY    0x01    /* key (or string array element) */
  #define JGINFLAG_NULL   0x02    /* null value */
  #define JGINFLAG_BOOL   0x03    /* boolean value */
  #define JGINFLAG_NUM    0x04    /* numeric value */
  #define JGINFLAG_STR    0x05    /* string value (if not an array element) */
  #define JGINFLAG_HASHED 0x10    /* OR'd into flag if value was hashed */
  #define JGIN_MAXLENGTH  125     /* max length of text part before hashing */
  /*
   * Convenience macros for moving between Datum and Jsonb pointers.
   *
   * DatumGetJsonb() passes the datum through PG_DETOAST_DATUM() before
   * casting; the PG_GETARG/PG_RETURN forms wrap the corresponding fmgr macros.
   */
  #define DatumGetJsonb(d)    ((Jsonb *) PG_DETOAST_DATUM(d))
  #define JsonbGetDatum(p)    PointerGetDatum(p)
  #define PG_GETARG_JSONB(x)  DatumGetJsonb(PG_GETARG_DATUM(x))
  #define PG_RETURN_JSONB(x)  PG_RETURN_POINTER(x)
  /*
   * Forward declarations: struct JsonbValue and struct JsonbPair refer to
   * each other, so both typedef names must exist before either is defined.
   */
  typedef struct JsonbPair JsonbPair;
  typedef struct JsonbValue JsonbValue;
  /*
   * Jsonbs are varlena objects, so must meet the varlena convention that the
   * first int32 of the object contains the total object size in bytes.  Be sure
   * to use VARSIZE() and SET_VARSIZE() to access it, though!
   *
   * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
   * representation.  Often, JsonbValues are just shims through which a Jsonb
   * buffer is accessed, but they can also be deep copied and passed around.
   *
   * Jsonb is a tree structure.  Each node in the tree consists of a JEntry
   * header and a variable-length content (possibly of zero size).  The JEntry
   * header indicates what kind of a node it is, e.g. a string or an array,
   * and provides the length of its variable-length portion.
   *
   * The JEntry and the content of a node are not stored physically together.
   * Instead, the container array or object has an array that holds the JEntrys
   * of all the child nodes, followed by their variable-length portions.
   *
   * The root node is an exception; it has no parent array or object that could
   * hold its JEntry.  Hence, no JEntry header is stored for the root node.  It
   * is implicitly known that the root node must be an array or an object,
   * so we can get away without the type indicator as long as we can distinguish
   * the two.  For that purpose, both an array and an object begin with a uint32
   * header field, which contains a JB_FOBJECT or JB_FARRAY flag.  When a naked
   * scalar value needs to be stored as a Jsonb value, what we actually store is
   * an array with one element, with the flags in the array's header field set
   * to JB_FSCALAR | JB_FARRAY.
   *
   * Overall, the Jsonb struct requires 4-byte alignment.  Within the struct,
   * the variable-length portion of some node types is aligned to a 4-byte
   * boundary, while others are not.  When alignment is needed, the padding is
   * in the beginning of the node that requires it.  For example, if a numeric
   * node is stored after a string node, so that the numeric node begins at
   * offset 3, the variable-length portion of the numeric node will begin with
   * one padding byte so that the actual numeric data is 4-byte aligned.
   */
  105. /*
  106. * JEntry format.
  107. *
  108. * The least significant 28 bits store either the data length of the entry,
  109. * or its end+1 offset from the start of the variable-length portion of the
  110. * containing object. The next three bits store the type of the entry, and
  111. * the high-order bit tells whether the least significant bits store a length
  112. * or an offset.
  113. *
  114. * The reason for the offset-or-length complication is to compromise between
  115. * access speed and data compressibility. In the initial design each JEntry
  116. * always stored an offset, but this resulted in JEntry arrays with horrible
  117. * compressibility properties, so that TOAST compression of a JSONB did not
  118. * work well. Storing only lengths would greatly improve compressibility,
  119. * but it makes random access into large arrays expensive (O(N) not O(1)).
  120. * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
  121. * a length in the rest. This results in reasonably compressible data (as
  122. * long as the stride isn't too small). We may have to examine as many as
  123. * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
  124. * given item, but that's still O(1) no matter how large the container is.
  125. *
  126. * We could avoid eating a flag bit for this purpose if we were to store
  127. * the stride in the container header, or if we were willing to treat the
  128. * stride as an unchangeable constant. Neither of those options is very
  129. * attractive though.
  130. */
  131. typedef uint32 JEntry;
  /*
   * Bit layout of a JEntry: low 28 bits hold a length or offset, the next
   * three bits hold the type code, and the top bit says which of the two the
   * low bits contain.
   */
  #define JENTRY_OFFLENMASK       0x0FFFFFFF
  #define JENTRY_TYPEMASK         0x70000000
  #define JENTRY_HAS_OFF          0x80000000

  /* values stored in the type bits */
  #define JENTRY_ISSTRING         0x00000000
  #define JENTRY_ISNUMERIC        0x10000000
  #define JENTRY_ISBOOL_FALSE     0x20000000
  #define JENTRY_ISBOOL_TRUE      0x30000000
  #define JENTRY_ISNULL           0x40000000
  #define JENTRY_ISCONTAINER      0x50000000  /* array or object */

  /*
   * Access macros.  Note possible multiple evaluations: JBE_ISBOOL expands
   * its argument twice, so do not pass an expression with side effects.
   */
  #define JBE_OFFLENFLD(je_)      ((je_) & JENTRY_OFFLENMASK)
  #define JBE_HAS_OFF(je_)        (((je_) & JENTRY_HAS_OFF) != 0)
  #define JBE_ISSTRING(je_)       (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
  #define JBE_ISNUMERIC(je_)      (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
  #define JBE_ISCONTAINER(je_)    (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
  #define JBE_ISNULL(je_)         (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
  #define JBE_ISBOOL_TRUE(je_)    (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
  #define JBE_ISBOOL_FALSE(je_)   (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
  #define JBE_ISBOOL(je_)         (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
  /*
   * Macro for advancing an offset variable to the next JEntry.
   *
   * "je" is evaluated exactly once, into a local copy.  If the entry stores an
   * offset, "offset" is overwritten with it; otherwise the entry stores a
   * length, which is added to "offset".  The do/while(0) wrapper lets the
   * macro be used as a single statement.
   */
  #define JBE_ADVANCE_OFFSET(offset, je) \
      do { \
          JEntry      je_ = (je); \
          if (JBE_HAS_OFF(je_)) \
              (offset) = JBE_OFFLENFLD(je_); \
          else \
              (offset) += JBE_OFFLENFLD(je_); \
      } while(0)
  /*
   * We store an offset, not a length, every JB_OFFSET_STRIDE children.
   * Caution: this macro should only be referenced when creating a JSONB
   * value.  When examining an existing value, pay attention to the HAS_OFF
   * bits instead.  This allows changes in the offset-placement heuristic
   * without breaking on-disk compatibility.
   */
  #define JB_OFFSET_STRIDE        32
  169. /*
  170. * A jsonb array or object node, within a Jsonb Datum.
  171. *
  172. * An array has one child for each element, stored in array order.
  173. *
  174. * An object has two children for each key/value pair. The keys all appear
  175. * first, in key sort order; then the values appear, in an order matching the
  176. * key order. This arrangement keeps the keys compact in memory, making a
  177. * search for a particular key more cache-friendly.
  178. */
  179. typedef struct JsonbContainer
  180. {
  181. uint32 header; /* number of elements or key/value pairs, and
  182. * flags */
  183. JEntry children[FLEXIBLE_ARRAY_MEMBER];
  184. /* the data for each child node follows. */
  185. } JsonbContainer;
  /*
   * Flags for the header field in JsonbContainer.  The count occupies the low
   * 28 bits; the flag bits sit above it and do not overlap the count.
   */
  #define JB_CMASK                0x0FFFFFFF  /* mask for count field */
  #define JB_FSCALAR              0x10000000  /* flag bits */
  #define JB_FOBJECT              0x20000000
  #define JB_FARRAY               0x40000000
  191. /* The top-level on-disk format for a jsonb datum. */
  192. typedef struct
  193. {
  194. int32 vl_len_; /* varlena header (do not touch directly!) */
  195. JsonbContainer root;
  196. } Jsonb;
  /*
   * Convenience macros for accessing the root container in a Jsonb datum.
   * Each one reads the uint32 found at VARDATA(jbp_).
   *
   * JB_ROOT_COUNT yields the masked count.  The JB_ROOT_IS_* predicates yield
   * exactly 0 or 1: comparing against zero matters because the flag bits are
   * at 0x10000000 and above, so the raw masked value would become zero if it
   * were stored into a narrow integer type such as an 8-bit boolean.
   */
  #define JB_ROOT_COUNT(jbp_)     (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
  #define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
  #define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
  #define JB_ROOT_IS_ARRAY(jbp_)  ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
  202. /*
  203. * JsonbValue: In-memory representation of Jsonb. This is a convenient
  204. * deserialized representation, that can easily support using the "val"
  205. * union across underlying types during manipulation. The Jsonb on-disk
  206. * representation has various alignment considerations.
  207. */
  208. struct JsonbValue
  209. {
  210. enum
  211. {
  212. /* Scalar types */
  213. jbvNull = 0x0,
  214. jbvString,
  215. jbvNumeric,
  216. jbvBool,
  217. /* Composite types */
  218. jbvArray = 0x10,
  219. jbvObject,
  220. /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
  221. jbvBinary
  222. } type; /* Influences sort order */
  223. union
  224. {
  225. Numeric numeric;
  226. bool boolean;
  227. struct
  228. {
  229. int len;
  230. char *val; /* Not necessarily null-terminated */
  231. } string; /* String primitive type */
  232. struct
  233. {
  234. int nElems;
  235. JsonbValue *elems;
  236. bool rawScalar; /* Top-level "raw scalar" array? */
  237. } array; /* Array container type */
  238. struct
  239. {
  240. int nPairs; /* 1 pair, 2 elements */
  241. JsonbPair *pairs;
  242. } object; /* Associative container type */
  243. struct
  244. {
  245. int len;
  246. JsonbContainer *data;
  247. } binary; /* Array or object, in on-disk format */
  248. } val;
  249. };
  /*
   * True when the value's type lies in the scalar range jbvNull..jbvBool.
   * The argument is expanded twice, so avoid expressions with side effects.
   */
  #define IsAJsonbScalar(jsonbval)    ((jsonbval)->type >= jbvNull && \
                                       (jsonbval)->type <= jbvBool)
  252. /*
  253. * Key/value pair within an Object.
  254. *
  255. * This struct type is only used briefly while constructing a Jsonb; it is
  256. * *not* the on-disk representation.
  257. *
  258. * Pairs with duplicate keys are de-duplicated. We store the originally
  259. * observed pair ordering for the purpose of removing duplicates in a
  260. * well-defined way (which is "last observed wins").
  261. */
  262. struct JsonbPair
  263. {
  264. JsonbValue key; /* Must be a jbvString */
  265. JsonbValue value; /* May be of any type */
  266. uint32 order; /* Pair's index in original sequence */
  267. };
  268. /* Conversion state used when parsing Jsonb from text, or for type coercion */
  269. typedef struct JsonbParseState
  270. {
  271. JsonbValue contVal;
  272. Size size;
  273. struct JsonbParseState *next;
  274. } JsonbParseState;
  /*
   * JsonbIterator holds details of the type for each iteration.  It also stores
   * a Jsonb varlena buffer, which can be directly accessed in some contexts.
   */
  /* Position markers stored in the "state" field of a JsonbIterator */
  typedef enum
  {
      JBI_ARRAY_START,
      JBI_ARRAY_ELEM,
      JBI_OBJECT_START,
      JBI_OBJECT_KEY,
      JBI_OBJECT_VALUE
  } JsonbIterState;
  287. typedef struct JsonbIterator
  288. {
  289. /* Container being iterated */
  290. JsonbContainer *container;
  291. uint32 nElems; /* Number of elements in children array (will
  292. * be nPairs for objects) */
  293. bool isScalar; /* Pseudo-array scalar value? */
  294. JEntry *children; /* JEntrys for child nodes */
  295. /* Data proper. This points to the beginning of the variable-length data */
  296. char *dataProper;
  297. /* Current item in buffer (up to nElems) */
  298. int curIndex;
  299. /* Data offset corresponding to current item */
  300. uint32 curDataOffset;
  301. /*
  302. * If the container is an object, we want to return keys and values
  303. * alternately; so curDataOffset points to the current key, and
  304. * curValueOffset points to the current value.
  305. */
  306. uint32 curValueOffset;
  307. /* Private state */
  308. JsonbIterState state;
  309. struct JsonbIterator *parent;
  310. } JsonbIterator;
  311. /* I/O routines */
  312. extern Datum jsonb_in(PG_FUNCTION_ARGS);
  313. extern Datum jsonb_out(PG_FUNCTION_ARGS);
  314. extern Datum jsonb_recv(PG_FUNCTION_ARGS);
  315. extern Datum jsonb_send(PG_FUNCTION_ARGS);
  316. extern Datum jsonb_typeof(PG_FUNCTION_ARGS);
  317. /* generator routines */
  318. extern Datum to_jsonb(PG_FUNCTION_ARGS);
  319. extern Datum jsonb_build_object(PG_FUNCTION_ARGS);
  320. extern Datum jsonb_build_object_noargs(PG_FUNCTION_ARGS);
  321. extern Datum jsonb_build_array(PG_FUNCTION_ARGS);
  322. extern Datum jsonb_build_array_noargs(PG_FUNCTION_ARGS);
  323. extern Datum jsonb_object(PG_FUNCTION_ARGS);
  324. extern Datum jsonb_object_two_arg(PG_FUNCTION_ARGS);
  325. /* jsonb_agg, json_object_agg functions */
  326. extern Datum jsonb_agg_transfn(PG_FUNCTION_ARGS);
  327. extern Datum jsonb_agg_finalfn(PG_FUNCTION_ARGS);
  328. extern Datum jsonb_object_agg_transfn(PG_FUNCTION_ARGS);
  329. extern Datum jsonb_object_agg_finalfn(PG_FUNCTION_ARGS);
  330. /* Indexing-related ops */
  331. extern Datum jsonb_exists(PG_FUNCTION_ARGS);
  332. extern Datum jsonb_exists_any(PG_FUNCTION_ARGS);
  333. extern Datum jsonb_exists_all(PG_FUNCTION_ARGS);
  334. extern Datum jsonb_contains(PG_FUNCTION_ARGS);
  335. extern Datum jsonb_contained(PG_FUNCTION_ARGS);
  336. extern Datum jsonb_ne(PG_FUNCTION_ARGS);
  337. extern Datum jsonb_lt(PG_FUNCTION_ARGS);
  338. extern Datum jsonb_gt(PG_FUNCTION_ARGS);
  339. extern Datum jsonb_le(PG_FUNCTION_ARGS);
  340. extern Datum jsonb_ge(PG_FUNCTION_ARGS);
  341. extern Datum jsonb_eq(PG_FUNCTION_ARGS);
  342. extern Datum jsonb_cmp(PG_FUNCTION_ARGS);
  343. extern Datum jsonb_hash(PG_FUNCTION_ARGS);
  344. /* GIN support functions for jsonb_ops */
  345. extern Datum gin_compare_jsonb(PG_FUNCTION_ARGS);
  346. extern Datum gin_extract_jsonb(PG_FUNCTION_ARGS);
  347. extern Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS);
  348. extern Datum gin_consistent_jsonb(PG_FUNCTION_ARGS);
  349. extern Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS);
  350. /* GIN support functions for jsonb_path_ops */
  351. extern Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS);
  352. extern Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS);
  353. extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS);
  354. extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
  355. /* pretty printer, returns text */
  356. extern Datum jsonb_pretty(PG_FUNCTION_ARGS);
  357. /* concatenation */
  358. extern Datum jsonb_concat(PG_FUNCTION_ARGS);
  359. /* deletion */
  360. extern Datum jsonb_delete(PG_FUNCTION_ARGS);
  361. extern Datum jsonb_delete_idx(PG_FUNCTION_ARGS);
  362. extern Datum jsonb_delete_path(PG_FUNCTION_ARGS);
  363. /* replacement */
  364. extern Datum jsonb_set(PG_FUNCTION_ARGS);
  365. /* insert after or before (for arrays) */
  366. extern Datum jsonb_insert(PG_FUNCTION_ARGS);
  367. /* Support functions */
  368. extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
  369. extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
  370. extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
  371. extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
  372. uint32 flags,
  373. JsonbValue *key);
  374. extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
  375. uint32 i);
  376. extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
  377. JsonbIteratorToken seq, JsonbValue *jbVal);
  378. extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
  379. extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
  380. bool skipNested);
  381. extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
  382. extern bool JsonbDeepContains(JsonbIterator **val,
  383. JsonbIterator **mContained);
  384. extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
  385. /* jsonb.c support functions */
  386. extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
  387. int estimated_len);
  388. extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
  389. int estimated_len);
  #endif   /* __JSONB_H__ */