Line data Source code
1 : /*
2 : * contrib/hstore/hstore_compat.c
3 : *
4 : * Notes on old/new hstore format disambiguation.
5 : *
6 : * There are three formats to consider:
7 : * 1) old contrib/hstore (referred to as hstore-old)
8 : * 2) prerelease pgfoundry hstore
9 : * 3) new contrib/hstore
10 : *
11 : * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 : * bit, which is set in (3) but not (2).
13 : *
14 : * Values that are already in format (3), or which are
15 : * unambiguously in format (2), are handled by the first
16 : * "return immediately" test in hstoreUpgrade().
17 : *
18 : * To stress a point: we ONLY get here with possibly-ambiguous
19 : * values if we're doing some sort of in-place migration from an
20 : * old prerelease pgfoundry hstore-new; and we explicitly don't
21 : * support that without fixing up any potentially padded values
22 : * first. Most of the code here is serious overkill, but the
23 : * performance penalty isn't serious (especially compared to the
24 : * palloc() that we have to do anyway) and the belt-and-braces
25 : * validity checks provide some reassurance. (If for some reason
26 : * we get a value that would have worked on the old code, but
27 : * which would be botched by the conversion code, the validity
28 : * checks will fail it first so we get an error rather than bad
29 : * data.)
30 : *
31 : * Note also that empty hstores are the same in (2) and (3), so
32 : * there are some special-case paths for them.
33 : *
34 : * We tell the difference between formats (1) and (2) as follows (but
35 : * note that there are some edge cases where we can't tell; see
36 : * comments in hstoreUpgrade):
37 : *
38 : * First, since there must be at least one entry, we look at
39 : * how the bits line up. The new format looks like:
40 : *
41 : * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 : * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 : *
44 : * The old format looks like one of these, depending on endianness
45 : * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 : * n = isnull)
47 : *
48 : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 : * nppppppppppppppppppppppppppppppp
50 : *
51 : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 : * pppppppppppppppppppppppppppppppn
53 : *
54 : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 : * nppppppppppppppppppppppppppppppp
56 : *
57 : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 : * pppppppppppppppppppppppppppppppn (usual i386 format)
59 : *
60 : * If the entry is in old format, for the first entry "pos" must be 0.
61 : * We can obviously see that either keylen or vallen must be >32768
62 : * for there to be any ambiguity (which is why lengths less than that
63 : * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the
64 : * new-format interpretation can only be 0 or 1, which constrains all
65 : * but three bits of the old-format's k and v fields. But in addition
66 : * to all of this, the data length implied by the keylen and vallen
67 : * must fit in the varlena size. So the only ambiguous edge case for
68 : * hstores with only one entry occurs between a new-format entry with
69 : * an excess (~32k) of padding, and an old-format entry. But we know
70 : * which format to use in that case based on how we were compiled, so
71 : * no actual data corruption can occur.
72 : *
73 : * If there is more than one entry, the requirement that keys do not
74 : * decrease in length, and that positions increase contiguously, and
75 : * that the end of the data not be beyond the end of the varlena
76 : * itself, disambiguates in almost all other cases. There is a small
77 : * set of ambiguous cases which could occur if the old-format value
78 : * has a large excess of padding and just the right pattern of key
79 : * sizes, but these are also handled based on how we were compiled.
80 : *
81 : * The otherwise undocumented function hstore_version_diag is provided
82 : * for testing purposes.
83 : */
84 : #include "postgres.h"
85 :
86 :
87 : #include "hstore.h"
88 :
89 : /*
90 : * This is the structure used for entries in the old contrib/hstore
91 : * implementation. Notice that this is the same size as the new entry
92 : * (two 32-bit words per key/value pair) and that the header is the
93 : * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 : * etc. are compatible.
95 : *
96 : * If the above statement isn't true on some bizarre platform, we're
97 : * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
98 : */
99 : typedef struct
100 : {
101 : uint16 keylen;
102 : uint16 vallen;
103 : uint32
104 : valisnull:1,
105 : pos:31;
106 : } HOldEntry;
107 :
108 : static int hstoreValidNewFormat(HStore *hs);
109 : static int hstoreValidOldFormat(HStore *hs);
110 :
111 :
112 : /*
113 : * Validity test for a new-format hstore.
114 : * 0 = not valid
115 : * 1 = valid but with "slop" in the length
116 : * 2 = exactly valid
117 : */
118 : static int
119 0 : hstoreValidNewFormat(HStore *hs)
120 : {
121 0 : int count = HS_COUNT(hs);
122 0 : HEntry *entries = ARRPTR(hs);
123 0 : int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
124 0 : int vsize = CALCDATASIZE(count, buflen);
125 : int i;
126 :
127 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
128 0 : return 2;
129 :
130 0 : if (count == 0)
131 0 : return 2;
132 :
133 0 : if (!HSE_ISFIRST(entries[0]))
134 0 : return 0;
135 :
136 0 : if (vsize > VARSIZE(hs))
137 0 : return 0;
138 :
139 : /* entry position must be nondecreasing */
140 :
141 0 : for (i = 1; i < 2 * count; ++i)
142 : {
143 0 : if (HSE_ISFIRST(entries[i])
144 0 : || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
145 0 : return 0;
146 : }
147 :
148 : /* key length must be nondecreasing and keys must not be null */
149 :
150 0 : for (i = 1; i < count; ++i)
151 : {
152 0 : if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
153 0 : return 0;
154 0 : if (HSE_ISNULL(entries[2 * i]))
155 0 : return 0;
156 : }
157 :
158 0 : if (vsize != VARSIZE(hs))
159 0 : return 1;
160 :
161 0 : return 2;
162 : }
163 :
164 : /*
165 : * Validity test for an old-format hstore.
166 : * 0 = not valid
167 : * 1 = valid but with "slop" in the length
168 : * 2 = exactly valid
169 : */
170 : static int
171 0 : hstoreValidOldFormat(HStore *hs)
172 : {
173 0 : int count = hs->size_;
174 0 : HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
175 : int vsize;
176 0 : int lastpos = 0;
177 : int i;
178 :
179 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
180 0 : return 0;
181 :
182 : /* New format uses an HEntry for key and another for value */
183 : StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
184 : "old hstore format is not upward-compatible");
185 :
186 0 : if (count == 0)
187 0 : return 2;
188 :
189 0 : if (count > 0xFFFFFFF)
190 0 : return 0;
191 :
192 0 : if (CALCDATASIZE(count, 0) > VARSIZE(hs))
193 0 : return 0;
194 :
195 0 : if (entries[0].pos != 0)
196 0 : return 0;
197 :
198 : /* key length must be nondecreasing */
199 :
200 0 : for (i = 1; i < count; ++i)
201 : {
202 0 : if (entries[i].keylen < entries[i - 1].keylen)
203 0 : return 0;
204 : }
205 :
206 : /*
207 : * entry position must be strictly increasing, except for the first entry
208 : * (which can be ""=>"" and thus zero-length); and all entries must be
209 : * properly contiguous
210 : */
211 :
212 0 : for (i = 0; i < count; ++i)
213 : {
214 0 : if (entries[i].pos != lastpos)
215 0 : return 0;
216 0 : lastpos += (entries[i].keylen
217 0 : + ((entries[i].valisnull) ? 0 : entries[i].vallen));
218 : }
219 :
220 0 : vsize = CALCDATASIZE(count, lastpos);
221 :
222 0 : if (vsize > VARSIZE(hs))
223 0 : return 0;
224 :
225 0 : if (vsize != VARSIZE(hs))
226 0 : return 1;
227 :
228 0 : return 2;
229 : }
230 :
231 :
232 : /*
233 : * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
234 : */
235 : HStore *
236 219546 : hstoreUpgrade(Datum orig)
237 : {
238 219546 : HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
239 : int valid_new;
240 : int valid_old;
241 :
242 : /* Return immediately if no conversion needed */
243 219546 : if (hs->size_ & HS_FLAG_NEWVERSION)
244 219546 : return hs;
245 :
246 : /* Do we have a writable copy? If not, make one. */
247 0 : if ((void *) hs == (void *) DatumGetPointer(orig))
248 0 : hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
249 :
250 0 : if (hs->size_ == 0 ||
251 0 : (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
252 : {
253 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
254 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
255 0 : return hs;
256 : }
257 :
258 0 : valid_new = hstoreValidNewFormat(hs);
259 0 : valid_old = hstoreValidOldFormat(hs);
260 :
261 0 : if (!valid_old || hs->size_ == 0)
262 : {
263 0 : if (valid_new)
264 : {
265 : /*
266 : * force the "new version" flag and the correct varlena length.
267 : */
268 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
269 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
270 0 : return hs;
271 : }
272 : else
273 : {
274 0 : elog(ERROR, "invalid hstore value found");
275 : }
276 : }
277 :
278 : /*
279 : * this is the tricky edge case. It is only possible in some quite extreme
280 : * cases (the hstore must have had a lot of wasted padding space at the
281 : * end). But the only way a "new" hstore value could get here is if we're
282 : * upgrading in place from a pre-release version of hstore-new (NOT
283 : * contrib/hstore), so we work off the following assumptions: 1. If you're
284 : * moving from old contrib/hstore to hstore-new, you're required to fix up
285 : * any potential conflicts first, e.g. by running ALTER TABLE ... USING
286 : * col::text::hstore; on all hstore columns before upgrading. 2. If you're
287 : * moving from old contrib/hstore to new contrib/hstore, then "new" values
288 : * are impossible here 3. If you're moving from pre-release hstore-new to
289 : * hstore-new, then "old" values are impossible here 4. If you're moving
290 : * from pre-release hstore-new to new contrib/hstore, you're not doing so
291 : * as an in-place upgrade, so there is no issue So the upshot of all this
292 : * is that we can treat all the edge cases as "new" if we're being built
293 : * as hstore-new, and "old" if we're being built as contrib/hstore.
294 : *
295 : * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
296 : * beta-tested. But for now, it would be very useful to know if anyone can
297 : * actually reach this case in a non-contrived setting.
298 : */
299 :
300 0 : if (valid_new)
301 : {
302 : #ifdef HSTORE_IS_HSTORE_NEW
303 : elog(WARNING, "ambiguous hstore value resolved as hstore-new");
304 :
305 : /*
306 : * force the "new version" flag and the correct varlena length.
307 : */
308 : HS_SETCOUNT(hs, HS_COUNT(hs));
309 : HS_FIXSIZE(hs, HS_COUNT(hs));
310 : return hs;
311 : #else
312 0 : elog(WARNING, "ambiguous hstore value resolved as hstore-old");
313 : #endif
314 : }
315 :
316 : /*
317 : * must have an old-style value. Overwrite it in place as a new-style one.
318 : */
319 : {
320 0 : int count = hs->size_;
321 0 : HEntry *new_entries = ARRPTR(hs);
322 0 : HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
323 : int i;
324 :
325 0 : for (i = 0; i < count; ++i)
326 : {
327 0 : uint32 pos = old_entries[i].pos;
328 0 : uint32 keylen = old_entries[i].keylen;
329 0 : uint32 vallen = old_entries[i].vallen;
330 0 : bool isnull = old_entries[i].valisnull;
331 :
332 0 : if (isnull)
333 0 : vallen = 0;
334 :
335 0 : new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
336 0 : new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
337 0 : | ((isnull) ? HENTRY_ISNULL : 0));
338 : }
339 :
340 0 : if (count)
341 0 : new_entries[0].entry |= HENTRY_ISFIRST;
342 0 : HS_SETCOUNT(hs, count);
343 0 : HS_FIXSIZE(hs, count);
344 : }
345 :
346 0 : return hs;
347 : }
348 :
349 :
350 14 : PG_FUNCTION_INFO_V1(hstore_version_diag);
351 : Datum
352 0 : hstore_version_diag(PG_FUNCTION_ARGS)
353 : {
354 0 : HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
355 0 : int valid_new = hstoreValidNewFormat(hs);
356 0 : int valid_old = hstoreValidOldFormat(hs);
357 :
358 0 : PG_RETURN_INT32(valid_old * 10 + valid_new);
359 : }
|