Line data Source code
1 : /*
2 : * contrib/hstore/hstore_compat.c
3 : *
4 : * Notes on old/new hstore format disambiguation.
5 : *
6 : * There are three formats to consider:
7 : * 1) old contrib/hstore (referred to as hstore-old)
8 : * 2) prerelease pgfoundry hstore
9 : * 3) new contrib/hstore
10 : *
11 : * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 : * bit, which is set in (3) but not (2).
13 : *
14 : * Values that are already in format (3), or which are
15 : * unambiguously in format (2), are handled by the first
16 : * "return immediately" test in hstoreUpgrade().
17 : *
18 : * To stress a point: we ONLY get here with possibly-ambiguous
19 : * values if we're doing some sort of in-place migration from an
20 : * old prerelease pgfoundry hstore-new; and we explicitly don't
21 : * support that without fixing up any potentially padded values
22 : * first. Most of the code here is serious overkill, but the
23 : * performance penalty isn't serious (especially compared to the
24 : * palloc() that we have to do anyway) and the belt-and-braces
25 : * validity checks provide some reassurance. (If for some reason
26 : * we get a value that would have worked on the old code, but
27 : * which would be botched by the conversion code, the validity
28 : * checks will fail it first so we get an error rather than bad
29 : * data.)
30 : *
31 : * Note also that empty hstores are the same in (1) and (2) (a zero
32 : * count word with no flag bit set), so there are some special-case
32 : * paths for them.
33 : *
34 : * We tell the difference between formats (1) and (2) as follows (but
35 : * note that there are some edge cases where we can't tell; see
36 : * comments in hstoreUpgrade):
37 : *
38 : * First, since there must be at least one entry, we look at
39 : * how the bits line up. The new format looks like:
40 : *
41 : * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 : * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 : *
44 : * The old format looks like one of these, depending on endianness
45 : * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 : * n = isnull)
47 : *
48 : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 : * nppppppppppppppppppppppppppppppp
50 : *
51 : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 : * pppppppppppppppppppppppppppppppn
53 : *
54 : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 : * nppppppppppppppppppppppppppppppp
56 : *
57 : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 : * pppppppppppppppppppppppppppppppn (usual i386 format)
59 : *
60 : * If the entry is in old format, for the first entry "pos" must be 0.
61 : * We can obviously see that either keylen or vallen must be >=32768
62 : * for there to be any ambiguity (which is why lengths less than that
63 : * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
64 : * new-format interpretation can only be 0 or 1, which constrains all
65 : * but three bits of the old-format's k and v fields. But in addition
66 : * to all of this, the data length implied by the keylen and vallen
67 : * must fit in the varlena size. So the only ambiguous edge case for
68 : * hstores with only one entry occurs between a new-format entry with
69 : * an excess (~32k) of padding, and an old-format entry. But we know
70 : * which format to use in that case based on how we were compiled, so
71 : * no actual data corruption can occur.
72 : *
73 : * If there is more than one entry, the requirement that keys do not
74 : * decrease in length, and that positions increase contiguously, and
75 : * that the end of the data not be beyond the end of the varlena
76 : * itself, disambiguates in almost all other cases. There is a small
77 : * set of ambiguous cases which could occur if the old-format value
78 : * has a large excess of padding and just the right pattern of key
79 : * sizes, but these are also handled based on how we were compiled.
80 : *
81 : * The otherwise undocumented function hstore_version_diag is provided
82 : * for testing purposes.
83 : */
84 : #include "postgres.h"
85 :
86 :
87 : #include "hstore.h"
88 :
89 : /*
90 : * This is the structure used for entries in the old contrib/hstore
91 : * implementation. Notice that this is the same size as the new entry
92 : * (two 32-bit words per key/value pair) and that the header is the
93 : * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 : * etc. are compatible.
95 : *
96 : * If the above statement isn't true on some bizarre platform, we're
97 : * a bit hosed.
98 : */
99 : typedef struct
100 : {
101 : uint16 keylen;
102 : uint16 vallen;
103 : uint32
104 : valisnull:1,
105 : pos:31;
106 : } HOldEntry;
107 :
108 : StaticAssertDecl(sizeof(HOldEntry) == 2 * sizeof(HEntry),
109 : "old hstore format is not upward-compatible");
110 :
111 : static int hstoreValidNewFormat(HStore *hs);
112 : static int hstoreValidOldFormat(HStore *hs);
113 :
114 :
115 : /*
116 : * Validity test for a new-format hstore.
117 : * 0 = not valid
118 : * 1 = valid but with "slop" in the length
119 : * 2 = exactly valid
120 : */
121 : static int
122 0 : hstoreValidNewFormat(HStore *hs)
123 : {
124 0 : int count = HS_COUNT(hs);
125 0 : HEntry *entries = ARRPTR(hs);
126 0 : int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
127 0 : int vsize = CALCDATASIZE(count, buflen);
128 : int i;
129 :
130 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
131 0 : return 2;
132 :
133 0 : if (count == 0)
134 0 : return 2;
135 :
136 0 : if (!HSE_ISFIRST(entries[0]))
137 0 : return 0;
138 :
139 0 : if (vsize > VARSIZE(hs))
140 0 : return 0;
141 :
142 : /* entry position must be nondecreasing */
143 :
144 0 : for (i = 1; i < 2 * count; ++i)
145 : {
146 0 : if (HSE_ISFIRST(entries[i])
147 0 : || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
148 0 : return 0;
149 : }
150 :
151 : /* key length must be nondecreasing and keys must not be null */
152 :
153 0 : for (i = 1; i < count; ++i)
154 : {
155 0 : if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
156 0 : return 0;
157 0 : if (HSE_ISNULL(entries[2 * i]))
158 0 : return 0;
159 : }
160 :
161 0 : if (vsize != VARSIZE(hs))
162 0 : return 1;
163 :
164 0 : return 2;
165 : }
166 :
167 : /*
168 : * Validity test for an old-format hstore.
169 : * 0 = not valid
170 : * 1 = valid but with "slop" in the length
171 : * 2 = exactly valid
172 : */
173 : static int
174 0 : hstoreValidOldFormat(HStore *hs)
175 : {
176 0 : int count = hs->size_;
177 0 : HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
178 : int vsize;
179 0 : int lastpos = 0;
180 : int i;
181 :
182 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
183 0 : return 0;
184 :
185 0 : if (count == 0)
186 0 : return 2;
187 :
188 0 : if (count > 0xFFFFFFF)
189 0 : return 0;
190 :
191 0 : if (CALCDATASIZE(count, 0) > VARSIZE(hs))
192 0 : return 0;
193 :
194 0 : if (entries[0].pos != 0)
195 0 : return 0;
196 :
197 : /* key length must be nondecreasing */
198 :
199 0 : for (i = 1; i < count; ++i)
200 : {
201 0 : if (entries[i].keylen < entries[i - 1].keylen)
202 0 : return 0;
203 : }
204 :
205 : /*
206 : * entry position must be strictly increasing, except for the first entry
207 : * (which can be ""=>"" and thus zero-length); and all entries must be
208 : * properly contiguous
209 : */
210 :
211 0 : for (i = 0; i < count; ++i)
212 : {
213 0 : if (entries[i].pos != lastpos)
214 0 : return 0;
215 0 : lastpos += (entries[i].keylen
216 0 : + ((entries[i].valisnull) ? 0 : entries[i].vallen));
217 : }
218 :
219 0 : vsize = CALCDATASIZE(count, lastpos);
220 :
221 0 : if (vsize > VARSIZE(hs))
222 0 : return 0;
223 :
224 0 : if (vsize != VARSIZE(hs))
225 0 : return 1;
226 :
227 0 : return 2;
228 : }
229 :
230 :
231 : /*
232 : * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
233 : */
234 : HStore *
235 219540 : hstoreUpgrade(Datum orig)
236 : {
237 219540 : HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
238 : int valid_new;
239 : int valid_old;
240 :
241 : /* Return immediately if no conversion needed */
242 219540 : if (hs->size_ & HS_FLAG_NEWVERSION)
243 219540 : return hs;
244 :
245 : /* Do we have a writable copy? If not, make one. */
246 0 : if ((void *) hs == (void *) DatumGetPointer(orig))
247 0 : hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
248 :
249 0 : if (hs->size_ == 0 ||
250 0 : (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
251 : {
252 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
253 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
254 0 : return hs;
255 : }
256 :
257 0 : valid_new = hstoreValidNewFormat(hs);
258 0 : valid_old = hstoreValidOldFormat(hs);
259 :
260 0 : if (!valid_old || hs->size_ == 0)
261 : {
262 0 : if (valid_new)
263 : {
264 : /*
265 : * force the "new version" flag and the correct varlena length.
266 : */
267 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
268 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
269 0 : return hs;
270 : }
271 : else
272 : {
273 0 : elog(ERROR, "invalid hstore value found");
274 : }
275 : }
276 :
277 : /*
278 : * this is the tricky edge case. It is only possible in some quite extreme
279 : * cases (the hstore must have had a lot of wasted padding space at the
280 : * end). But the only way a "new" hstore value could get here is if we're
281 : * upgrading in place from a pre-release version of hstore-new (NOT
282 : * contrib/hstore), so we work off the following assumptions: 1. If you're
283 : * moving from old contrib/hstore to hstore-new, you're required to fix up
284 : * any potential conflicts first, e.g. by running ALTER TABLE ... USING
285 : * col::text::hstore; on all hstore columns before upgrading. 2. If you're
286 : * moving from old contrib/hstore to new contrib/hstore, then "new" values
287 : * are impossible here 3. If you're moving from pre-release hstore-new to
288 : * hstore-new, then "old" values are impossible here 4. If you're moving
289 : * from pre-release hstore-new to new contrib/hstore, you're not doing so
290 : * as an in-place upgrade, so there is no issue So the upshot of all this
291 : * is that we can treat all the edge cases as "new" if we're being built
292 : * as hstore-new, and "old" if we're being built as contrib/hstore.
293 : *
294 : * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
295 : * beta-tested. But for now, it would be very useful to know if anyone can
296 : * actually reach this case in a non-contrived setting.
297 : */
298 :
299 0 : if (valid_new)
300 : {
301 : #ifdef HSTORE_IS_HSTORE_NEW
302 : elog(WARNING, "ambiguous hstore value resolved as hstore-new");
303 :
304 : /*
305 : * force the "new version" flag and the correct varlena length.
306 : */
307 : HS_SETCOUNT(hs, HS_COUNT(hs));
308 : HS_FIXSIZE(hs, HS_COUNT(hs));
309 : return hs;
310 : #else
311 0 : elog(WARNING, "ambiguous hstore value resolved as hstore-old");
312 : #endif
313 : }
314 :
315 : /*
316 : * must have an old-style value. Overwrite it in place as a new-style one.
317 : */
318 : {
319 0 : int count = hs->size_;
320 0 : HEntry *new_entries = ARRPTR(hs);
321 0 : HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
322 : int i;
323 :
324 0 : for (i = 0; i < count; ++i)
325 : {
326 0 : uint32 pos = old_entries[i].pos;
327 0 : uint32 keylen = old_entries[i].keylen;
328 0 : uint32 vallen = old_entries[i].vallen;
329 0 : bool isnull = old_entries[i].valisnull;
330 :
331 0 : if (isnull)
332 0 : vallen = 0;
333 :
334 0 : new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
335 0 : new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
336 0 : | ((isnull) ? HENTRY_ISNULL : 0));
337 : }
338 :
339 0 : if (count)
340 0 : new_entries[0].entry |= HENTRY_ISFIRST;
341 0 : HS_SETCOUNT(hs, count);
342 0 : HS_FIXSIZE(hs, count);
343 : }
344 :
345 0 : return hs;
346 : }
347 :
348 :
349 14 : PG_FUNCTION_INFO_V1(hstore_version_diag);
350 : Datum
351 0 : hstore_version_diag(PG_FUNCTION_ARGS)
352 : {
353 0 : HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
354 0 : int valid_new = hstoreValidNewFormat(hs);
355 0 : int valid_old = hstoreValidOldFormat(hs);
356 :
357 0 : PG_RETURN_INT32(valid_old * 10 + valid_new);
358 : }
|