PostgreSQL Source Code git master
pg_locale_builtin.c
Go to the documentation of this file.
/*-----------------------------------------------------------------------
 *
 * PostgreSQL locale utilities for builtin provider
 *
 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_locale_builtin.c
 *
 *-----------------------------------------------------------------------
 */
11
12#include "postgres.h"
13
14#include "catalog/pg_database.h"
16#include "common/unicode_case.h"
18#include "miscadmin.h"
19#include "utils/builtins.h"
20#include "utils/pg_locale.h"
21#include "utils/syscache.h"
22
24 MemoryContext context);
25extern char *get_collation_actual_version_builtin(const char *collcollate);
26
/*
 * Iteration state for initcap_wbnext(), the word boundary callback that
 * strtitle_builtin() hands to unicode_strtitle().
 */
struct WordBoundaryState
{
	const char *str;			/* string being scanned */
	size_t		len;			/* scan limit for str, in bytes */
	size_t		offset;			/* byte offset of the next character to read */
	bool		posix;			/* passed through to pg_u_isalnum() */
	bool		init;			/* false until the first boundary is reported */
	bool		prev_alnum;		/* pg_u_isalnum() result at the last boundary */
};
36
37/*
38 * In UTF-8, pg_wchar is guaranteed to be the code point value.
39 */
40static inline char32_t
42{
44 return (char32_t) wc;
45}
46
47static inline pg_wchar
48to_pg_wchar(char32_t c32)
49{
51 return (pg_wchar) c32;
52}
53
54/*
55 * Simple word boundary iterator that draws boundaries each time the result of
56 * pg_u_isalnum() changes.
57 */
58static size_t
60{
61 struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
62
63 while (wbstate->offset < wbstate->len &&
64 wbstate->str[wbstate->offset] != '\0')
65 {
66 char32_t u = utf8_to_unicode((unsigned char *) wbstate->str +
67 wbstate->offset);
68 bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
69
70 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
71 {
72 size_t prev_offset = wbstate->offset;
73
74 wbstate->init = true;
75 wbstate->offset += unicode_utf8len(u);
76 wbstate->prev_alnum = curr_alnum;
77 return prev_offset;
78 }
79
80 wbstate->offset += unicode_utf8len(u);
81 }
82
83 return wbstate->len;
84}
85
86static size_t
87strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
89{
90 return unicode_strlower(dest, destsize, src, srclen,
91 locale->builtin.casemap_full);
92}
93
94static size_t
95strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
97{
98 struct WordBoundaryState wbstate = {
99 .str = src,
100 .len = srclen,
101 .offset = 0,
102 .posix = !locale->builtin.casemap_full,
103 .init = false,
104 .prev_alnum = false,
105 };
106
107 return unicode_strtitle(dest, destsize, src, srclen,
108 locale->builtin.casemap_full,
109 initcap_wbnext, &wbstate);
110}
111
112static size_t
113strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
115{
116 return unicode_strupper(dest, destsize, src, srclen,
117 locale->builtin.casemap_full);
118}
119
120static size_t
121strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
123{
124 return unicode_strfold(dest, destsize, src, srclen,
125 locale->builtin.casemap_full);
126}
127
128static bool
130{
131 return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
132}
133
134static bool
136{
137 return pg_u_isalpha(to_char32(wc));
138}
139
140static bool
142{
143 return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
144}
145
146static bool
148{
149 return pg_u_isupper(to_char32(wc));
150}
151
152static bool
154{
155 return pg_u_islower(to_char32(wc));
156}
157
158static bool
160{
161 return pg_u_isgraph(to_char32(wc));
162}
163
164static bool
166{
167 return pg_u_isprint(to_char32(wc));
168}
169
170static bool
172{
173 return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
174}
175
176static bool
178{
179 return pg_u_isspace(to_char32(wc));
180}
181
182static bool
184{
185 return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
186}
187
188static bool
190{
191 return IS_HIGHBIT_SET(ch) ||
192 (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
193}
194
195static pg_wchar
197{
199}
200
201static pg_wchar
203{
205}
206
209 .strtitle = strtitle_builtin,
210 .strupper = strupper_builtin,
211 .strfold = strfold_builtin,
212 .wc_isdigit = wc_isdigit_builtin,
213 .wc_isalpha = wc_isalpha_builtin,
214 .wc_isalnum = wc_isalnum_builtin,
215 .wc_isupper = wc_isupper_builtin,
216 .wc_islower = wc_islower_builtin,
217 .wc_isgraph = wc_isgraph_builtin,
218 .wc_isprint = wc_isprint_builtin,
219 .wc_ispunct = wc_ispunct_builtin,
220 .wc_isspace = wc_isspace_builtin,
221 .wc_isxdigit = wc_isxdigit_builtin,
222 .char_is_cased = char_is_cased_builtin,
223 .wc_tolower = wc_tolower_builtin,
224 .wc_toupper = wc_toupper_builtin,
225};
226
229{
230 const char *locstr;
231 pg_locale_t result;
232
233 if (collid == DEFAULT_COLLATION_OID)
234 {
235 HeapTuple tp;
236 Datum datum;
237
239 if (!HeapTupleIsValid(tp))
240 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
241 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
242 Anum_pg_database_datlocale);
243 locstr = TextDatumGetCString(datum);
244 ReleaseSysCache(tp);
245 }
246 else
247 {
248 HeapTuple tp;
249 Datum datum;
250
252 if (!HeapTupleIsValid(tp))
253 elog(ERROR, "cache lookup failed for collation %u", collid);
254 datum = SysCacheGetAttrNotNull(COLLOID, tp,
255 Anum_pg_collation_colllocale);
256 locstr = TextDatumGetCString(datum);
257 ReleaseSysCache(tp);
258 }
259
261
262 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
263
264 result->builtin.locale = MemoryContextStrdup(context, locstr);
265 result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
266 result->deterministic = true;
267 result->collate_is_c = true;
268 result->ctype_is_c = (strcmp(locstr, "C") == 0);
269 if (!result->ctype_is_c)
270 result->ctype = &ctype_methods_builtin;
271
272 return result;
273}
274
275char *
277{
278 /*
279 * The only two supported locales (C and C.UTF-8) are both based on memcmp
280 * and are not expected to change, but track the version anyway.
281 *
282 * Note that the character semantics may change for some locales, but the
283 * collation version only tracks changes to sort order.
284 */
285 if (strcmp(collcollate, "C") == 0)
286 return "1";
287 else if (strcmp(collcollate, "C.UTF-8") == 0)
288 return "1";
289 else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
290 return "1";
291 else
293 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
294 errmsg("invalid locale name \"%s\" for builtin provider",
295 collcollate)));
296
297 return NULL; /* keep compiler quiet */
298}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1145
Oid collid
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
Oid MyDatabaseId
Definition: globals.c:94
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static char * locale
Definition: initdb.c:140
unsigned int pg_wchar
Definition: mbprint.c:31
static char32_t utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1746
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1263
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1687
static pg_wchar wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
static pg_wchar wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
static bool wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static size_t initcap_wbnext(void *state)
static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static bool wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
static bool char_is_cased_builtin(char ch, pg_locale_t locale)
static bool wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
static pg_wchar to_pg_wchar(char32_t c32)
static char32_t to_char32(pg_wchar wc)
static size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
static bool wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static const struct ctype_methods ctype_methods_builtin
@ PG_UTF8
Definition: pg_wchar.h:232
static int unicode_utf8len(char32_t c)
Definition: pg_wchar.h:607
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
unsigned int Oid
Definition: postgres_ext.h:32
size_t(* strlower)(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.h:101
const struct ctype_methods * ctype
Definition: pg_locale.h:167
struct pg_locale_struct::@166::@168 builtin
const char * locale
Definition: pg_locale.h:173
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:220
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:625
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:165
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:101
char32_t unicode_lowercase_simple(char32_t code)
Definition: unicode_case.c:50
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:138
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:189
char32_t unicode_uppercase_simple(char32_t code)
Definition: unicode_case.c:66
bool pg_u_isalnum(char32_t code, bool posix)
bool pg_u_isprint(char32_t code)
bool pg_u_islower(char32_t code)
bool pg_u_isdigit(char32_t code, bool posix)
bool pg_u_isalpha(char32_t code)
bool pg_u_isxdigit(char32_t code, bool posix)
bool pg_u_ispunct(char32_t code, bool posix)
bool pg_u_isgraph(char32_t code)
bool pg_u_isspace(char32_t code)
bool pg_u_isupper(char32_t code)