PostgreSQL Source Code git master
unicode_category.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * unicode_category.h
4 * Routines for determining the category of Unicode characters.
5 *
6 * These definitions can be used by both frontend and backend code.
7 *
8 * Copyright (c) 2017-2025, PostgreSQL Global Development Group
9 *
10 * src/include/common/unicode_category.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef UNICODE_CATEGORY_H
15#define UNICODE_CATEGORY_H
16
17/*
18 * Unicode General Category Values
19 *
20 * See: https://www.unicode.org/reports/tr44/#General_Category_Values
21 *
22 * The Unicode stability policy guarantees: "The enumeration of
23 * General_Category property values is fixed. No new values will be
24 * added". See: https://www.unicode.org/policies/stability_policy.html
25 *
26 * Numeric values chosen to match corresponding ICU UCharCategory.
27 */
typedef enum pg_unicode_category
{
	/*
	 * The trailing comment on each member is the two-letter General_Category
	 * abbreviation.  Numeric values are contiguous from 0 to 29 and must not
	 * be renumbered: they are chosen to match ICU's UCharCategory.
	 */
	PG_U_UNASSIGNED = 0,		/* Cn */
	PG_U_UPPERCASE_LETTER = 1,	/* Lu */
	PG_U_LOWERCASE_LETTER = 2,	/* Ll */
	PG_U_TITLECASE_LETTER = 3,	/* Lt */
	PG_U_MODIFIER_LETTER = 4,	/* Lm */
	PG_U_OTHER_LETTER = 5,		/* Lo */
	PG_U_NONSPACING_MARK = 6,	/* Mn */
	PG_U_ENCLOSING_MARK = 7,	/* Me */
	PG_U_SPACING_MARK = 8,		/* Mc */
	PG_U_DECIMAL_NUMBER = 9,	/* Nd */
	PG_U_LETTER_NUMBER = 10,	/* Nl */
	PG_U_OTHER_NUMBER = 11,		/* No */
	PG_U_SPACE_SEPARATOR = 12,	/* Zs */
	PG_U_LINE_SEPARATOR = 13,	/* Zl */
	PG_U_PARAGRAPH_SEPARATOR = 14,	/* Zp */
	PG_U_CONTROL = 15,			/* Cc */
	PG_U_FORMAT = 16,			/* Cf */
	PG_U_PRIVATE_USE = 17,		/* Co */
	PG_U_SURROGATE = 18,		/* Cs */
	PG_U_DASH_PUNCTUATION = 19, /* Pd */
	PG_U_OPEN_PUNCTUATION = 20, /* Ps */
	PG_U_CLOSE_PUNCTUATION = 21,	/* Pe */
	PG_U_CONNECTOR_PUNCTUATION = 22,	/* Pc */
	PG_U_OTHER_PUNCTUATION = 23,	/* Po */
	PG_U_MATH_SYMBOL = 24,		/* Sm */
	PG_U_CURRENCY_SYMBOL = 25,	/* Sc */
	PG_U_MODIFIER_SYMBOL = 26,	/* Sk */
	PG_U_OTHER_SYMBOL = 27,		/* So */
	PG_U_INITIAL_PUNCTUATION = 28,	/* Pi */
	PG_U_FINAL_PUNCTUATION = 29 /* Pf */
} pg_unicode_category;
61
/*
 * Category lookup: unicode_category() takes a code point and returns a
 * pg_unicode_category value.  The two accessors below take a category value
 * and return a constant string -- presumably the long name and the two-letter
 * abbreviation respectively, judging by the function names (confirm against
 * the implementation).
 */
62extern pg_unicode_category unicode_category(char32_t code);
63extern const char *unicode_category_string(pg_unicode_category category);
64extern const char *unicode_category_abbrev(pg_unicode_category category);
65
/*
 * Boolean predicates over a single code point.  The names suggest each one
 * tests the like-named Unicode character property; the exact property
 * definitions are not visible in this header.
 */
66extern bool pg_u_prop_alphabetic(char32_t code);
67extern bool pg_u_prop_lowercase(char32_t code);
68extern bool pg_u_prop_uppercase(char32_t code);
69extern bool pg_u_prop_cased(char32_t code);
70extern bool pg_u_prop_case_ignorable(char32_t code);
71extern bool pg_u_prop_white_space(char32_t code);
72extern bool pg_u_prop_hex_digit(char32_t code);
73extern bool pg_u_prop_join_control(char32_t code);
74
/*
 * Character-class style predicates over a single code point.  Some take an
 * additional boolean "posix" argument; its effect is determined by the
 * implementation and cannot be stated from this header alone.
 */
75extern bool pg_u_isdigit(char32_t code, bool posix);
76extern bool pg_u_isalpha(char32_t code);
77extern bool pg_u_isalnum(char32_t code, bool posix);
78extern bool pg_u_isword(char32_t code);
79extern bool pg_u_isupper(char32_t code);
80extern bool pg_u_islower(char32_t code);
81extern bool pg_u_isblank(char32_t code);
82extern bool pg_u_iscntrl(char32_t code);
83extern bool pg_u_isgraph(char32_t code);
84extern bool pg_u_isprint(char32_t code);
85extern bool pg_u_ispunct(char32_t code, bool posix);
86extern bool pg_u_isspace(char32_t code);
87extern bool pg_u_isxdigit(char32_t code, bool posix);
88
89#endif /* UNICODE_CATEGORY_H */
bool pg_u_isalnum(char32_t code, bool posix)
const char * unicode_category_string(pg_unicode_category category)
bool pg_u_prop_cased(char32_t code)
bool pg_u_prop_white_space(char32_t code)
bool pg_u_isprint(char32_t code)
bool pg_u_islower(char32_t code)
const char * unicode_category_abbrev(pg_unicode_category category)
bool pg_u_iscntrl(char32_t code)
pg_unicode_category
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_DECIMAL_NUMBER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_SPACE_SEPARATOR
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_CONTROL
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_UNASSIGNED
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK
pg_unicode_category unicode_category(char32_t code)
bool pg_u_prop_lowercase(char32_t code)
bool pg_u_prop_join_control(char32_t code)
bool pg_u_isdigit(char32_t code, bool posix)
bool pg_u_isalpha(char32_t code)
bool pg_u_prop_uppercase(char32_t code)
bool pg_u_isword(char32_t code)
bool pg_u_isxdigit(char32_t code, bool posix)
bool pg_u_prop_case_ignorable(char32_t code)
bool pg_u_ispunct(char32_t code, bool posix)
bool pg_u_prop_hex_digit(char32_t code)
bool pg_u_isblank(char32_t code)
bool pg_u_isgraph(char32_t code)
bool pg_u_isspace(char32_t code)
bool pg_u_isupper(char32_t code)
bool pg_u_prop_alphabetic(char32_t code)