Summary of common PostgreSQL Regular Expression functions,
Summary of common PostgreSQL Regular Expression Functions
Regular Expressions are a useful tool for programs that require complex data processing. This article focuses on some common PostgreSQL Regular Expression functions and some functions in the source code.
Directory structure of the relevant part of the regular expression
[root@localhost regex]# pwd/opt/hgdb-core/src/include/regex[root@localhost regex]# lltotal 40-rw-r--r--. 1 postgres postgres 3490 Mar 19 19:00 regcustom.h-rw-r--r--. 1 postgres postgres 1332 Mar 19 18:59 regerrs.h-rw-r--r--. 1 postgres postgres 6703 Mar 19 19:00 regex.h-rw-r--r--. 1 postgres postgres 2353 Mar 19 19:00 regexport.h-rw-r--r--. 1 postgres postgres 16454 Mar 19 19:00 regguts.h
Regular Expression compilation, matching, release, and error information files, which will be detailed later
[root@localhost regex]# pwd/opt/hgdb-core/src/backend/regex[root@localhost regex]# ll reg*.c-rw-r--r--. 1 postgres postgres 55851 Mar 19 19:00 regcomp.c-rw-r--r--. 1 postgres postgres 3671 Mar 19 18:59 regerror.c-rw-r--r--. 1 postgres postgres 34873 Mar 19 19:00 regexec.c-rw-r--r--. 1 postgres postgres 2123 Mar 19 18:59 regfree.c[root@localhost regex]#
Built-in functions are implemented in regexp. c
[root@localhost adt]# pwd/opt/hgdb-core/src/backend/utils/adt[root@localhost adt]# ll regexp.c-rw-r--r--. 1 postgres postgres 34863 Apr 12 02:29 regexp.c[root@localhost adt]#
Built-in function declaration:
/* src/include/catalog/pg_proc.h */DATA(insert OID = 2073 ( substring PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ textregexsubstr _null_ _null_ _null_ ));DESCR("extract text matching regular expression");DATA(insert OID = 2074 ( substring PGNSP PGUID 14 1 0 0 0 f f f f t f i 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, pg_catalog.similar_escape($2, $3))" _null_ _null_ _null_ ));DESCR("extract text matching SQL99 regular expression");DATA(insert OID = 2284 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace_noopt _null_ _null_ _null_ ));DESCR("replace text using regexp");DATA(insert OID = 2285 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ ));DESCR("replace text using regexp");DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 0 0 f f f f t t i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ ));DESCR("find all match groups for regexp");DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 0 f f f f t t i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ ));DESCR("find all match groups for regexp");DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ ));DESCR("split string by pattern");DATA(insert OID = 2766 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table _null_ _null_ _null_ ));DESCR("split string by pattern");DATA(insert OID = 2767 ( regexp_split_to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_array_no_flags _null_ _null_ _null_ ));DESCR("split string by pattern");DATA(insert OID = 2768 ( regexp_split_to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_array _null_ _null_ _null_ ));
Parameter type and return value type:
postgres=# select oid,typname from pg_type where oid = 25 or oid = 1009; oid | typname ------+--------- 25 | text 1009 | _text(2 rows)
The substring (string from pattern) function extracts a substring from a string that matches the POSIX regular expression pattern. If it does not match, NULL is returned; otherwise, it is the part of the text that matches the pattern.
The regexp_replace (source, pattern, replacement [, flags]) function provides the ability to replace a substring that matches the POSIX regular expression pattern with a new text.
The regexp_matches (string, pattern [, flags]) function returns a text array of all substrings obtained from the POSIX regular expression mode.
The flags parameter is an optional text string that contains 0 or more single-letter tags to change function behavior. Marking g leads to searching for each match in the string, not only the first one. Each match returns a row.
The regexp_split_to_table (string, pattern [, flags]) function uses the POSIX regular expression pattern as the separator to separate strings. Returns a string ..
The regexp_split_to_array (string, pattern [, flags]) function performs the same behavior as regexp_split_to_table, but the returned result is a text array.
For more information, see the user manual.
Src/include/regex. h
Regex_t struct
/* the biggie, a compiled RE (or rather, a front end to same) */typedef struct{ int re_magic; /* magic number */ size_t re_nsub; /* number of subexpressions */ long re_info; /* information about RE */#define REG_UBACKREF 000001#define REG_ULOOKAHEAD 000002#define REG_UBOUNDS 000004#define REG_UBRACES 000010#define REG_UBSALNUM 000020#define REG_UPBOTCH 000040#define REG_UBBS 000100#define REG_UNONPOSIX 000200#define REG_UUNSPEC 000400#define REG_UUNPORT 001000#define REG_ULOCALE 002000#define REG_UEMPTYMATCH 004000#define REG_UIMPOSSIBLE 010000#define REG_USHORTEST 020000 int re_csize; /* sizeof(character) */ char *re_endp; /* backward compatibility kludge */ Oid re_collation; /* Collation that defines LC_CTYPE behavior */ /* the rest is opaque pointers to hidden innards */ char *re_guts; /* `char *' is more portable than `void *' */ char *re_fns;} regex_t;
Stores compiled regular expressions
Regmatch_t struct
/* result reporting (may acquire more fields later) */typedef struct{ regoff_t rm_so; /* start of substring */ regoff_t rm_eo; /* end of substring */} regmatch_t;typedef long regoff_t;
The rm_so member stores the starting position of the matching text string in the target string, and the rm_eo member stores the Ending position. We usually define a group of such structures in the form of arrays.
There are several major function declarations below
/* * the prototypes for exported functions */extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);extern void pg_regfree(regex_t *);extern size_t pg_regerror(int, const regex_t *, char *, size_t);extern void pg_set_regex_collation(Oid collation);
Common functions used to process regular expressions include pg_regcomp (), pg_regexec (), pg_regfree (), and pg_regerror ().
General processing steps: Compile the regular expression pg_regcomp (), match the regular expression pg_regexec (), and release the regular expression pg_regfree ().
Pg_regerror (): When regcomp or regexec is executed to generate an error, you can call this function and return a string containing the error message.
Parameter description
Intpg_regcomp (regex_t * re, const chr * string,/* Regular Expression string */size_t len,/* Regular Expression string Length */int flags, Oid collation) intpg_regexec (regex_t * re,/* The regular expression that has been compiled using the regcomp function */const chr * string,/* Target string */size_t len, /* Target String Length */size_t search_start,/* match start position */rm_detail_t * details,/* NULL */size_t nmatch, /* is the length of the regmatch_t struct array */regmatch_t pmatch [],/* regmatch_t struct array, stores the location information of matching text strings */int flags)
Flags
Src/backend/utils/adt/regexp. c
/* all the options of interest for regex functions */typedef struct pg_re_flags{ int cflags; /* compile flags for Spencer's regex code */ bool glob; /* do it globally (for each occurrence) */} pg_re_flags;
/* * parse_re_flags - parse the options argument of regexp_matches and friends * * flags --- output argument, filled with desired options * opts --- TEXT object, or NULL for defaults * * This accepts all the options allowed by any of the callers; callers that * don't want some have to reject them after the fact. */static voidparse_re_flags(pg_re_flags *flags, text *opts){ /* regex flavor is always folded into the compile flags */ flags->cflags = REG_ADVANCED; flags->glob = false; if (opts) { char *opt_p = VARDATA_ANY(opts); int opt_len = VARSIZE_ANY_EXHDR(opts); int i; for (i = 0; i < opt_len; i++) { switch (opt_p[i]) { case 'g': flags->glob = true; break; case 'b': /* BREs (but why???) */ flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE); break; case 'c': /* case sensitive */ flags->cflags &= ~REG_ICASE; break; case 'e': /* plain EREs */ flags->cflags |= REG_EXTENDED; flags->cflags &= ~(REG_ADVANCED | REG_QUOTE); break; case 'i': /* case insensitive */ flags->cflags |= REG_ICASE; break; case 'm': /* Perloid synonym for n */ case 'n': /* \n affects ^ $ . [^ */ flags->cflags |= REG_NEWLINE; break; case 'p': /* ~Perl, \n affects . [^ */ flags->cflags |= REG_NLSTOP; flags->cflags &= ~REG_NLANCH; break; case 'q': /* literal string */ flags->cflags |= REG_QUOTE; flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED); break; case 's': /* single line, \n ordinary */ flags->cflags &= ~REG_NEWLINE; break; case 't': /* tight syntax */ flags->cflags &= ~REG_EXPANDED; break; case 'w': /* weird, \n affects ^ $ only */ flags->cflags &= ~REG_NLSTOP; flags->cflags |= REG_NLANCH; break; case 'x': /* expanded syntax */ flags->cflags |= REG_EXPANDED; break; default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regexp option: \"%c\"", opt_p[i]))); break; } } }}
| Option |
Description |
| B |
The remaining regular expression is BR. |
| C |
Case-sensitive matching (overwrite operator type) |
| E |
The remaining regular expression is ERE. |
| I |
Case Insensitive matching (overwrite operator type) |
| M |
Historical synonyms of n |
| N |
New Line sensitive horse |
| P |
Sensitive matching of some new rows |
| Q |
Resets the regular expression to a text ("caused") string, all of which are common characters. |
| S |
Non-New Line sensitive matching (default) |
| T |
Tight syntax |
| W |
Reverse some new line sensitive ("weird") Matching |
| X |
Extended syntax |
The above is an example of common PostgreSQL Regular Expression functions. If you have any questions, please leave a message or go to the community on this site for discussion. Thank you for reading this article and hope to help you. Thank you for your support for this site!