1999-04-17 08:37:12 +08:00
|
|
|
|
/*
|
|
|
|
|
+----------------------------------------------------------------------+
|
2004-01-08 16:18:22 +08:00
|
|
|
|
| PHP Version 5 |
|
1999-04-17 08:37:12 +08:00
|
|
|
|
+----------------------------------------------------------------------+
|
2004-01-08 16:18:22 +08:00
|
|
|
|
| Copyright (c) 1997-2004 The PHP Group |
|
1999-04-17 08:37:12 +08:00
|
|
|
|
+----------------------------------------------------------------------+
|
2003-06-11 04:04:29 +08:00
|
|
|
|
| This source file is subject to version 3.0 of the PHP license, |
|
1999-07-16 21:13:16 +08:00
|
|
|
|
| that is bundled with this package in the file LICENSE, and is |
|
2003-06-11 04:04:29 +08:00
|
|
|
|
| available through the world-wide-web at the following url: |
|
|
|
|
|
| http://www.php.net/license/3_0.txt. |
|
1999-07-16 21:13:16 +08:00
|
|
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
|
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
|
|
|
| license@php.net so we can mail you a copy immediately. |
|
1999-04-17 08:37:12 +08:00
|
|
|
|
+----------------------------------------------------------------------+
|
2002-02-28 16:29:35 +08:00
|
|
|
|
| Authors: Rasmus Lerdorf <rasmus@php.net> |
|
1999-04-17 08:37:12 +08:00
|
|
|
|
| Jim Winstead <jimw@php.net> |
|
|
|
|
|
| Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
|
|
|
|
|
+----------------------------------------------------------------------+
|
|
|
|
|
*/
|
|
|
|
|
/* $Id$ */
|
1999-04-24 04:06:01 +08:00
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
#include <stdio.h>
|
2002-09-25 00:34:54 +08:00
|
|
|
|
#include <ctype.h>
|
1999-04-17 08:37:12 +08:00
|
|
|
|
#include "php.h"
|
1999-12-05 03:19:57 +08:00
|
|
|
|
#include "php_string.h"
|
1999-04-17 08:37:12 +08:00
|
|
|
|
#include "reg.h"
|
2000-04-06 05:43:03 +08:00
|
|
|
|
#include "ext/standard/info.h"
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2002-09-25 22:02:34 +08:00
|
|
|
|
ZEND_DECLARE_MODULE_GLOBALS(reg)
|
1999-05-12 23:40:15 +08:00
|
|
|
|
|
|
|
|
|
/* One entry of the compiled-pattern cache: a compiled expression paired with
 * the compile flags that produced it, so a lookup can tell whether the stored
 * program was built with the flags now being requested. */
typedef struct {
	regex_t preg;	/* the compiled expression */
	int cflags;	/* flags handed to regcomp() when preg was built */
} reg_cache;
|
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ _php_regcomp
|
|
|
|
|
*/
|
1999-05-12 23:40:15 +08:00
|
|
|
|
static int _php_regcomp(regex_t *preg, const char *pattern, int cflags)
|
|
|
|
|
{
|
|
|
|
|
int r = 0;
|
|
|
|
|
int patlen = strlen(pattern);
|
|
|
|
|
reg_cache *rc = NULL;
|
2001-07-28 19:36:37 +08:00
|
|
|
|
TSRMLS_FETCH();
|
1999-05-12 23:40:15 +08:00
|
|
|
|
|
1999-08-03 03:17:14 +08:00
|
|
|
|
if(zend_hash_find(®(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == FAILURE ||
|
1999-05-12 23:40:15 +08:00
|
|
|
|
rc->cflags != cflags) {
|
|
|
|
|
r = regcomp(preg, pattern, cflags);
|
|
|
|
|
if(!r) {
|
|
|
|
|
reg_cache rcp;
|
|
|
|
|
|
|
|
|
|
rcp.cflags = cflags;
|
|
|
|
|
memcpy(&rcp.preg, preg, sizeof(*preg));
|
1999-08-03 03:17:14 +08:00
|
|
|
|
zend_hash_update(®(ht_rc), (char *) pattern, patlen+1,
|
1999-05-16 19:19:26 +08:00
|
|
|
|
(void *) &rcp, sizeof(rcp), NULL);
|
1999-05-12 23:40:15 +08:00
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
memcpy(preg, &rc->preg, sizeof(*preg));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
1999-05-12 23:40:15 +08:00
|
|
|
|
|
2000-01-18 01:33:37 +08:00
|
|
|
|
/* Releases the compiled expression held by one cache entry.  Defined ahead of
 * the regfree override further down, so this calls the library regfree(). */
static void _free_reg_cache(reg_cache *rc)
{
	regex_t *compiled = &rc->preg;

	regfree(compiled);
}
|
1999-05-16 19:19:26 +08:00
|
|
|
|
|
1999-11-14 00:51:33 +08:00
|
|
|
|
/* From this point on, code in this file that calls regcomp() is routed to the
 * caching _php_regcomp(), and regfree() becomes a no-op: compiled patterns
 * stay in the cache table and are released by _free_reg_cache instead.
 *
 * regfree() expands to a void expression rather than to a bare ';' so that
 * `regfree(x);` is exactly one statement and stays valid inside an unbraced
 * if/else.  The argument is not evaluated. */
#undef regfree
#define regfree(a) ((void) 0)
#undef regcomp
#define regcomp(a, b, c) _php_regcomp(a, b, c)
|
1999-05-12 23:40:15 +08:00
|
|
|
|
|
2002-09-25 22:02:34 +08:00
|
|
|
|
/* Initialises the regex module globals: sets up the ht_rc pattern-cache hash
 * table and hands zend_hash_init() _free_reg_cache as the callback for
 * entries (cast to the generic void(*)(void *) signature). */
static void php_reg_init_globals(zend_reg_globals *reg_globals TSRMLS_DC)
{
	zend_hash_init(&reg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_reg_cache, 1);
}
|
|
|
|
|
|
2002-09-25 22:02:34 +08:00
|
|
|
|
/* Tears down the regex module globals by destroying the ht_rc pattern-cache
 * hash table. */
static void php_reg_destroy_globals(zend_reg_globals *reg_globals TSRMLS_DC)
{
	zend_hash_destroy(&reg_globals->ht_rc);
}
|
1999-05-12 23:40:15 +08:00
|
|
|
|
|
2002-09-25 22:02:34 +08:00
|
|
|
|
PHP_MINIT_FUNCTION(regex)
|
|
|
|
|
{
|
|
|
|
|
ZEND_INIT_MODULE_GLOBALS(reg, php_reg_init_globals, php_reg_destroy_globals);
|
1999-05-12 23:40:15 +08:00
|
|
|
|
return SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2000-03-07 04:37:11 +08:00
|
|
|
|
PHP_MSHUTDOWN_FUNCTION(regex)
|
1999-05-12 23:40:15 +08:00
|
|
|
|
{
|
2002-09-25 22:02:34 +08:00
|
|
|
|
#ifndef ZTS
|
|
|
|
|
php_reg_destroy_globals(®_globals TSRMLS_CC);
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-05-12 23:40:15 +08:00
|
|
|
|
return SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2000-03-07 04:37:11 +08:00
|
|
|
|
/* Module info hook: emits one "Regex Library" table row stating whether the
 * build uses the bundled regex library (HSREGEX) or the system one. */
PHP_MINFO_FUNCTION(regex)
{
	const char *library_status =
#if HSREGEX
		"Bundled library enabled";
#else
		"System library enabled";
#endif

	php_info_print_table_row(2, "Regex Library", library_status);
}
|
|
|
|
|
|
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ php_reg_eprint
|
1999-12-18 12:01:20 +08:00
|
|
|
|
* php_reg_eprint - convert error number to name
|
1999-04-17 08:37:12 +08:00
|
|
|
|
*/
|
1999-12-18 12:01:20 +08:00
|
|
|
|
static void php_reg_eprint(int err, regex_t *re) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
char *buf = NULL, *message = NULL;
|
|
|
|
|
size_t len;
|
|
|
|
|
size_t buf_len;
|
|
|
|
|
|
|
|
|
|
#ifdef REG_ITOA
|
|
|
|
|
/* get the length of the message */
|
|
|
|
|
buf_len = regerror(REG_ITOA | err, re, NULL, 0);
|
|
|
|
|
if (buf_len) {
|
2003-08-12 07:16:54 +08:00
|
|
|
|
buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (!buf) return; /* fail silently */
|
|
|
|
|
/* finally, get the error message */
|
|
|
|
|
regerror(REG_ITOA | err, re, buf, buf_len);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
buf_len = 0;
|
|
|
|
|
#endif
|
|
|
|
|
len = regerror(err, re, NULL, 0);
|
|
|
|
|
if (len) {
|
2002-12-06 04:59:49 +08:00
|
|
|
|
TSRMLS_FETCH();
|
|
|
|
|
|
2003-08-12 07:16:54 +08:00
|
|
|
|
message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (!message) {
|
|
|
|
|
return; /* fail silently */
|
|
|
|
|
}
|
|
|
|
|
if (buf_len) {
|
|
|
|
|
snprintf(message, buf_len, "%s: ", buf);
|
|
|
|
|
buf_len += 1; /* so pointer math below works */
|
|
|
|
|
}
|
|
|
|
|
/* drop the message into place */
|
|
|
|
|
regerror(err, re, message + buf_len, len);
|
|
|
|
|
|
2002-12-06 04:59:49 +08:00
|
|
|
|
php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
STR_FREE(buf);
|
|
|
|
|
STR_FREE(message);
|
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ php_ereg
|
|
|
|
|
*/
|
1999-12-18 12:01:20 +08:00
|
|
|
|
static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-14 12:10:01 +08:00
|
|
|
|
pval **regex, /* Regular expression */
|
|
|
|
|
**findin, /* String to apply expression to */
|
|
|
|
|
**array = NULL; /* Optional register array */
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regex_t re;
|
2001-12-12 03:28:31 +08:00
|
|
|
|
regmatch_t *subs;
|
2001-12-22 11:04:32 +08:00
|
|
|
|
int err, match_len, string_len;
|
|
|
|
|
uint i;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
int copts = 0;
|
|
|
|
|
off_t start, end;
|
|
|
|
|
char *buf = NULL;
|
|
|
|
|
char *string = NULL;
|
2001-09-03 16:44:02 +08:00
|
|
|
|
int argc = ZEND_NUM_ARGS();
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (argc < 2 || argc > 3 ||
|
|
|
|
|
zend_get_parameters_ex(argc, ®ex, &findin, &array) == FAILURE) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
WRONG_PARAM_COUNT;
|
|
|
|
|
}
|
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (icase)
|
|
|
|
|
copts |= REG_ICASE;
|
|
|
|
|
|
|
|
|
|
if (argc == 2)
|
|
|
|
|
copts |= REG_NOSUB;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
/* compile the regular expression from the supplied regex */
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (Z_TYPE_PP(regex) == IS_STRING) {
|
|
|
|
|
err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
} else {
|
|
|
|
|
/* we convert numbers to integers and treat them as a string */
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (Z_TYPE_PP(regex) == IS_DOUBLE)
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_long_ex(regex); /* get rid of decimal places */
|
|
|
|
|
convert_to_string_ex(regex);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
/* don't bother doing an extended regex with just a number */
|
2001-09-03 16:44:02 +08:00
|
|
|
|
err = regcomp(&re, Z_STRVAL_PP(regex), copts);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (err) {
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* make a copy of the string we're looking in */
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_string_ex(findin);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
string = estrndup(Z_STRVAL_PP(findin), Z_STRLEN_PP(findin));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-12-12 03:28:31 +08:00
|
|
|
|
/* allocate storage for (sub-)expression-matches */
|
|
|
|
|
subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
|
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
/* actually execute the regular expression */
|
2001-12-12 03:28:31 +08:00
|
|
|
|
err = regexec(&re, string, re.re_nsub+1, subs, 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (err && err != REG_NOMATCH) {
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regfree(&re);
|
2001-12-12 03:28:31 +08:00
|
|
|
|
efree(subs);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
match_len = 1;
|
|
|
|
|
|
|
|
|
|
if (array && err != REG_NOMATCH) {
|
|
|
|
|
match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
string_len = Z_STRLEN_PP(findin) + 1;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
buf = emalloc(string_len);
|
|
|
|
|
|
2001-09-03 16:47:45 +08:00
|
|
|
|
zval_dtor(*array); /* start with clean array */
|
1999-12-14 12:10:01 +08:00
|
|
|
|
array_init(*array);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-12-12 03:28:31 +08:00
|
|
|
|
for (i = 0; i <= re.re_nsub; i++) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
start = subs[i].rm_so;
|
|
|
|
|
end = subs[i].rm_eo;
|
|
|
|
|
if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
|
1999-12-14 12:10:01 +08:00
|
|
|
|
add_index_stringl(*array, i, string+start, end-start, 1);
|
1999-10-04 21:04:32 +08:00
|
|
|
|
} else {
|
1999-12-14 12:10:01 +08:00
|
|
|
|
add_index_bool(*array, i, 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
efree(buf);
|
|
|
|
|
}
|
|
|
|
|
|
2001-12-23 23:56:41 +08:00
|
|
|
|
efree(subs);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
efree(string);
|
|
|
|
|
if (err == REG_NOMATCH) {
|
|
|
|
|
RETVAL_FALSE;
|
|
|
|
|
} else {
|
|
|
|
|
if (match_len == 0)
|
|
|
|
|
match_len = 1;
|
|
|
|
|
RETVAL_LONG(match_len);
|
|
|
|
|
}
|
|
|
|
|
regfree(&re);
|
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2000-08-04 19:57:21 +08:00
|
|
|
|
/* {{{ proto int ereg(string pattern, string string [, array registers])
|
1999-04-17 08:37:12 +08:00
|
|
|
|
Regular expression match */
|
1999-05-16 19:19:26 +08:00
|
|
|
|
PHP_FUNCTION(ereg)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
2000-08-04 19:57:21 +08:00
|
|
|
|
/* {{{ proto int eregi(string pattern, string string [, array registers])
|
1999-04-17 08:37:12 +08:00
|
|
|
|
Case-insensitive regular expression match */
|
1999-05-16 19:19:26 +08:00
|
|
|
|
PHP_FUNCTION(eregi)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ php_reg_replace
|
|
|
|
|
* this is the meat and potatoes of regex replacement! */
|
2001-12-03 13:15:29 +08:00
|
|
|
|
PHPAPI char *php_reg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
|
|
|
|
regex_t re;
|
2001-12-12 03:28:31 +08:00
|
|
|
|
regmatch_t *subs;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
char *buf, /* buf is where we build the replaced string */
|
|
|
|
|
*nbuf, /* nbuf is used when we grow the buffer */
|
|
|
|
|
*walkbuf; /* used to walk buf when replacing backrefs */
|
|
|
|
|
const char *walk; /* used to walk replacement string for backrefs */
|
|
|
|
|
int buf_len;
|
|
|
|
|
int pos, tmp, string_len, new_l;
|
|
|
|
|
int err, copts = 0;
|
|
|
|
|
|
|
|
|
|
string_len = strlen(string);
|
|
|
|
|
|
2003-02-25 00:13:13 +08:00
|
|
|
|
if (icase) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
copts = REG_ICASE;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
}
|
|
|
|
|
if (extended) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
copts |= REG_EXTENDED;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
}
|
2001-09-03 16:44:02 +08:00
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
err = regcomp(&re, pattern, copts);
|
|
|
|
|
if (err) {
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
return ((char *) -1);
|
|
|
|
|
}
|
|
|
|
|
|
2001-12-12 03:28:31 +08:00
|
|
|
|
|
|
|
|
|
/* allocate storage for (sub-)expression-matches */
|
|
|
|
|
subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
|
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
/* start with a buffer that is twice the size of the stringo
|
|
|
|
|
we're doing replacements in */
|
|
|
|
|
buf_len = 2 * string_len + 1;
|
2003-08-12 07:16:54 +08:00
|
|
|
|
buf = safe_emalloc(buf_len, sizeof(char), 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
err = pos = 0;
|
|
|
|
|
buf[0] = '\0';
|
|
|
|
|
while (!err) {
|
2001-12-12 03:28:31 +08:00
|
|
|
|
err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
if (err && err != REG_NOMATCH) {
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
2001-12-12 03:28:31 +08:00
|
|
|
|
efree(subs);
|
|
|
|
|
efree(buf);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regfree(&re);
|
|
|
|
|
return ((char *) -1);
|
|
|
|
|
}
|
2001-09-03 16:44:02 +08:00
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (!err) {
|
|
|
|
|
/* backref replacement is done in two passes:
|
|
|
|
|
1) find out how long the string will be, and allocate buf
|
|
|
|
|
2) copy the part before match, replacement and backrefs to buf
|
|
|
|
|
|
|
|
|
|
Jaakko Hyv<EFBFBD>tti <Jaakko.Hyvatti@iki.fi>
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
|
|
|
|
|
walk = replace;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
while (*walk) {
|
2003-02-25 00:54:21 +08:00
|
|
|
|
if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= re.re_nsub) {
|
2002-09-25 00:34:54 +08:00
|
|
|
|
if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
|
|
|
|
|
new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
|
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
walk += 2;
|
|
|
|
|
} else {
|
|
|
|
|
new_l++;
|
|
|
|
|
walk++;
|
|
|
|
|
}
|
2003-02-25 00:13:13 +08:00
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (new_l + 1 > buf_len) {
|
|
|
|
|
buf_len = 1 + buf_len + 2 * new_l;
|
|
|
|
|
nbuf = emalloc(buf_len);
|
|
|
|
|
strcpy(nbuf, buf);
|
|
|
|
|
efree(buf);
|
|
|
|
|
buf = nbuf;
|
|
|
|
|
}
|
|
|
|
|
tmp = strlen(buf);
|
|
|
|
|
/* copy the part of the string before the match */
|
|
|
|
|
strncat(buf, &string[pos], subs[0].rm_so);
|
|
|
|
|
|
|
|
|
|
/* copy replacement and backrefs */
|
|
|
|
|
walkbuf = &buf[tmp + subs[0].rm_so];
|
|
|
|
|
walk = replace;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
while (*walk) {
|
2003-01-10 12:44:21 +08:00
|
|
|
|
if ('\\' == *walk && isdigit(walk[1]) && walk[1] - '0' <= (int)re.re_nsub) {
|
2002-09-25 00:34:54 +08:00
|
|
|
|
if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
|
|
|
|
|
/* this next case shouldn't happen. it does. */
|
|
|
|
|
&& subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
|
|
|
|
|
|
|
|
|
|
tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
|
|
|
|
|
memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
|
|
|
|
|
walkbuf += tmp;
|
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
walk += 2;
|
2002-09-25 00:34:54 +08:00
|
|
|
|
} else {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
*walkbuf++ = *walk++;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
*walkbuf = '\0';
|
|
|
|
|
|
|
|
|
|
/* and get ready to keep looking for replacements */
|
|
|
|
|
if (subs[0].rm_so == subs[0].rm_eo) {
|
2003-02-25 00:13:13 +08:00
|
|
|
|
if (subs[0].rm_so + pos >= string_len) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
break;
|
2003-02-25 00:13:13 +08:00
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
new_l = strlen (buf) + 1;
|
|
|
|
|
if (new_l + 1 > buf_len) {
|
|
|
|
|
buf_len = 1 + buf_len + 2 * new_l;
|
2003-08-12 07:16:54 +08:00
|
|
|
|
nbuf = safe_emalloc(buf_len, sizeof(char), 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
strcpy(nbuf, buf);
|
|
|
|
|
efree(buf);
|
|
|
|
|
buf = nbuf;
|
|
|
|
|
}
|
|
|
|
|
pos += subs[0].rm_eo + 1;
|
|
|
|
|
buf [new_l-1] = string [pos-1];
|
|
|
|
|
buf [new_l] = '\0';
|
|
|
|
|
} else {
|
|
|
|
|
pos += subs[0].rm_eo;
|
|
|
|
|
}
|
|
|
|
|
} else { /* REG_NOMATCH */
|
|
|
|
|
new_l = strlen(buf) + strlen(&string[pos]);
|
|
|
|
|
if (new_l + 1 > buf_len) {
|
|
|
|
|
buf_len = new_l + 1; /* now we know exactly how long it is */
|
2003-08-12 07:16:54 +08:00
|
|
|
|
nbuf = safe_emalloc(buf_len, sizeof(char), 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
strcpy(nbuf, buf);
|
|
|
|
|
efree(buf);
|
|
|
|
|
buf = nbuf;
|
|
|
|
|
}
|
|
|
|
|
/* stick that last bit of string on our output */
|
|
|
|
|
strcat(buf, &string[pos]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* don't want to leak memory .. */
|
2001-12-12 03:28:31 +08:00
|
|
|
|
efree(subs);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regfree(&re);
|
|
|
|
|
|
|
|
|
|
/* whew. */
|
|
|
|
|
return (buf);
|
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ php_ereg_replace
|
|
|
|
|
*/
|
1999-12-18 12:01:20 +08:00
|
|
|
|
static void php_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-14 12:10:01 +08:00
|
|
|
|
pval **arg_pattern,
|
|
|
|
|
**arg_replace,
|
|
|
|
|
**arg_string;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
char *pattern;
|
|
|
|
|
char *string;
|
|
|
|
|
char *replace;
|
|
|
|
|
char *ret;
|
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (ZEND_NUM_ARGS() != 3 ||
|
|
|
|
|
zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
WRONG_PARAM_COUNT;
|
|
|
|
|
}
|
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
|
|
|
|
|
if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern))
|
|
|
|
|
pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
else
|
|
|
|
|
pattern = empty_string;
|
|
|
|
|
} else {
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_long_ex(arg_pattern);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
pattern = emalloc(2);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
pattern[0] = (char) Z_LVAL_PP(arg_pattern);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
pattern[1] = '\0';
|
|
|
|
|
}
|
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (Z_TYPE_PP(arg_replace) == IS_STRING) {
|
|
|
|
|
if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace))
|
|
|
|
|
replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
else
|
|
|
|
|
replace = empty_string;
|
|
|
|
|
} else {
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_long_ex(arg_replace);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
replace = emalloc(2);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
replace[0] = (char) Z_LVAL_PP(arg_replace);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
replace[1] = '\0';
|
|
|
|
|
}
|
|
|
|
|
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_string_ex(arg_string);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (Z_STRVAL_PP(arg_string) && Z_STRLEN_PP(arg_string))
|
|
|
|
|
string = estrndup(Z_STRVAL_PP(arg_string), Z_STRLEN_PP(arg_string));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
else
|
|
|
|
|
string = empty_string;
|
|
|
|
|
|
|
|
|
|
/* do the actual work */
|
1999-12-18 12:01:20 +08:00
|
|
|
|
ret = php_reg_replace(pattern, replace, string, icase, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (ret == (char *) -1) {
|
|
|
|
|
RETVAL_FALSE;
|
|
|
|
|
} else {
|
2001-08-12 01:03:37 +08:00
|
|
|
|
RETVAL_STRING(ret, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
STR_FREE(ret);
|
|
|
|
|
}
|
2001-09-03 16:44:02 +08:00
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
STR_FREE(string);
|
|
|
|
|
STR_FREE(replace);
|
|
|
|
|
STR_FREE(pattern);
|
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2000-08-04 19:57:21 +08:00
|
|
|
|
/* {{{ proto string ereg_replace(string pattern, string replacement, string string)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
Replace regular expression */
|
1999-07-25 06:16:54 +08:00
|
|
|
|
PHP_FUNCTION(ereg_replace)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
2001-02-10 00:42:55 +08:00
|
|
|
|
/* {{{ proto string eregi_replace(string pattern, string replacement, string string)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
Case insensitive replace regular expression */
|
1999-07-25 06:16:54 +08:00
|
|
|
|
PHP_FUNCTION(eregi_replace)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
1999-12-18 12:01:20 +08:00
|
|
|
|
php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* {{{ php_split
|
|
|
|
|
*/
|
2000-06-23 19:48:02 +08:00
|
|
|
|
static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
2001-09-03 16:44:02 +08:00
|
|
|
|
zval **spliton, **str, **arg_count = NULL;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regex_t re;
|
|
|
|
|
regmatch_t subs[1];
|
|
|
|
|
char *strp, *endp;
|
2001-09-03 16:44:02 +08:00
|
|
|
|
int err, size, count = -1, copts = 0;
|
|
|
|
|
int argc = ZEND_NUM_ARGS();
|
2000-06-23 19:48:02 +08:00
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
if (argc < 2 || argc > 3 ||
|
|
|
|
|
zend_get_parameters_ex(argc, &spliton, &str, &arg_count) == FAILURE) {
|
|
|
|
|
WRONG_PARAM_COUNT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (argc > 2) {
|
|
|
|
|
convert_to_long_ex(arg_count);
|
|
|
|
|
count = Z_LVAL_PP(arg_count);
|
|
|
|
|
}
|
|
|
|
|
|
2000-06-23 19:48:02 +08:00
|
|
|
|
if (icase)
|
|
|
|
|
copts = REG_ICASE;
|
2001-09-03 16:44:02 +08:00
|
|
|
|
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_string_ex(spliton);
|
|
|
|
|
convert_to_string_ex(str);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
strp = Z_STRVAL_PP(str);
|
|
|
|
|
endp = strp + Z_STRLEN_PP(str);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2001-09-03 16:44:02 +08:00
|
|
|
|
err = regcomp(&re, Z_STRVAL_PP(spliton), REG_EXTENDED | copts);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (err) {
|
2001-09-03 16:44:02 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
2002-12-06 06:28:02 +08:00
|
|
|
|
array_init(return_value);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
/* churn through str, generating array entries as we go */
|
1999-06-22 07:07:46 +08:00
|
|
|
|
while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
if (subs[0].rm_so == 0 && subs[0].rm_eo) {
|
|
|
|
|
/* match is at start of string, return empty string */
|
|
|
|
|
add_next_index_stringl(return_value, empty_string, 0, 1);
|
|
|
|
|
/* skip ahead the length of the regex match */
|
2001-09-03 16:44:02 +08:00
|
|
|
|
strp += subs[0].rm_eo;
|
|
|
|
|
} else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
/* No more matches */
|
|
|
|
|
regfree(&re);
|
2002-12-06 04:59:49 +08:00
|
|
|
|
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression to split()");
|
2001-09-03 16:44:02 +08:00
|
|
|
|
zend_hash_destroy(Z_ARRVAL_P(return_value));
|
|
|
|
|
efree(Z_ARRVAL_P(return_value));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
} else {
|
|
|
|
|
/* On a real match */
|
|
|
|
|
|
|
|
|
|
/* make a copy of the substring */
|
|
|
|
|
size = subs[0].rm_so;
|
|
|
|
|
|
|
|
|
|
/* add it to the array */
|
|
|
|
|
add_next_index_stringl(return_value, strp, size, 1);
|
|
|
|
|
|
|
|
|
|
/* point at our new starting point */
|
|
|
|
|
strp = strp + subs[0].rm_eo;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* if we're only looking for a certain number of points,
|
|
|
|
|
stop looking once we hit it */
|
1999-06-05 21:56:18 +08:00
|
|
|
|
if (count != -1) {
|
|
|
|
|
count--;
|
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* see if we encountered an error */
|
|
|
|
|
if (err && err != REG_NOMATCH) {
|
2001-09-03 16:44:02 +08:00
|
|
|
|
php_reg_eprint(err, &re);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
regfree(&re);
|
2001-09-03 16:44:02 +08:00
|
|
|
|
zend_hash_destroy(Z_ARRVAL_P(return_value));
|
|
|
|
|
efree(Z_ARRVAL_P(return_value));
|
1999-04-17 08:37:12 +08:00
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* otherwise we just have one last element to add to the array */
|
1999-06-22 07:07:46 +08:00
|
|
|
|
size = endp - strp;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
1999-06-22 07:07:46 +08:00
|
|
|
|
add_next_index_stringl(return_value, strp, size, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
|
|
|
|
regfree(&re);
|
2000-06-23 19:48:02 +08:00
|
|
|
|
}
|
2001-06-06 21:06:12 +08:00
|
|
|
|
/* }}} */
|
2000-06-23 19:48:02 +08:00
|
|
|
|
|
|
|
|
|
/* {{{ proto array split(string pattern, string string [, int limit])
|
|
|
|
|
Split string into array by regular expression */
|
|
|
|
|
PHP_FUNCTION(split)
|
|
|
|
|
{
|
|
|
|
|
php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
|
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
|
|
|
|
/* {{{ proto array spliti(string pattern, string string [, int limit])
|
|
|
|
|
Split string into array by regular expression case-insensitive */
|
|
|
|
|
|
|
|
|
|
PHP_FUNCTION(spliti)
|
|
|
|
|
{
|
|
|
|
|
php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
2000-06-23 19:48:02 +08:00
|
|
|
|
|
1999-04-17 08:37:12 +08:00
|
|
|
|
/* }}} */
|
|
|
|
|
|
|
|
|
|
/* {{{ proto string sql_regcase(string string)
|
|
|
|
|
Make regular expression for case insensitive match */
|
1999-05-16 19:19:26 +08:00
|
|
|
|
PHPAPI PHP_FUNCTION(sql_regcase)
|
1999-04-17 08:37:12 +08:00
|
|
|
|
{
|
2001-09-03 16:44:02 +08:00
|
|
|
|
zval **string;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
char *tmp;
|
1999-04-22 01:11:01 +08:00
|
|
|
|
unsigned char c;
|
|
|
|
|
register int i, j;
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2000-06-06 03:47:54 +08:00
|
|
|
|
if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &string)==FAILURE) {
|
1999-04-17 08:37:12 +08:00
|
|
|
|
WRONG_PARAM_COUNT;
|
2001-09-03 16:44:02 +08:00
|
|
|
|
}
|
1999-12-14 12:10:01 +08:00
|
|
|
|
convert_to_string_ex(string);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2003-08-12 07:16:54 +08:00
|
|
|
|
tmp = safe_emalloc(Z_STRLEN_PP(string), 4, 1);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
|
2003-01-12 07:05:19 +08:00
|
|
|
|
for (i = j = 0; i < Z_STRLEN_PP(string); i++) {
|
2001-09-03 16:44:02 +08:00
|
|
|
|
c = (unsigned char) Z_STRVAL_PP(string)[i];
|
1999-04-22 01:11:01 +08:00
|
|
|
|
if(isalpha(c)) {
|
|
|
|
|
tmp[j++] = '[';
|
|
|
|
|
tmp[j++] = toupper(c);
|
|
|
|
|
tmp[j++] = tolower(c);
|
|
|
|
|
tmp[j++] = ']';
|
|
|
|
|
} else {
|
|
|
|
|
tmp[j++] = c;
|
|
|
|
|
}
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
2001-09-03 16:44:02 +08:00
|
|
|
|
tmp[j] = 0;
|
|
|
|
|
|
|
|
|
|
RETVAL_STRINGL(tmp, j, 1);
|
|
|
|
|
efree(tmp);
|
1999-04-17 08:37:12 +08:00
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Local variables:
|
|
|
|
|
* tab-width: 4
|
|
|
|
|
* c-basic-offset: 4
|
|
|
|
|
* End:
|
2001-09-09 21:29:31 +08:00
|
|
|
|
* vim600: noet sw=4 ts=4 fdm=marker
|
|
|
|
|
* vim<600: noet sw=4 ts=4
|
1999-04-17 08:37:12 +08:00
|
|
|
|
*/
|