php-src/ext/pcre/php_pcre.c

/*
   +----------------------------------------------------------------------+
   | PHP version 4.0                                                      |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997, 1998, 1999 The PHP Group                         |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.0 of the PHP license,       |
   | that is bundled with this package in the file LICENSE, and is        |
   | available at through the world-wide-web at                           |
   | http://www.php.net/license/2_0.txt.                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Andrey Zmievski <andrey@ispi.net>                           |
   +----------------------------------------------------------------------+
 */

/* $Id$ */

#include "php.h"
#include "php_globals.h"

#if HAVE_PCRE

#include "php_pcre.h"
#include "ext/standard/php3_string.h"

#define PREG_PATTERN_ORDER	0
#define PREG_SET_ORDER		1

#define PREG_REPLACE_EVAL	(1<<0)

/* {{{ module definition structures */

unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };

function_entry pcre_functions[] = {
	PHP_FE(preg_match,		third_arg_force_ref)
	PHP_FE(preg_match_all,	third_arg_force_ref)
	PHP_FE(preg_replace,	NULL)
	PHP_FE(preg_split,		NULL)
	PHP_FE(preg_quote,		NULL)
	PHP_FE(preg_grep,		NULL)
	{NULL, 		NULL, 		NULL}
};

zend_module_entry pcre_module_entry = {
   "PCRE", pcre_functions, php_minit_pcre, php_mshutdown_pcre,
		   php_rinit_pcre, NULL,
		   php_info_pcre, STANDARD_MODULE_PROPERTIES
};

/* }}} */


#ifdef ZTS
int pcre_globals_id;
#else
php_pcre_globals pcre_globals;
#endif


static void *php_pcre_malloc(size_t size)
{
	return pemalloc(size, 1);
}


static void php_pcre_free(void *ptr)
{
	pefree(ptr, 1);
}


static int _php_free_pcre_cache(void *data)
{
	pcre_cache_entry *pce = (pcre_cache_entry *) data;
	pefree(pce->re, 1);
	return 1;
}


#ifdef ZTS
static void _php_pcre_init_globals(php_pcre_globals *pcre_globals)
{
	zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
}


static void _php_pcre_shutdown_globals(php_pcre_globals *pcre_globals)
{
	zend_hash_destroy(&PCRE_G(pcre_cache));
}
#endif


/* {{{ void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) */
void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS)
{
	php3_printf("Perl Compatible Regular Expressions");
	php3_printf("<table cellpadding=5>"
				"<tr><td>PCRE library version:</td>"
				"<td>%s</td></tr>"
				"</table>", pcre_version());
}
/* }}} */


/* {{{ int php_minit_pcre(INIT_FUNC_ARGS) */
int php_minit_pcre(INIT_FUNC_ARGS)
{
	ELS_FETCH();

#ifdef ZTS
	pcre_globals_id = ts_allocate_id(
							sizeof(php_pcre_globals),
							_php_pcre_init_globals,
							_php_pcre_shutdown_globals);
#else
	zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
#endif

	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
	return SUCCESS;
}
/* }}} */


/* {{{ int php_mshutdown_pcre(void) */
int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS)
{
#ifndef ZTS
	zend_hash_destroy(&PCRE_G(pcre_cache));
#endif
	return SUCCESS;
}
/* }}} */


/* {{{ int php_rinit_pcre(INIT_FUNC_ARGS) */
int php_rinit_pcre(INIT_FUNC_ARGS)
{
	pcre_malloc = php_pcre_malloc;
	pcre_free = php_pcre_free;

	return SUCCESS;
}
/* }}} */


/* {{{ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) */
static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_options) {
	pcre				*re = NULL;
	int				 	 coptions = 0;
	int				 	 soptions = 0;
	const char	 		*error;
	int			 	 	 erroffset;
	char		 	 	 delimiter;
	char 				*p, *pp;
	char				*pattern;
	int				 	 regex_len;
	int				 	 do_study = 0;
	int					 poptions = 0;
	pcre_cache_entry	*pce;
	pcre_cache_entry	 new_entry;
	PCRE_LS_FETCH();

	/* Try to lookup the cached regex entry, and if successful, just pass
	   back the compiled pattern, otherwise go on and compile it. */
	regex_len = strlen(regex);
	if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
		extra = pce->extra;
		*preg_options = pce->preg_options;
		return pce->re;
	}

	p = regex;

	/* Parse through the leading whitespace, and display a warning if we
	   get to the end without encountering a delimiter. */
	while (isspace((int)*p)) p++;
	if (*p == 0) {
		zend_error(E_WARNING, "Empty regular expression");
		return NULL;
	}

	/* Get the delimiter and display a warning if it is alphanumeric
	   or a backslash. */
	delimiter = *p++;
	if (isalnum((int)delimiter) || delimiter == '\\') {
		zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
		return NULL;
	}

	/* We need to iterate through the pattern, searching for the ending delimiter,
	   but skipping the backslashed delimiters.  If the ending delimiter is not
	   found, display a warning. */
	pp = p;
	while (*pp != 0) {
		if (*pp == delimiter && pp[-1] != '\\')
			break;
		pp++;
	}
	if (*pp == 0) {
		zend_error(E_WARNING, "No ending delimiter found");
		return NULL;
	}

	/* Make a copy of the actual pattern. */
	pattern = estrndup(p, pp-p);

	/* Move on to the options */
	pp++;

	/* Clear out preg options */
	*preg_options = 0;

	/* Parse through the options, setting appropriate flags.  Display
	   a warning if we encounter an unknown option. */
	while (*pp != 0) {
		switch (*pp++) {
			/* Perl compatible options */
			case 'i':	coptions |= PCRE_CASELESS;		break;
			case 'm':	coptions |= PCRE_MULTILINE;		break;
			case 's':	coptions |= PCRE_DOTALL;		break;
			case 'x':	coptions |= PCRE_EXTENDED;		break;

			/* PCRE specific options */
			case 'A':	coptions |= PCRE_ANCHORED;		break;
			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;break;
			case 'S':	do_study  = 1;					break;
			case 'U':	coptions |= PCRE_UNGREEDY;		break;
			case 'X':	coptions |= PCRE_EXTRA;			break;

			/* Custom preg options */
			case 'e':	poptions |= PREG_REPLACE_EVAL; break;

			case ' ':
			case '\n':
				break;

			default:
				zend_error(E_WARNING, "Unknown option '%c'", pp[-1]);
				efree(pattern);
				return NULL;
		}
	}

	/* Compile pattern and display a warning if compilation failed. */
	re = pcre_compile(pattern,
					  coptions,
					  &error,
					  &erroffset,
					  NULL);

	if (re == NULL) {
		zend_error(E_WARNING, "Compilation failed: %s at offset %d\n", error, erroffset);
		efree(pattern);
		return NULL;
	}

	/* If study option was specified, study the pattern and
	   store the result in extra for passing to pcre_exec. */
	if (do_study) {
		extra = pcre_study(re, soptions, &error);
		if (error != NULL) {
			zend_error(E_WARNING, "Error while studying pattern");
		}
	}

	*preg_options = poptions;

	efree(pattern);

	/* Store the compiled pattern and extra info in the cache. */
	new_entry.re = re;
	new_entry.extra = extra;
	new_entry.preg_options = poptions;
	zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
						sizeof(pcre_cache_entry), NULL);

	return re;
}
/* }}} */


/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */
static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
{
	zval			*regex,				/* Regular expression */
					*subject,			/* String to match against */
					*subpats = NULL,	/* Array for subpatterns */
					*subpats_order,		/* Order of the results in the subpatterns
										   array for global match */
					*result_set,		/* Holds a set of subpatterns after
										   a global match */
				   **match_sets = NULL;	/* An array of sets of matches for each
										   subpattern after a global match */
	pcre			*re = NULL;			/* Compiled regular expression */
	pcre_extra		*extra = NULL;		/* Holds results of studying */
	int			 	 exoptions = 0;		/* Execution options */
	int			 	 preg_options = 0;	/* Custom preg options */
	int			 	 count = 0;			/* Count of matched subpatterns */
	int			 	*offsets;			/* Array of subpattern offsets */
	int				 num_subpats;		/* Number of captured subpatterns */
	int			 	 size_offsets;		/* Size of the offsets array */
	int			 	 matched;			/* Has anything matched */
	int				 i;
	int				 subpats_order_val = 0;	/* Integer value of subpats_order */
	const char	   **stringlist;		/* Used to hold list of subpatterns */
	char			*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*subject_end;		/* Points to the end of the subject */


	/* Get function parameters and do error-checking. */
	switch(ARG_COUNT(ht)) {
		case 2:
			if (getParameters(ht, 2, &regex, &subject) == FAILURE) {
				WRONG_PARAM_COUNT;
			}
			break;

		case 3:
			if (getParameters(ht, 3, &regex, &subject, &subpats) == FAILURE) {
				WRONG_PARAM_COUNT;
			}
			if (global)
				subpats_order_val = PREG_PATTERN_ORDER;
			if (!ParameterPassedByReference(ht, 3)) {
				zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
				RETURN_FALSE;
			}
			break;

		case 4:
			if (getParameters(ht, 4, &regex, &subject, &subpats, &subpats_order) == FAILURE) {
				WRONG_PARAM_COUNT;
			}
			if (!ParameterPassedByReference(ht, 3)) {
				zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
				RETURN_FALSE;
			}

			/* Make sure subpats_order is a number */
			convert_to_long(subpats_order);
			subpats_order_val = subpats_order->value.lval;
			if (subpats_order_val < PREG_PATTERN_ORDER ||
				subpats_order_val > PREG_SET_ORDER) {
				zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()");
			}
			break;

		default:
			WRONG_PARAM_COUNT;
	}

	/* Make sure we're dealing with strings. */
	convert_to_string(regex);
	convert_to_string(subject);

	/* Make sure to clean up the passed array and initialize it. */
	if (subpats != NULL) {
		zval_dtor(subpats);
		array_init(subpats);
	}

	/* Compile regex or get it from cache. */
	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
		RETURN_FALSE;
	}

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pcre_info(re, NULL, NULL) + 1;
	size_offsets = num_subpats * 3;
	offsets = (int *)emalloc(size_offsets * sizeof(int));

	/* Allocate match sets array and initialize the values */
	if (global && subpats_order_val == PREG_PATTERN_ORDER) {
		match_sets = (zval **)emalloc(num_subpats * sizeof(zval *));
		for (i=0; i<num_subpats; i++) {
			match_sets[i] = (zval *)emalloc(sizeof(zval));
			array_init(match_sets[i]);
			INIT_PZVAL(match_sets[i]);
		}
	}

	/* Start from the beginning of the string */
	piece = subject->value.str.val;
	subject_end = piece + subject->value.str.len;
	match = NULL;
	matched = 0;

	do {
		/* Execute the regular expression. */
		count = pcre_exec(re, extra, piece,
						  subject_end-piece, subject->value.str.val,
						  (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
						  offsets, size_offsets, (piece == match));

		/* Check for too many substrings condition. */
		if (count == 0) {
			zend_error(E_NOTICE, "Matched, but too many substrings\n");
			count = size_offsets/3;
		}

		/* If something has matched */
		if (count >= 0) {
			matched++;
			match = piece + offsets[0];

			/* If subpatters array has been passed, fill it in with values. */
			if (subpats != NULL) {
				/* Try to get the list of substrings and display a warning if failed. */
				if (pcre_get_substring_list(piece, offsets, count, &stringlist) < 0) {
					efree(offsets);
					efree(re);
					zend_error(E_WARNING, "Get subpatterns list failed");
					return;
				}

				if (global) {	/* global pattern matching */
					if (subpats_order_val == PREG_PATTERN_ORDER) {
						/* For each subpattern, insert it into the appropriate array */
						for (i=0; i<count; i++) {
							add_next_index_string(match_sets[i], (char *)stringlist[i], 1);
						}
					}
					else {
						/* Allocate the result set array */
						result_set = emalloc(sizeof(zval));
						array_init(result_set);
						INIT_PZVAL(result_set);

						/* Add all the subpatterns to it */
						for (i=0; i<count; i++) {
							add_next_index_string(result_set, (char *)stringlist[i], 1);
						}
						/* And add it to the output array */
						zend_hash_next_index_insert(subpats->value.ht, &result_set,
								sizeof(zval *), NULL);
					}
				}
				else {			/* single pattern matching */
					/* For each subpattern, insert it into the subpatterns array. */
					for (i=0; i<count; i++) {
						add_next_index_string(subpats, (char *)stringlist[i], 1);
					}
				}

				php_pcre_free((void *) stringlist);

				/* Advance to the position right after the last full match */
				piece += offsets[1];
			}
		}
	} while (global && count >= 0);

	/* Add the match sets to the output array and clean up */
	if (global && subpats_order_val == PREG_PATTERN_ORDER) {
		for (i=0; i<num_subpats; i++) {
			zend_hash_next_index_insert(subpats->value.ht, &match_sets[i], sizeof(zval *), NULL);
		}
		efree(match_sets);
	}

	efree(offsets);

	RETVAL_LONG(matched);
}
/* }}} */


/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ])
   Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)
{
	_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */


/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns [, int order ])
   Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)
{
	_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */


/* {{{ int _preg_get_backref(const char *walk, int *backref) */
static int _preg_get_backref(const char *walk, int *backref)
{
	if (*walk && *walk >= '0' && *walk <= '9')
		*backref = *walk - '0';
	else
		return 0;

	if (walk[1] && walk[1] >= '0' && walk[1] <= '9')
		*backref = *backref * 10 + walk[1] - '0';

	return 1;
}
/* }}} */


/* {{{ int _preg_do_eval(char *eval_str, char *subject, int *offsets,
				  int count, char **result) */
static int _preg_do_eval(char *eval_str, char *subject, int *offsets,
				  int count, char **result)
{
	zval		 retval;			/* Return value from evaluation */
	char		 backref_buf[4],	/* Buffer for string version of backref */
				*code,				/* PHP code string */
				*new_code,			/* Code as result of substitution */
				*match,				/* Current match for a backref */
				*walk;				/* Used to walk the code string */
	int			 code_len;			/* Length of the code string */
	int			 new_code_len;		/* Length of the substituted code string */
	int			 match_len;			/* Length of the match */
	int			 result_len;		/* Length of the result of the evaluation */
	int			 backref;			/* Current backref */
	CLS_FETCH();
	ELS_FETCH();

	/* Save string to be evaluated, since we will be modifying it */
	code = estrdup(eval_str);
	walk = code;
	new_code_len = code_len = strlen(code);

	while (*walk) {
		/* If found a backreference.. */
		if ('\\' == *walk &&
			_preg_get_backref(walk+1, &backref) &&
			backref < count) {
			/* Find the corresponding string match and substitute it
			   in instead of the backref */
			match = subject + offsets[backref<<1];
			match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
			sprintf(backref_buf, "\\%d", backref);
			new_code = _php3_str_to_str(code, code_len,
										backref_buf, (backref > 9) ? 3 : 2,
										match, match_len, &new_code_len);

			/* Adjust the walk pointer */
			walk = new_code + (walk - code) + match_len;

			/* Clean up and reassign */
			efree(code);
			code = new_code;
			code_len = new_code_len;
		} else {
			walk++;
		}
	}

	/* Run the code */
	zend_eval_string(code, &retval CLS_CC ELS_CC);
	convert_to_string(&retval);

	/* Save the return value and its length */
	*result = estrdup(retval.value.str.val);
	result_len = retval.value.str.len;

	/* Clean up */
	zval_dtor(&retval);
	efree(code);

	return result_len;
}
/* }}} */


/* {{{ char *_php_pcre_replace(char *regex, char *subject, char *replace) */
char *_php_pcre_replace(char *regex, char *subject, char *replace)
{
	pcre			*re = NULL;			/* Compiled regular expression */
	pcre_extra		*extra = NULL;		/* Holds results of studying */
	int			 	 exoptions = 0;		/* Execution options */
	int			 	 preg_options = 0;	/* Custom preg options */
	int			 	 count = 0;			/* Count of matched subpatterns */
	int			 	*offsets;			/* Array of subpattern offsets */
	int			 	 size_offsets;		/* Size of the offsets array */
	int				 new_len;			/* Length of needed storage */
	int				 alloc_len;			/* Actual allocated length */
	int				 subject_len;		/* Length of the subject string */
	int				 eval_result_len=0;	/* Length of the eval'ed string */
	int				 result_len;		/* Current length of the result */
	int				 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	int				 eval;				/* If the replacement string should be eval'ed */
	char			*result,			/* Result of replacement */
					*new_buf,			/* Temporary buffer for re-allocation */
					*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*subject_end,		/* Points to the end of the subject */
					*eval_result;		/* Result of eval */

	/* Compile regex or get it from cache. */
	if ((re = _pcre_get_compiled_regex(regex, extra, &preg_options)) == NULL) {
		return NULL;
	}

	/* Calculate the size of the offsets array, and allocate memory for it. */
	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
	offsets = (int *)emalloc(size_offsets * sizeof(int));

	subject_len = strlen(subject);

	alloc_len = 2 * subject_len + 1;
	result = emalloc(alloc_len * sizeof(char));
	if (!result) {
		zend_error(E_WARNING, "Unable to allocate memory in pcre_replace");
		efree(re);
		efree(offsets);
		return NULL;
	}

	/* Initialize */
	match = NULL;
	result[0] = '\0';
	piece = subject;
	subject_end = subject + subject_len;
	eval = preg_options & PREG_REPLACE_EVAL;

	while (count >= 0) {
		/* Execute the regular expression. */
		count = pcre_exec(re, extra, piece,
							subject_end-piece, subject,
						  	(piece==subject ? exoptions : exoptions|PCRE_NOTBOL),
							offsets, size_offsets, (piece == match));

		/* Check for too many substrings condition. */
		if (count == 0) {
			zend_error(E_NOTICE, "Matched, but too many substrings\n");
			count = size_offsets/3;
		}

		if (count > 0) {
			/* Set the match location in piece */
			match = piece + offsets[0];

			new_len = strlen(result) + offsets[0]; /* part before the match */

			/* If evaluating, do it and add the return string's length */
			if (eval) {
				eval_result_len = _preg_do_eval(replace, piece, offsets,
												count, &eval_result);
				new_len += eval_result_len;
			} else { /* do regular substitution */
				walk = replace;
				while (*walk)
					if ('\\' == *walk &&
						_preg_get_backref(walk+1, &backref) &&
						backref < count) {
						new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						walk += (backref > 9) ? 3 : 2;
					} else {
						new_len++;
						walk++;
					}
			}

			if (new_len + 1 > alloc_len) {
				alloc_len = 1 + alloc_len + 2 * new_len;
				new_buf = emalloc(alloc_len);
				strcpy(new_buf, result);
				efree(result);
				result = new_buf;
			}
			result_len = strlen(result);
			/* copy the part of the string before the match */
			strncat(result, piece, match-piece);

			/* copy replacement and backrefs */
			walkbuf = &result[result_len + offsets[0]];

			/* If evaluating, copy result to the buffer and clean up */
			if (eval) {
				memcpy(walkbuf, eval_result, eval_result_len);
				walkbuf += eval_result_len;
				efree(eval_result);
			} else { /* do regular backreference copying */
				walk = replace;
				while (*walk)
					if ('\\' == *walk &&
						_preg_get_backref(walk+1, &backref) &&
						backref < count) {
						match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
						memcpy (walkbuf,
								piece + offsets[backref<<1],
								match_len);
						walkbuf += match_len;
						walk += (backref > 9) ? 3 : 2;
					} else
						*walkbuf++ = *walk++;
			}
			*walkbuf = '\0';

			/* Advance to the next piece */
			piece += offsets[1];
		} else {
			new_len = strlen(result) + subject_end-piece;
			if (new_len + 1 > alloc_len) {
				alloc_len = new_len + 1; /* now we know exactly how long it is */
				new_buf = emalloc(alloc_len * sizeof(char));
				strcpy(new_buf, result);
				efree(result);
				result = new_buf;
			}
			/* stick that last bit of string on our output */
			strcat(result, piece);
		}
	}

	efree(offsets);

	return result;
}
/* }}} */


static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject)
{
	zval		**regex_entry_ptr,
				*regex_entry,
				**replace_entry_ptr,
				*replace_entry;
	char		*replace_value = NULL,
				*subject_value,
				*result;

	/* Make sure we're dealing with strings. */
	convert_to_string(subject);

	/* If regex is an array */
	if (regex->type == IS_ARRAY) {
		/* Duplicating subject string for repeated replacement */
		subject_value = estrdup(subject->value.str.val);

		zend_hash_internal_pointer_reset(regex->value.ht);

		if (replace->type == IS_ARRAY)
			zend_hash_internal_pointer_reset(replace->value.ht);
		else
			/* Set replacement value to the passed one */
			replace_value = replace->value.str.val;

		/* For each entry in the regex array, get the entry */
		while (zend_hash_get_current_data(regex->value.ht, (void **)&regex_entry_ptr) == SUCCESS) {
			regex_entry = *regex_entry_ptr;

			/* Make sure we're dealing with strings. */
			convert_to_string(regex_entry);

			/* If replace is an array */
			if (replace->type == IS_ARRAY) {
				/* Get current entry */
				if (zend_hash_get_current_data(replace->value.ht, (void **)&replace_entry_ptr) == SUCCESS) {
					replace_entry = *replace_entry_ptr;

					/* Make sure we're dealing with strings. */
					convert_to_string(replace_entry);

					/* Set replacement value to the one we got from array */
					replace_value = replace_entry->value.str.val;

					zend_hash_move_forward(replace->value.ht);
				}
				else
					/* We've run out of replacement strings, so use an empty one */
					replace_value = empty_string;
			}

			/* Do the actual replacement and put the result back into subject_value
			   for further replacements. */
			if ((result = _php_pcre_replace(regex_entry->value.str.val,
											subject_value,
											replace_value)) != NULL) {
				efree(subject_value);
				subject_value = result;
			}

			zend_hash_move_forward(regex->value.ht);
		}

		return subject_value;
	}
	else {
		/* Make sure we're dealing with strings and do the replacement */
		convert_to_string(regex);
		convert_to_string(replace);
		result = _php_pcre_replace(regex->value.str.val,
									subject->value.str.val,
									replace->value.str.val);
		return result;
	}
}


/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject)
    Perform Perl-style regular expression replacement */
PHP_FUNCTION(preg_replace)
{
	zval			*regex,
					*replace,
					*subject,
				   **subject_entry_ptr,
					*subject_entry;
	char			*result;

	/* Get function parameters and do error-checking. */
	if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, &regex, &replace, &subject) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	/* if subject is an array */
	if (subject->type == IS_ARRAY) {
		array_init(return_value);
		zend_hash_internal_pointer_reset(subject->value.ht);

		/* For each subject entry, convert it to string, then perform replacement
		   and add the result to the return_value array. */
		while (zend_hash_get_current_data(subject->value.ht, (void **)&subject_entry_ptr) == SUCCESS) {
			subject_entry = *subject_entry_ptr;

			if ((result = _php_replace_in_subject(regex, replace, subject_entry)) != NULL)
				add_next_index_string(return_value, result, 0);

			zend_hash_move_forward(subject->value.ht);
		}
	}
	else {	/* if subject is not an array */
		if ((result = _php_replace_in_subject(regex, replace, subject)) != NULL) {
			RETVAL_STRING(result, 1);
			efree(result);
		}
	}
}
/* }}} */


/* {{{ proto preg_split(string pattern, string subject [, int limit ]) */
PHP_FUNCTION(preg_split)
{
	zval			*regex,				/* Regular expression to split by */
					*subject,			/* Subject string to split */
					*limit;				/* Number of pieces to return */
	pcre			*re = NULL;			/* Compiled regular expression */
	pcre_extra		*extra = NULL;		/* Holds results of studying */
	int			 	*offsets;			/* Array of subpattern offsets */
	int			 	 size_offsets;		/* Size of the offsets array */
	int				 exoptions = 0;		/* Execution options */
	int			 	 preg_options = 0;	/* Custom preg options */
	int				 argc;				/* Argument count */
	int				 limit_val;			/* Integer value of limit */
	int				 count = 0;			/* Count of matched subpatterns */
	char			*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*subject_end;		/* Points to the end of subject string */

	/* Get function parameters and do error checking */
	argc = ARG_COUNT(ht);
	if (argc < 1 || argc > 3 || getParameters(ht, argc, &regex, &subject, &limit) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	if (argc == 3) {
		convert_to_long(limit);
		limit_val = limit->value.lval;
	}
	else
		limit_val = -1;

	/* Make sure we're dealing with strings */
	convert_to_string(regex);
	convert_to_string(subject);

	/* Compile regex or get it from cache. */
	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
		RETURN_FALSE;
	}

	/* Initialize return value */
	array_init(return_value);

	/* Calculate the size of the offsets array, and allocate memory for it. */
	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
	offsets = (int *)emalloc(size_offsets * sizeof(int));

	/* Start at the beginning of the string */
	piece = subject->value.str.val;
	subject_end = piece + subject->value.str.len;
	match = NULL;

	/* Get next piece if no limit or limit not yet reached and something matched*/
	while ((limit_val == -1 || limit_val > 1) && count >= 0) {
		count = pcre_exec(re, extra, piece,
						  subject_end-piece, subject->value.str.val,
						  (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
						  offsets, size_offsets, (piece==match));

		/* Check for too many substrings condition. */
		if (count == 0) {
			zend_error(E_NOTICE, "Matched, but too many substrings\n");
			count = size_offsets/3;
		}

		/* If something matched */
		if (count > 0) {
			match = piece + offsets[0];

			/* Add the piece to the return value */
			add_next_index_stringl(return_value,
								  piece,
								  offsets[0], 1);

			/* Advance to next position */
			piece += offsets[1];

			/* One less left to do */
			if (limit_val != -1)
				limit_val--;
		}
	}

	/* Add the last piece to the return value */
	add_next_index_stringl(return_value,
						   piece,
						   subject_end-piece, 1);

	/* Clean up */
	efree(offsets);
}
/* }}} */


/* {{{ proto string preg_quote(string str) */
PHP_FUNCTION(preg_quote)
{
	zval 	*in_str_arg;	/* Input string argument */
	char 	*in_str,		/* Input string */
			*out_str,		/* Output string with quoted characters */
		 	*p,				/* Iterator for input string */
			*q,				/* Iterator for output string */
		 	 c;				/* Current character */

	/* Get the arguments and check for errors */
	if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &in_str_arg) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	/* Make sure we're working with strings */
	convert_to_string(in_str_arg);
	in_str = in_str_arg->value.str.val;

	/* Nothing to do if we got an empty string */
	if (!*in_str) {
		RETVAL_STRING(empty_string, 0);
	}

	/* Allocate enough memory so that even if each character
	   is quoted, we won't run out of room */
	out_str = emalloc(2 * in_str_arg->value.str.len + 1);

	/* Go through the string and quote necessary characters */
	for(p = in_str, q = out_str; (c = *p); p++) {
		switch(c) {
			case '.':
			case '\\':
			case '+':
			case '*':
			case '?':
			case '[':
			case '^':
			case ']':
			case '$':
			case '(':
			case ')':
			case '{':
			case '}':
			case '=':
			case '!':
			case '>':
			case '<':
			case '|':
			case ':':
				*q++ = '\\';
				/* break is missing _intentionally_ */
			default:
				*q++ = c;
		}
	}
	*q = '\0';

	/* Reallocate string and return it */
	RETVAL_STRING(erealloc(out_str, q - out_str + 1), 0);
}
/* }}} */


/* {{{ proto array preg_grep(string regex, array input)
   Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)
{
	zval			*regex,				/* Regular expression */
					*input,				/* Input array */
			   	   **entry;				/* An entry in the input array */
	pcre			*re = NULL;			/* Compiled regular expression */
	pcre_extra		*extra = NULL;		/* Holds results of studying */
	int			 	 preg_options = 0;	/* Custom preg options */
	int			 	*offsets;			/* Array of subpattern offsets */
	int			 	 size_offsets;		/* Size of the offsets array */
	int			 	 count = 0;			/* Count of matched subpatterns */
	char			*string_key;
	ulong			 num_key;

	/* Get arguments and do error checking */

	if (ARG_COUNT(ht) != 2 || getParameters(ht, ARG_COUNT(ht), &regex, &input) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	if (input->type != IS_ARRAY) {
		zend_error(E_WARNING, "Secong argument to preg_grep() should be an array");
		return;
	}

	/* Make sure regex is a string */
	convert_to_string(regex);

	/* Compile regex or get it from cache. */
	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
		RETURN_FALSE;
	}

	/* Calculate the size of the offsets array, and allocate memory for it. */
	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
	offsets = (int *)emalloc(size_offsets * sizeof(int));

	/* Initialize return array */
	array_init(return_value);

	/* Go through the input array */
	zend_hash_internal_pointer_reset(input->value.ht);
	while(zend_hash_get_current_data(input->value.ht, (void **)&entry) == SUCCESS) {

		/* Only match against strings */
		if ((*entry)->type == IS_STRING) {
			/* Perform the match */
			count = pcre_exec(re, extra, (*entry)->value.str.val,
							  (*entry)->value.str.len, (*entry)->value.str.val,
							  0, offsets, size_offsets, 0);

			/* Check for too many substrings condition. */
			if (count == 0) {
				zend_error(E_NOTICE, "Matched, but too many substrings\n");
				count = size_offsets/3;
			}

			/* If something matched */
			if (count > 0) {
				(*entry)->refcount++;

				/* Add to return array */
				switch(zend_hash_get_current_key(input->value.ht, &string_key, &num_key))
				{
					case HASH_KEY_IS_STRING:
						zend_hash_update(return_value->value.ht, string_key,
										 strlen(string_key)+1, entry, sizeof(zval *), NULL);
						efree(string_key);
						break;

					case HASH_KEY_IS_LONG:
						zend_hash_next_index_insert(return_value->value.ht, entry,
													sizeof(zval *), NULL);
						break;
				}
			}
		}

		zend_hash_move_forward(input->value.ht);
	}

	efree(offsets);
}
/* }}} */


#endif /* HAVE_PCRE */

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 */