MFH: Fix scanner handling of NUL (\x00) bytes in heredoc, nowdoc, strings, comments, and non-parsed content.

This commit is contained in:
Brian Shire 2009-03-16 01:40:14 +00:00
parent cb1d8ac991
commit dd031eee68
4 changed files with 3887 additions and 3952 deletions

File diff suppressed because it is too large Load Diff

View File

@ -48,7 +48,7 @@
#include "tsrm_config_common.h"
#define YYCTYPE unsigned char
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { zend_error(E_COMPILE_ERROR, "Exceeded YYLIMIT bounds during scanning. Please report this."); return 0; } }
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)
@ -852,9 +852,9 @@ NULL [\x00]{1}
* or a { and therefore will be taken literally. The case of literal $ before
* a variable or "${" is handled in a rule for each string type
*/
DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{\x00]|("\\"{ANY_CHAR})))
BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{\x00]|("\\"{ANY_CHAR})))
HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|("\\"[^\n\r\x00])))
DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR})))
BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))
HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r])))
/*
* Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
@ -871,7 +871,7 @@ HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEW
* This pattern is just used in the next 2 for matching { or literal $, and/or
* \ escape sequence immediately at the beginning of a line or after a label
*/
HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{\x00])|("{"*"\\"[^\n\r\x00])|{HEREDOC_LITERAL_DOLLAR})
HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR})
/*
* These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
@ -880,12 +880,12 @@ HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{\x00])|("{"*"\\"[^\n\r\x00])|{
* a variable or "{$". Matching a newline, and possibly a label, up TO a variable
* or "{$", is handled in the heredoc rules.
*
* The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{\x00]) handles cases where ;
* follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00] is needed to prevent a label
* The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ;
* follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label
* character or ; from matching on a possible (real) ending label
*/
HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00]|(";"[^$\n\r\\{\x00])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
/*
* CHARS matches everything up to a variable or "{$"
@ -895,11 +895,11 @@ HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00]|(";"[^$\n\
* For heredocs, matching continues across/after newlines if/when it's known
* that the next line doesn't contain a possible ending label
*/
DOUBLE_QUOTES_CHARS ("{"*([^$"\\{\x00]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
BACKQUOTE_CHARS ("{"*([^$`\\{\x00]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
HEREDOC_CHARS ("{"*([^$\n\r\\{\x00]|("\\"[^\n\r\x00]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
NOWDOC_CHARS ([^\n\r\x00]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r\x00]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r\x00]|(";"[^\n\r\x00])))))
NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r]|(";"[^\n\r])))))
/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
@ -1716,7 +1716,7 @@ inline_char_handler:
yymore();
}
<ST_ONE_LINE_COMMENT>[^\n\r?%>\x00]*{ANY_CHAR} {
<ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} {
switch (yytext[yyleng-1]) {
case '?': case '%': case '>':
yyless(yyleng-1);
@ -1773,7 +1773,7 @@ inline_char_handler:
}
<ST_COMMENT,ST_DOC_COMMENT>[^*\x00]+ {
<ST_COMMENT,ST_DOC_COMMENT>[^*]+ {
yymore();
}
@ -1828,7 +1828,7 @@ inline_char_handler:
}
<ST_IN_SCRIPTING>(b?[']([^'\\\x00]|("\\"{ANY_CHAR}))*[']) {
<ST_IN_SCRIPTING>(b?[']([^'\\]|("\\"{ANY_CHAR}))*[']) {
register char *s, *t;
char *end;
int bprefix = (yytext[0] != '\'') ? 1 : 0;

View File

@ -1,4 +1,4 @@
/* Generated by re2c 0.13.5 on Wed Mar 11 14:42:24 2009 */
/* Generated by re2c 0.13.5 on Fri Mar 13 00:13:18 2009 */
#line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE {

View File

@ -50,7 +50,7 @@ bool(false)
</span>
</code>bool(true)
<code><span style="color: #000000">
<span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #DD0000">"test&nbsp;?&gt;</span>
<span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #FF9900">"test&nbsp;?&gt;</span>
</span>
</code>bool(true)
<code><span style="color: #000000">