%{
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "glcpp.h"
#include "glcpp-parse.h"

/* Flex annoyingly generates some functions without making them
 * static. Let's declare them here. */
int glcpp_get_column (yyscan_t yyscanner);
void glcpp_set_column (int column_no, yyscan_t yyscanner);

#ifdef _MSC_VER
#define YY_NO_UNISTD_H
#pragma warning(disable: 4267) // warning C4267: '=' : conversion from 'size_t' to 'int', possible loss of data
#endif

#define YY_NO_INPUT

/* Location bookkeeping that flex runs just before each matched rule's
 * action.
 *
 * Applies any pending line-number or source-number override recorded
 * on the parser, fills in yylloc for the text just matched (the
 * reported columns are yycolumn + 1), advances yycolumn by yyleng, and
 * finally clears both "pending override" flags.
 *
 * The trailing semicolon after "while(0)" is kept on purpose: this
 * macro is expanded by the generated scanner, not written out as a
 * statement by hand. */
#define YY_USER_ACTION							\
	do {								\
		if (parser->has_new_line_number)			\
			yylineno = parser->new_line_number;		\
		if (parser->has_new_source_number)			\
			yylloc->source = parser->new_source_number;	\
		yylloc->first_column = yycolumn + 1;			\
		yylloc->first_line = yylloc->last_line = yylineno;	\
		yycolumn += yyleng;					\
		yylloc->last_column = yycolumn + 1;			\
		parser->has_new_line_number = 0;			\
		parser->has_new_source_number = 0;			\
	} while(0);

/* One-time scanner initialisation: start counting at line 1, column 0,
 * and source number 0. */
#define YY_USER_INIT			\
	do {				\
		yylineno = 1;		\
		yycolumn = 0;		\
		yylloc->source = 0;	\
	} while(0)

/* It's ugly to have macros that have return statements inside of
 * them, but flex-based lexer generation is all built around the
 * return statement.
 *
 * To mitigate the ugliness, we defer as much of the logic as possible
 * to an actual function, not a macro (see
 * glcpp_lex_update_state_per_token) and we make the word RETURN
 * prominent in all of the macros which may return.
 *
 * The most-commonly-used macro is RETURN_TOKEN which will perform all
 * necessary state updates based on the provided token, then
 * conditionally return the token. It will not return a token if the
 * parser is currently skipping tokens, (such as within #if
 * 0...#else).
 *
 * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that
 * ignores the skipping state. This is needed for things like #if and
 * the tokens of its condition, (since these must be evaluated by the
 * parser even when otherwise skipping). It still returns nothing when
 * glcpp_lex_update_state_per_token reports that the token should be
 * dropped.
 *
 * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top
 * of RETURN_TOKEN that performs a string copy of yytext before the
 * return.
 */
#define RETURN_TOKEN_NEVER_SKIP(token)					\
	do {								\
		if (glcpp_lex_update_state_per_token (parser, (token)))	\
			return (token);					\
	} while (0)

/* Return the token unless the parser is currently skipping, in which
 * case nothing happens at all (no state update, no return). */
#define RETURN_TOKEN(token)						\
	do {								\
		if (! parser->skipping) {				\
			RETURN_TOKEN_NEVER_SKIP(token);			\
		}							\
	} while(0)

/* Like RETURN_TOKEN, but first stores a copy of yytext in yylval->str.
 * The copy is made only when the parser is not skipping. */
#define RETURN_STRING_TOKEN(token)					\
	do {								\
		if (! parser->skipping) {				\
			yylval->str = ralloc_strdup (yyextra, yytext);	\
			RETURN_TOKEN_NEVER_SKIP (token);		\
		}							\
	} while(0)


/* Update all state necessary for each token being returned.
|
|
*
|
|
* Here we'll be tracking newlines and spaces so that the lexer can
|
|
* alter its behavior as necessary, (for example, '#' has special
|
|
* significance if it is the first non-whitespace, non-comment token
|
|
* in a line, but does not otherwise).
|
|
*
|
|
* NOTE: If this function returns FALSE, then no token should be
|
|
* returned at all. This is used to suprress duplicate SPACE tokens.
|
|
*/
|
|
static int
|
|
glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
|
|
{
|
|
/* After the first non-space token in a line, we won't
|
|
* allow any '#' to introduce a directive. */
|
|
if (token == NEWLINE) {
|
|
parser->first_non_space_token_this_line = 1;
|
|
} else if (token != SPACE) {
|
|
parser->first_non_space_token_this_line = 0;
|
|
}
|
|
|
|
/* Track newlines just to know whether a newline needs
|
|
* to be inserted if end-of-file comes early. */
|
|
if (token == NEWLINE) {
|
|
parser->last_token_was_newline = 1;
|
|
} else {
|
|
parser->last_token_was_newline = 0;
|
|
}
|
|
|
|
/* Track spaces to avoid emitting multiple SPACE
|
|
* tokens in a row. */
|
|
if (token == SPACE) {
|
|
if (! parser->last_token_was_space) {
|
|
parser->last_token_was_space = 1;
|
|
return 1;
|
|
} else {
|
|
parser->last_token_was_space = 1;
|
|
return 0;
|
|
}
|
|
} else {
|
|
parser->last_token_was_space = 0;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
|
|
%}

/* Scanner configuration: a reentrant scanner using the bison bridge
 * and location conventions, carrying a glcpp_parser_t pointer as its
 * "extra" data, with generated symbols prefixed by "glcpp_". The
 * start-condition stack is enabled for yy_push_state/yy_pop_state, and
 * "nodefault" means every input must be matched by an explicit rule. */
%option bison-bridge bison-locations reentrant noyywrap
%option extra-type="glcpp_parser_t *"
%option prefix="glcpp_"
%option stack
%option never-interactive
%option warn nodefault

/* Note: When adding any start conditions to this list, you must also
 * update the "Internal compiler error" catch-all rule near the end of
 * this file. */

%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE

SPACE		[[:space:]]
NONSPACE	[^[:space:]]
HSPACE		[ \t]
HASH		#
NEWLINE		(\r\n|\n\r|\r|\n)
IDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
PP_NUMBER	[.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
PUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]

/* The OTHER class is simply a catch-all for things that the CPP
parser just doesn't care about. Since flex regular expressions that
match longer strings take priority over those matching shorter
strings, we have to be careful to avoid OTHER matching and hiding
something that CPP does care about. So we simply exclude all
characters that appear in any other expressions. */

OTHER		[^][_#[:space:]a-zA-Z0-9(){}.&*~!/%<>^|;,=+-]

DIGITS			[0-9][0-9]*
DECIMAL_INTEGER		[1-9][0-9]*[uU]?
OCTAL_INTEGER		0[0-7]*[uU]?
HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?

%%

	glcpp_parser_t *parser = yyextra;

	/* When we lex a multi-line comment, we replace it (as
	 * specified) with a single space. But if the comment spanned
	 * multiple lines, then subsequent parsing stages will not
	 * count correct line numbers. To avoid this problem we keep
	 * track of all newlines that were commented out by a
	 * multi-line comment, and we emit a NEWLINE token for each at
	 * the next legal opportunity, (which is when the lexer would
	 * be emitting a NEWLINE token anyway).
	 */
	if (YY_START == NEWLINE_CATCHUP) {
		if (parser->commented_newlines)
			parser->commented_newlines--;
		if (parser->commented_newlines == 0)
			BEGIN INITIAL;
		RETURN_TOKEN_NEVER_SKIP (NEWLINE);
	}

	/* Set up the parser->skipping bit here before doing any lexing.
	 *
	 * This bit controls whether tokens are skipped, (as implemented by
	 * RETURN_TOKEN), such as between "#if 0" and "#endif".
	 *
	 * The parser maintains a skip_stack indicating whether we should be
	 * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will
	 * push and pop items from the stack.
	 *
	 * Here are the rules for determining whether we are skipping:
	 *
	 *	1. If the skip stack is NULL, we are outside of all #if blocks
	 *         and we are not skipping.
	 *
	 *	2. If the skip stack is non-NULL, the type of the top node in
	 *	   the stack determines whether to skip. A type of
	 *	   SKIP_NO_SKIP is used for blocks where we are emitting
	 *	   tokens, (such as between #if 1 and #endif, or after the
	 *	   #else of an #if 0, etc.).
	 *
	 *	3. The lexing_directive bit overrides the skip stack. This bit
	 *	   is set when we are actively lexing the expression for a
	 *	   pre-processor condition, (such as #if, #elif, or #else). In
	 *	   this case, even if otherwise skipping, we need to emit the
	 *	   tokens for this condition so that the parser can evaluate
	 *	   the expression. (For, #else, there's no expression, but we
	 *	   emit tokens so the parser can generate a nice error message
	 *	   if there are any tokens here).
	 */
	parser->skipping = (parser->skip_stack &&
			    parser->skip_stack->type != SKIP_NO_SKIP &&
			    ! parser->lexing_directive);

	/* Single-line comments */
<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
}

	/* Multi-line comments */
<INITIAL,DEFINE,HASH>"/*"   { yy_push_state(COMMENT, yyscanner); }
<COMMENT>[^*\r\n]*
<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+[^*/\r\n]*
<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+"/"        {
	yy_pop_state(yyscanner);
	/* In the <HASH> start condition, we don't want any SPACE token. */
	if (yyextra->space_tokens && YY_START != HASH)
		RETURN_TOKEN (SPACE);
}

{HASH} {

	/* If the '#' is the first non-whitespace, non-comment token on this
	 * line, then it introduces a directive, switch to the <HASH> start
	 * condition.
	 *
	 * Otherwise, this is just punctuation, so return the HASH_TOKEN
	 * token. */
	if (parser->first_non_space_token_this_line) {
		BEGIN HASH;
	}

	RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
}

<HASH>version{HSPACE}+ {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	RETURN_STRING_TOKEN (VERSION_TOKEN);
}

	/* Swallow empty #pragma directives, (to avoid confusing the
	 * downstream compiler).
	 *
	 * Note: We use a simple regular expression for the lookahead
	 * here. Specifically, we cannot use the complete {NEWLINE} expression
	 * since it uses alternation and we've found that there's a flex bug
	 * where using alternation in the lookahead portion of a pattern
	 * triggers a buffer overrun. */
<HASH>pragma{HSPACE}*/[\r\n] {
	BEGIN INITIAL;
}

	/* glcpp doesn't handle #extension or (non-empty) #pragma
	 * directives. Simply pass them through to the main compiler's
	 * lexer/parser. */
<HASH>(extension|pragma)[^\r\n]* {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (PRAGMA);
}

<HASH>line{HSPACE}+ {
	BEGIN INITIAL;
	RETURN_TOKEN (LINE);
}

<HASH>{NEWLINE} {
	/* A newline reached while still in <HASH>, (a '#' with no
	 * directive name after it). This rule is listed before the
	 * generic <*>{NEWLINE} rule and so takes priority over it, which
	 * means the line and column counters must be advanced here as
	 * well; otherwise every later location would be off by one
	 * line. */
	BEGIN INITIAL;
	yylineno++;
	yycolumn = 0;
	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}

	/* For the pre-processor directives, we return these tokens
	 * even when we are otherwise skipping. */
<HASH>ifdef {
	BEGIN INITIAL;
	yyextra->lexing_directive = 1;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (IFDEF);
}

<HASH>ifndef {
	BEGIN INITIAL;
	yyextra->lexing_directive = 1;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (IFNDEF);
}

<HASH>if/[^_a-zA-Z0-9] {
	BEGIN INITIAL;
	yyextra->lexing_directive = 1;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (IF);
}

<HASH>elif/[^_a-zA-Z0-9] {
	BEGIN INITIAL;
	yyextra->lexing_directive = 1;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (ELIF);
}

<HASH>else {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (ELSE);
}

<HASH>endif {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	RETURN_TOKEN_NEVER_SKIP (ENDIF);
}

<HASH>error[^\r\n]* {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (ERROR_TOKEN);
}

	/* After we see a "#define" we enter the <DEFINE> start state
	 * for the lexer. Within <DEFINE> we are looking for the first
	 * identifier and specifically checking whether the identifier
	 * is followed by a '(' or not, (to lex either a
	 * FUNC_IDENTIFIER or an OBJ_IDENTIFIER token).
	 *
	 * While in the <DEFINE> state we also need to explicitly
	 * handle a few other things that may appear before the
	 * identifier:
	 *
	 * 	* Comments, (handled above with the main support for
	 * 	  comments).
	 *
	 *	* Whitespace (simply ignored)
	 *
	 *	* Anything else, (not an identifier, not a comment,
	 *	  and not whitespace). This will generate an error.
	 */
<HASH>define{HSPACE}* {
	if (! parser->skipping) {
		BEGIN DEFINE;
		yyextra->space_tokens = 0;
		RETURN_TOKEN (DEFINE_TOKEN);
	} else {
		/* The directive is being skipped, so nothing is returned
		 * for it. Leave <HASH> all the same: if we stayed there,
		 * the macro name that follows would be matched against
		 * the directive-name rules, (so a skipped "#define if 1"
		 * would emit an IF token), instead of being skipped like
		 * any other token. */
		BEGIN INITIAL;
	}
}

<HASH>undef {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	RETURN_TOKEN (UNDEF);
}

<HASH>{HSPACE}+ {
	/* Nothing to do here. Importantly, don't leave the <HASH>
	 * start condition, since it's legal to have space between the
	 * '#' and the directive. */
}

	/* This will catch any non-directive garbage after a HASH */
<HASH>{NONSPACE} {
	BEGIN INITIAL;
	RETURN_TOKEN (GARBAGE);
}

	/* An identifier immediately followed by '(' */
<DEFINE>{IDENTIFIER}/"(" {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
}

	/* An identifier not immediately followed by '(' */
<DEFINE>{IDENTIFIER} {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
}

	/* Whitespace */
<DEFINE>{HSPACE}+ {
	/* Just ignore it. Nothing to do here. */
}

	/* '/' not followed by '*', so not a comment. This is an error. */
<DEFINE>[/][^*]{NONSPACE}* {
	BEGIN INITIAL;
	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

	/* A character that can't start an identifier, comment, or
	 * space. This is an error. */
<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* {
	BEGIN INITIAL;
	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{DECIMAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{OCTAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{HEXADECIMAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

"<<"  {
	RETURN_TOKEN (LEFT_SHIFT);
}

">>" {
	RETURN_TOKEN (RIGHT_SHIFT);
}

"<=" {
	RETURN_TOKEN (LESS_OR_EQUAL);
}

">=" {
	RETURN_TOKEN (GREATER_OR_EQUAL);
}

"==" {
	RETURN_TOKEN (EQUAL);
}

"!=" {
	RETURN_TOKEN (NOT_EQUAL);
}

"&&" {
	RETURN_TOKEN (AND);
}

"||" {
	RETURN_TOKEN (OR);
}

"++" {
	RETURN_TOKEN (PLUS_PLUS);
}

"--" {
	RETURN_TOKEN (MINUS_MINUS);
}

"##" {
	/* The skipping check is made here, (and not only inside
	 * RETURN_TOKEN), so that no error is reported for a "##" that
	 * is being skipped. */
	if (! parser->skipping) {
		if (parser->is_gles)
			glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
		RETURN_TOKEN (PASTE);
	}
}

"defined" {
	RETURN_TOKEN (DEFINED);
}

{IDENTIFIER} {
	RETURN_STRING_TOKEN (IDENTIFIER);
}

{PP_NUMBER} {
	RETURN_STRING_TOKEN (OTHER);
}

{PUNCTUATION} {
	RETURN_TOKEN (yytext[0]);
}

{OTHER}+ {
	RETURN_STRING_TOKEN (OTHER);
}

{HSPACE} {
	if (yyextra->space_tokens) {
		RETURN_TOKEN (SPACE);
	}
}

	/* We preserve all newlines, even between #if 0..#endif, so no
	 * skipping. */
<*>{NEWLINE} {
	/* If a multi-line comment swallowed newlines earlier, switch to
	 * NEWLINE_CATCHUP so that they get re-emitted on the following
	 * calls. */
	if (parser->commented_newlines) {
		BEGIN NEWLINE_CATCHUP;
	} else {
		BEGIN INITIAL;
	}
	yyextra->space_tokens = 1;
	yyextra->lexing_directive = 0;
	yylineno++;
	yycolumn = 0;
	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}

<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
	if (YY_START == COMMENT)
		glcpp_error(yylloc, yyextra, "Unterminated comment");
	BEGIN DONE; /* Don't keep matching this rule forever. */
	yyextra->lexing_directive = 0;
	/* Make sure the token stream ends with a NEWLINE even if the
	 * input did not. */
	if (! parser->last_token_was_newline)
		RETURN_TOKEN (NEWLINE);
}

	/* This is a catch-all to avoid the annoying default flex action which
	 * matches any character and prints it. If any input ever matches this
	 * rule, then we have made a mistake above and need to fix one or more
	 * of the preceding patterns to match that input. */

<*>. {
	glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);

	/* We don't actually use the UNREACHABLE start condition. We
	only have this block here so that we can pretend to call some
	generated functions, (to avoid "defined but not used"
	warnings). */
	if (YY_START == UNREACHABLE) {
		unput('.');
		yy_top_state(yyscanner);
	}
}

%%

void
|
|
glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader)
|
|
{
|
|
yy_scan_string(shader, parser->scanner);
|
|
}
|