1
0
Fork 0
mirror of https://github.com/eclipse-cdt/cdt synced 2025-04-29 19:45:01 +02:00

For bug 205272, the lexer with JUnit tests.

This commit is contained in:
Markus Schorn 2007-10-09 15:09:59 +00:00
parent 120cac7e45
commit 285d224021
18 changed files with 1978 additions and 145 deletions

View file

@ -26,6 +26,7 @@ public class DOMParserTestSuite extends TestCase {
public static Test suite() {
TestSuite suite= new TestSuite(ParserTestSuite.class.getName());
suite.addTest(LexerTests.suite());
suite.addTest(DOMScannerTests.suite());
suite.addTest(AST2Tests.suite());
suite.addTestSuite( GCCTests.class );

View file

@ -0,0 +1,545 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.core.parser.tests.ast2;
import junit.framework.TestSuite;
import org.eclipse.cdt.core.parser.IGCCToken;
import org.eclipse.cdt.core.parser.IProblem;
import org.eclipse.cdt.core.parser.IToken;
import org.eclipse.cdt.core.testplugin.util.BaseTestCase;
import org.eclipse.cdt.internal.core.parser.scanner.Lexer;
import org.eclipse.cdt.internal.core.parser.scanner.Token;
import org.eclipse.cdt.internal.core.parser.scanner.Lexer.LexerOptions;
public class LexerTests extends BaseTestCase {
static String TRIGRAPH_REPLACES_CHARS= "#^[]|{}~\\";
static String TRIGRAPH_CHARS= "='()!<>-/";
/**
 * Creates the JUnit suite for this class by delegating to suite(Class), which is not
 * declared in this class (presumably inherited from BaseTestCase).
 */
public static TestSuite suite() {
return suite(LexerTests.class);
}
private Lexer fLexer;
private TestLexerLog fLog= new TestLexerLog();
private int fLastEndOffset;
/** Creates an unnamed test case. */
public LexerTests() {
super();
}
/** Creates a test case with the given name, which is passed on to the superclass. */
public LexerTests(String name) {
super(name);
}
/**
 * Starts a new lexer run over the input using the default lexer options.
 */
private void init(String input) throws Exception {
    initLexer(input, new LexerOptions());
}

/**
 * Starts a new lexer run over the input with explicit settings for the two options
 * exercised by these tests.
 * @param dollar value for {@link LexerOptions#fSupportDollarInitializers}
 * @param minmax value for {@link LexerOptions#fSupportMinAndMax}
 */
private void init(String input, boolean dollar, boolean minmax) throws Exception {
    final LexerOptions options= new LexerOptions();
    options.fSupportDollarInitializers= dollar;
    options.fSupportMinAndMax= minmax;
    initLexer(input, options);
}

/**
 * Clears the log, creates the lexer, fetches the first token and resets the offset
 * at which the first token is expected.
 */
private void initLexer(String input, LexerOptions options) throws Exception {
    fLog.clear();
    fLexer= new Lexer(input.toCharArray(), options, fLog);
    fLexer.nextToken();
    fLastEndOffset= 0;
}
/** Asserts the type of the current token without checking its image, then advances the lexer. */
private void token(int tokenType) throws Exception {
token(tokenType, null);
}
/**
 * Asserts that the current token has the expected type and starts exactly at the offset
 * recorded in fLastEndOffset, then advances the lexer to the next token.
 * @param tokenType the expected token type
 * @param image the expected token image, or <code>null</code> to skip the image check
 */
private void token(int tokenType, String image) throws Exception {
Token t= fLexer.currentToken();
assertEquals(tokenType, t.getType());
// tokens must be adjacent unless ws() has moved fLastEndOffset forward
assertEquals(fLastEndOffset, t.getOffset());
fLastEndOffset= t.getEndOffset();
if (image != null) {
assertEquals(image, new String(t.getTokenImage()));
}
// leave the lexer positioned on the next, still unchecked token
fLexer.nextToken();
}
/** Expects an integer token with the given image. */
private void integer(String expectedImage) throws Exception {
token(IToken.tINTEGER, expectedImage);
}
/** Expects a floating-point token with the given image. */
private void floating(String expectedImage) throws Exception {
token(IToken.tFLOATINGPT, expectedImage);
}
/** Expects an identifier token with the given image. */
private void id(String expectedImage) throws Exception {
token(IToken.tIDENTIFIER, expectedImage);
}
/** Expects a string literal; the expected image is passed without the enclosing quotes. */
private void str(String expectedImage) throws Exception {
token(IToken.tSTRING, "\"" + expectedImage + "\"");
}
/** Expects a wide string literal; the expected image is passed without the L prefix and quotes. */
private void wstr(String expectedImage) throws Exception {
token(IToken.tLSTRING, "L\"" + expectedImage + "\"");
}
/** Expects a character literal; the expected image includes the quotes. */
private void ch(String expectedImage) throws Exception {
token(IToken.tCHAR, expectedImage);
}
/** Expects a wide character literal; the expected image includes the L prefix and the quotes. */
private void wch(String expectedImage) throws Exception {
token(IToken.tLCHAR, expectedImage);
}
/**
 * Asserts that the lexer has reached the end of the input and that no problems or comments
 * are left unchecked in the log.
 * <p>
 * The current token is inspected without advancing: init() and token() always leave the lexer
 * positioned on the next unchecked token, so fetching another token here would skip that
 * token and let a superfluous trailing token go unnoticed.
 */
private void eof() throws Exception {
    IToken t= fLexer.currentToken();
    assertEquals("superfluous token " + t, Lexer.tEND_OF_INPUT, t.getType());
    assertEquals(0, fLog.getProblemCount());
    assertEquals(0, fLog.getCommentCount());
}
/** Expects a newline token. */
private void nl() throws Exception {
token(Lexer.tNEWLINE);
}
/**
 * Asserts that the current token starts after the offset recorded in fLastEndOffset, i.e. that
 * something was skipped in between, and moves the recorded offset to the start of the current
 * token. The lexer is not advanced.
 */
private void ws() throws Exception {
int offset= fLexer.currentToken().getOffset();
assertTrue(offset > fLastEndOffset);
fLastEndOffset= offset;
}
/**
 * Asserts that the oldest problem still recorded in the log has the given kind and image,
 * and removes it from the log.
 */
private void problem(int kind, String img) throws Exception {
assertEquals(fLog.createString(kind, img), fLog.removeFirstProblem());
}
/**
 * Asserts a gap in front of the current token (see ws()) and that the oldest comment still
 * recorded in the log has the given image; the comment is removed from the log.
 */
private void comment(String img) throws Exception {
ws();
assertEquals(img, fLog.removeFirstComment());
}
/**
 * Checks that trigraph sequences are lexed as the characters they stand for: inside a string
 * literal, as operator and punctuator tokens, and as the backslash of a line splice.
 */
public void testTrigraphSequences() throws Exception {
// all nine trigraphs inside a string literal
init("\"??=??/??'??(??)??!??<??>??-\"");
str("#\\^[]|{}~");
eof();
// outside of literals the trigraphs form the corresponding tokens
init("??=??'??(??)??!??<??>??-");
token(IToken.tPOUND);
token(IToken.tXOR);
token(IToken.tLBRACKET);
token(IToken.tRBRACKET);
token(IToken.tBITOR);
token(IToken.tLBRACE);
token(IToken.tRBRACE);
token(IToken.tCOMPL);
eof();
// the backslash trigraph followed by a newline splices the two lines
init("a??/\nb");
id("ab");
eof();
}
/** Checks that LF and CR-LF line endings each produce exactly one newline token. */
public void testWindowsLineEnding() throws Exception {
init("\n\n");
nl(); nl(); eof();
init("\r\n\r\n");
nl(); nl(); eof();
}
/**
 * Checks the interaction of trigraphs and line splices: a splice inside a trigraph sequence
 * prevents the trigraph from being recognized, whereas the backslash trigraph itself can
 * introduce a splice.
 */
public void testLineSplicingTrigraph() throws Exception {
// a trigraph cannot be spliced
init("??\\\n=");
token(IToken.tQUESTION);
token(IToken.tQUESTION);
token(IToken.tASSIGN);
eof();
// same with a CR-LF line ending
init("??\\\r\n=");
token(IToken.tQUESTION);
token(IToken.tQUESTION);
token(IToken.tASSIGN);
eof();
// trigraph can be used to splice a line
init("a??/\nb");
id("ab");
eof();
}
/** Checks that a line splice (LF and CR-LF) inside a string literal is removed from its image. */
public void testLineSplicingStringLiteral() throws Exception {
// splicing in string literal
init("\"a\\\nb\"");
str("ab");
eof();
init("\"a\\\r\nb\"");
str("ab");
eof();
}
/** Checks that a line splice (LF and CR-LF) inside a character literal is removed from its image. */
public void testLineSplicingCharLiteral() throws Exception {
init("'a\\\nb'");
ch("'ab'");
eof();
init("'a\\\r\nb'");
ch("'ab'");
eof();
}
/**
 * Checks that line splices (LF and CR-LF) are removed from quoted and from angle-bracket
 * header names. The lexer is switched to include-directive mode after the first token has
 * been fetched by init() and before the header name is lexed.
 */
public void testLineSplicingHeaderName() throws Exception {
// quoted header name
init("p\"a\\\nb\"");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tQUOTE_HEADER_NAME, "\"ab\"");
eof();
init("p\"a\\\r\nb\"");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tQUOTE_HEADER_NAME, "\"ab\"");
eof();
// system header name
init("p<a\\\nb>");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tSYSTEM_HEADER_NAME, "<ab>");
eof();
init("p<a\\\r\nb>");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tSYSTEM_HEADER_NAME, "<ab>");
eof();
}
/**
 * Checks line splices in comments: a splice continues a line comment on the next line, and
 * the two characters that open or close a comment may themselves be separated by splices.
 * The comment is reported to the log with its unmodified source text.
 */
public void testLineSplicingComment() throws Exception {
// line comment continued by a splice
init("// a\\\nb\n");
comment("// a\\\nb");
nl();
eof();
// NOTE(review): this case is identical to the previous one; the other splicing tests pair
// the LF case with a CR-LF variant, so presumably a CR-LF input was intended here - confirm.
init("// a\\\nb\n");
comment("// a\\\nb");
nl();
eof();
// the two slashes of a line comment separated by two splices
init("/\\\n\\\n/ ab\n");
comment("/\\\n\\\n/ ab");
nl();
eof();
// block comment with splices inside its opening and closing delimiters
init("/\\\n* a\\\nb*\\\n/");
comment("/\\\n* a\\\nb*\\\n/");
eof();
}
/** Checks that a line splice (LF and CR-LF) inside an identifier is removed from its image. */
public void testLineSplicingIdentifier() throws Exception {
init("a\\\nb");
id("ab");
eof();
init("a\\\r\nb");
id("ab");
eof();
}
/** Checks that a dot and a digit separated by a line splice (LF and CR-LF) form one floating-point token. */
public void testLineSplicingNumber() throws Exception {
init(".\\\n1");
floating(".1");
eof();
init(".\\\r\n1");
floating(".1");
eof();
}
/**
 * Checks that comment delimiters inside a comment have no effect: a block-comment opener
 * within a line comment, and line- and block-comment openers within a block comment.
 */
public void testComments() throws Exception {
// line comment ends at the newline, the following identifier is a regular token
init("// /*\na");
comment("// /*");
nl();
id("a");
eof();
// block comment spanning two lines, directly followed by an identifier
init("/* // /* \n xxx*/a");
comment("/* // /* \n xxx*/");
id("a");
eof();
}
/**
 * Checks that within a header name quotes, comment openers and a backslash are ordinary
 * characters: the whole text up to the closing delimiter forms a single header-name token.
 */
public void testHeaderName() throws Exception {
// quoted header name; the backslash does not escape the closing quote
init("p\"'/*//\\\"");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tQUOTE_HEADER_NAME, "\"'/*//\\\"");
eof();
// system header name
init("p<'\"/*//>");
fLexer.setInsideIncludeDirective();
id("p");
token(Lexer.tSYSTEM_HEADER_NAME, "<'\"/*//>");
eof();
}
/**
 * Checks which characters may start and continue an identifier, using suffixes of one long
 * sample consisting of letters, underscore, dollar, two universal character names and digits.
 */
public void testIdentifier() throws Exception {
final String ident= "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$\\u1234\\U123456780123456789";
// every suffix starting at a letter, '_', '$' or at the first universal character name
// is expected to be one identifier
int unc1= ident.indexOf('\\');
for (int i = 0; i <= unc1; i++) {
String id= ident.substring(i);
init(id);
id(id);
eof();
}
// the suffix starting at the second universal character name is an identifier, too
String id= ident.substring(ident.indexOf('\\', unc1+1));
init(id);
id(id);
eof();
// the last one to ten characters are digits only and are expected to be an integer
for (int i= 0; i <10; i++) {
String nonid= ident.substring(ident.length()-i-1);
init(nonid);
integer(nonid);
eof();
}
// with dollar support switched off the '$' is reported as a bad character and
// separates two identifiers
init(ident, false, true);
final int idxDollar = ident.indexOf('$');
id(ident.substring(0, idxDollar));
problem(IProblem.SCANNER_BAD_CHARACTER, "$");
ws();
id(ident.substring(idxDollar+1));
}
/**
 * Checks which characters are accepted as part of a number, using suffixes of one long
 * sample: a suffix that starts with a digit, or with a dot followed by a digit, is expected to
 * be a single floating-point token that extends over letters, dots, universal character names
 * and signs following an 'e' or 'E'.
 */
public void testNumber() throws Exception {
final String number= ".0123456789.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" +
"\\uaaaa\\Uaaaaaaaae+e-E+E-";
// suffixes starting within the leading ".0123456789" are one number
for (int i = 0; i < 11; i++) {
String n= number.substring(i);
init(n);
floating(n);
eof();
}
// the remaining suffixes do not start a number: they are expected to be split into an
// optional dot, an identifier and the operators and identifiers of the trailing "+e-E+E-"
int idxPlus= number.indexOf('+');
for (int i = 11; i < number.length(); i++) {
String n= number.substring(i);
init(n);
int startString= 0;
// index 11 is the second dot of the sample
if (i==11) {token(IToken.tDOT); startString=1;}
if (i<idxPlus) id(n.substring(startString, idxPlus-i));
if (i<idxPlus+1) token(IToken.tPLUS);
if (i<idxPlus+2) id("e");
if (i<idxPlus+3) token(IToken.tMINUS);
if (i<idxPlus+4) id("E");
if (i<idxPlus+5) token(IToken.tPLUS);
if (i<idxPlus+6) id("E");
token(IToken.tMINUS);
eof();
}
}
/**
 * Checks plain and wide character literals, including escaped quotes and backslashes, and the
 * handling of unterminated literals: a token is still produced and a
 * SCANNER_BAD_CHARACTER problem carrying the image of the literal is reported.
 */
public void testCharLiteral() throws Exception {
// well-formed literals
String lit= "'abc0123\\'\".:; \\\\'";
init(lit);
ch(lit);
eof();
lit= 'L'+lit;
init(lit);
wch(lit);
eof();
// unterminated literal ended by a newline; the newline is a token of its own
lit= "'ut\n";
init(lit);
problem(IProblem.SCANNER_BAD_CHARACTER, "'ut");
ch("'ut");
nl();
eof();
lit= 'L'+lit;
init(lit);
problem(IProblem.SCANNER_BAD_CHARACTER, "L'ut");
wch("L'ut");
nl();
eof();
// unterminated literal ended by the end of input, the last quote is escaped
lit= "'ut\\'";
init(lit);
problem(IProblem.SCANNER_BAD_CHARACTER, lit);
ch("'ut\\'");
eof();
lit= 'L'+lit;
init(lit);
problem(IProblem.SCANNER_BAD_CHARACTER, lit);
wch("L'ut\\'");
eof();
}
/**
 * Checks plain and wide string literals, including escaped quotes and backslashes, and the
 * handling of unterminated literals: a token is still produced and a
 * SCANNER_UNBOUNDED_STRING problem carrying the image of the literal is reported.
 */
public void testStringLiteral() throws Exception {
// well-formed literals
String lit= "abc0123\\\"'.:; \\\\";
init('"' + lit + '"');
str(lit);
eof();
init("L\"" + lit + '"');
wstr(lit);
eof();
// unterminated literal ended by a newline; the newline is a token of its own
lit= "ut\n";
init('"' + lit);
problem(IProblem.SCANNER_UNBOUNDED_STRING, "\"ut");
token(IToken.tSTRING, "\"ut");
nl();
eof();
init("L\"" + lit);
problem(IProblem.SCANNER_UNBOUNDED_STRING, "L\"ut");
token(IToken.tLSTRING, "L\"ut");
nl();
eof();
// unterminated literal ended by the end of input, the last quote is escaped
lit= "\"ut\\\"";
init(lit);
problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
token(IToken.tSTRING, "\"ut\\\"");
eof();
lit= 'L'+lit;
init(lit);
problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
token(IToken.tLSTRING, "L\"ut\\\"");
eof();
}
/**
 * Lexes one string containing all operators and punctuators (including digraphs and the gcc
 * operators for minimum and maximum) and compares the token types with the parallel array
 * of expected types. The check is repeated for every combination of the nine line-splice
 * modes of instertLineSplices() and six modes of useTrigraphs().
 */
public void testOperatorAndPunctuators() throws Exception {
final String ops= "{}[]###()<::><%%>%:%:%:;:...?.::..*+-*/%^&|~=!<>+=-=*=/=%=" +
"^=&=|=<<>><<=>>===!=<=>=&&||++--,->*-><?>?\\";
// expected token types, in the order in which the operators appear in ops
final int[] tokens= new int[] {
IToken.tLBRACE, IToken.tRBRACE, IToken.tLBRACKET, IToken.tRBRACKET, IToken.tPOUNDPOUND,
IToken.tPOUND, IToken.tLPAREN, IToken.tRPAREN, IToken.tLBRACKET, IToken.tRBRACKET,
IToken.tLBRACE, IToken.tRBRACE, IToken.tPOUNDPOUND, IToken.tPOUND, IToken.tSEMI,
IToken.tCOLON, IToken.tELLIPSIS, IToken.tQUESTION, IToken.tDOT, IToken.tCOLONCOLON, IToken.tDOT,
IToken.tDOTSTAR, IToken.tPLUS, IToken.tMINUS, IToken.tSTAR, IToken.tDIV, IToken.tMOD,
IToken.tXOR, IToken.tAMPER, IToken.tBITOR, IToken.tCOMPL, IToken.tASSIGN, IToken.tNOT,
IToken.tLT, IToken.tGT, IToken.tPLUSASSIGN, IToken.tMINUSASSIGN, IToken.tSTARASSIGN,
IToken.tDIVASSIGN, IToken.tMODASSIGN, IToken.tXORASSIGN, IToken.tAMPERASSIGN,
IToken.tBITORASSIGN, IToken.tSHIFTL, IToken.tSHIFTR, IToken.tSHIFTLASSIGN,
IToken.tSHIFTRASSIGN, IToken.tEQUAL, IToken.tNOTEQUAL, IToken.tLTEQUAL, IToken.tGTEQUAL,
IToken.tAND, IToken.tOR, IToken.tINCR, IToken.tDECR, IToken.tCOMMA, IToken.tARROWSTAR,
IToken.tARROW, IGCCToken.tMIN, IGCCToken.tMAX, IToken.tBACKSLASH,
};
for (int splices=0; splices<9; splices++) {
for (int trigraphs= 0; trigraphs<6; trigraphs++) {
StringBuffer buf= new StringBuffer();
String input= useTrigraphs(ops.toCharArray(), trigraphs);
// first pass: default options, with line splices inserted
init(instertLineSplices(input, splices));
for (int i = 0; i < tokens.length; i++) {
Token token= fLexer.currentToken();
buf.append(token.getTokenImage());
token(tokens[i]);
}
eof();
// the concatenated images must be the operators without trigraphs and splices
assertEquals(ops, buf.toString()); // check token image
// second pass: no line splices, support for the min/max operators switched off,
// so each of them is expected as two separate tokens
init(input, true, false);
for (int i = 0; i < tokens.length; i++) {
switch (tokens[i]) {
case IGCCToken.tMIN:
token(IToken.tLT);
token(IToken.tQUESTION);
break;
case IGCCToken.tMAX:
token(IToken.tGT);
token(IToken.tQUESTION);
break;
default:
token(tokens[i]);
break;
}
}
eof();
}
}
}
/**
 * Returns a copy of the input in which line splices are appended after every character.
 * A trigraph sequence (two question marks followed by one of TRIGRAPH_CHARS) is kept
 * together and handled like a single character.
 * @param input the text to insert the splices into
 * @param splices selects what is appended: <code>splices % 3</code> picks the splice ending in
 *   a line feed (0: none, 1: backslash LF, 2: backslash, space, LF); the remaining part,
 *   <code>(splices - splices % 3) / 3</code>, picks the splice ending in CR-LF
 *   (0: none, 1: backslash CR-LF, 2: backslash, tab, CR-LF).
 */
private String instertLineSplices(String input, int splices) {
    final int lfKind= splices % 3;
    final int crlfKind= (splices - lfKind) / 3;

    // the text appended after each character is the same for the whole input
    final StringBuffer splice= new StringBuffer();
    if (lfKind == 1) {
        splice.append("\\\n");
    } else if (lfKind == 2) {
        splice.append("\\ \n");
    }
    if (crlfKind == 1) {
        splice.append("\\\r\n");
    } else if (crlfKind == 2) {
        splice.append("\\\t\r\n");
    }
    final String suffix= splice.toString();

    final StringBuffer result= new StringBuffer();
    final int length= input.length();
    int pos= 0;
    while (pos < length) {
        final boolean isTrigraph= input.charAt(pos) == '?'
                && pos + 2 < length
                && input.charAt(pos + 1) == '?'
                && TRIGRAPH_CHARS.indexOf(input.charAt(pos + 2)) >= 0;
        final int end= isTrigraph ? pos + 3 : pos + 1;
        result.append(input.substring(pos, end));
        result.append(suffix);
        pos= end;
    }
    return result.toString();
}
/**
 * Returns the input with characters that have a trigraph representation (those listed in
 * TRIGRAPH_REPLACES_CHARS) optionally written as trigraph sequences.
 * @param input the text to convert
 * @param mode 0: nothing is replaced; 1: every second replaceable character is replaced,
 *   starting with the second one; 2: every second replaceable character is replaced,
 *   starting with the first one; 3 and above: every replaceable character is replaced.
 */
private String useTrigraphs(char[] input, int mode) {
    if (mode == 0) {
        return new String(input);
    }
    boolean yes= mode > 1;
    StringBuffer result= new StringBuffer();
    for (int i = 0; i < input.length; i++) {
        char c = input[i];
        int idx= TRIGRAPH_REPLACES_CHARS.indexOf(c);
        // indexOf() returns -1 for "not found"; index 0 is the valid position of '#'
        if (idx >= 0) {
            if (yes) {
                result.append("??");
                result.append(TRIGRAPH_CHARS.charAt(idx));
            }
            else {
                result.append(c);
            }
            // modes 1 and 2 alternate between the plain character and the trigraph
            if (mode < 3) {
                yes= !yes;
            }
        }
        else {
            result.append(c);
        }
    }
    return result.toString();
}
/** Checks that two characters of an operator separated by a line splice (LF and CR-LF) form one token. */
public void testLineSplicingOperator() throws Exception {
// splicing in operator
init("|\\\n|");
token(IToken.tOR);
eof();
init("|\\\r\n|");
token(IToken.tOR);
eof();
}
}

View file

@ -0,0 +1,61 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.core.parser.tests.ast2;
import java.util.ArrayList;
import org.eclipse.cdt.internal.core.parser.scanner.ILexerLog;
/**
 * Lexer log for tests: records the comments and problems that are reported, in the order
 * of their arrival, as strings that can be taken out again one by one.
 */
public class TestLexerLog implements ILexerLog {
    private ArrayList fComments= new ArrayList();
    private ArrayList fProblems= new ArrayList();

    /** Records the source text of the comment. */
    public void handleComment(boolean isBlockComment, char[] source, int offset, int endOffset) {
        fComments.add(image(source, offset, endOffset));
    }

    /** Records the problem in the format defined by {@link #createString(int, String)}. */
    public void handleProblem(int problemID, char[] source, int offset, int endOffset) {
        fProblems.add(createString(problemID, image(source, offset, endOffset)));
    }

    /** Returns the string under which a problem with the given id and image is recorded. */
    public String createString(int problemID, String image) {
        return problemID + ":" + image;
    }

    /** Discards all recorded comments and problems. */
    public void clear() {
        fComments.clear();
        fProblems.clear();
    }

    /** Returns the number of recorded problems that have not been removed yet. */
    public int getProblemCount() {
        return fProblems.size();
    }

    /** Returns the number of recorded comments that have not been removed yet. */
    public int getCommentCount() {
        return fComments.size();
    }

    /** Removes and returns the oldest recorded problem, or a notice if there is none. */
    public String removeFirstProblem() {
        return removeFirst(fProblems, "no problems have been reported");
    }

    /** Removes and returns the oldest recorded comment, or a notice if there is none. */
    public String removeFirstComment() {
        return removeFirst(fComments, "no comments have been reported");
    }

    /** Returns the source text in the range from offset (inclusive) to endOffset (exclusive). */
    private static String image(char[] source, int offset, int endOffset) {
        return new String(source, offset, endOffset - offset);
    }

    /** Removes and returns the first entry of the list, or the notice if the list is empty. */
    private static String removeFirst(ArrayList entries, String noticeIfEmpty) {
        return entries.isEmpty() ? noticeIfEmpty : (String) entries.remove(0);
    }
}

View file

@ -61,6 +61,7 @@ Export-Package: org.eclipse.cdt.core,
org.eclipse.cdt.internal.core.parser.ast.quick;x-internal:=true,
org.eclipse.cdt.internal.core.parser.problem;x-internal:=true,
org.eclipse.cdt.internal.core.parser.pst;x-internal:=true,
org.eclipse.cdt.internal.core.parser.scanner;x-internal:=true,
org.eclipse.cdt.internal.core.parser.scanner2;x-internal:=true,
org.eclipse.cdt.internal.core.parser.token;x-friends:="org.eclipse.cdt.ui",
org.eclipse.cdt.internal.core.parser.util;x-internal:=true,

View file

@ -28,7 +28,7 @@ import org.eclipse.cdt.core.parser.IToken;
*/
public interface IExtensionToken {
int t__otherDeclSpecModifierFirst= IToken.tLAST + 100;
int t__otherDeclSpecModifierLast= IToken.tLAST + 110;
int t__otherDeclSpecModifierFirst= IToken.FIRST_RESERVED_IExtensionToken;
int t__otherDeclSpecModifierLast= IToken.FIRST_RESERVED_IExtensionToken + 10;
}

View file

@ -18,11 +18,11 @@ package org.eclipse.cdt.core.parser;
*/
public interface IGCCToken extends IToken {
public static final int t_typeof = tLAST + 1;
public static final int t___alignof__ = tLAST + 2;
public static final int tMAX = tLAST + 3;
public static final int tMIN = tLAST + 4;
public static final int t__attribute__ = tLAST + 5;
public static final int t__declspec = tLAST + 6;
public static final int t_typeof = FIRST_RESERVED_IGCCToken;
public static final int t___alignof__ = FIRST_RESERVED_IGCCToken + 1;
public static final int tMAX = FIRST_RESERVED_IGCCToken + 2;
public static final int tMIN = FIRST_RESERVED_IGCCToken + 3;
public static final int t__attribute__ = FIRST_RESERVED_IGCCToken + 4;
public static final int t__declspec = FIRST_RESERVED_IGCCToken + 5;
}

View file

@ -30,8 +30,8 @@ import org.eclipse.cdt.internal.core.parser.scanner2.ILocationResolver;
*/
public interface IScanner extends IMacroCollector {
public static final int tPOUNDPOUND = -6;
public static final int tPOUND = -7;
/** @deprecated */ public static final int tPOUNDPOUND = IToken.tPOUNDPOUND;
/** @deprecated */ public static final int tPOUND = IToken.tPOUND;
public void setOffsetBoundary( int offset );
public void setContentAssistMode( int offset );

View file

@ -41,287 +41,188 @@ public interface IToken {
// Token types
int FIRST_RESERVED_SCANNER= -100;
int LAST_RESERVED_SCANNER= -1;
static public final int tIDENTIFIER = 1;
static public final int tINTEGER = 2;
static public final int tCOLONCOLON = 3;
static public final int tCOLON = 4;
static public final int tSEMI = 5;
static public final int tCOMMA = 6;
static public final int tQUESTION = 7;
static public final int tLPAREN = 8;
static public final int tRPAREN = 9;
static public final int tLBRACKET = 10;
static public final int tRBRACKET = 11;
static public final int tLBRACE = 12;
static public final int tRBRACE = 13;
static public final int tPLUSASSIGN = 14;
static public final int tINCR = 15;
static public final int tPLUS = 16;
static public final int tMINUSASSIGN = 17;
static public final int tDECR = 18;
static public final int tARROWSTAR = 19;
static public final int tARROW = 20;
static public final int tMINUS = 21;
static public final int tSTARASSIGN = 22;
static public final int tSTAR = 23;
static public final int tMODASSIGN = 24;
static public final int tMOD = 25;
static public final int tXORASSIGN = 26;
static public final int tXOR = 27;
static public final int tAMPERASSIGN = 28;
static public final int tAND = 29;
static public final int tAMPER = 30;
static public final int tBITORASSIGN = 31;
static public final int tOR = 32;
static public final int tBITOR = 33;
static public final int tCOMPL = 34;
static public final int tBITCOMPLEMENT = 34;
static public final int tNOTEQUAL = 35;
static public final int tNOT = 36;
static public final int tEQUAL = 37;
static public final int tASSIGN = 38;
static public final int tSHIFTL = 40;
static public final int tLTEQUAL = 41;
static public final int tLT = 42;
static public final int tSHIFTRASSIGN = 43;
static public final int tSHIFTR = 44;
static public final int tGTEQUAL = 45;
static public final int tGT = 46;
static public final int tSHIFTLASSIGN = 47;
static public final int tELLIPSIS = 48;
static public final int tDOTSTAR = 49;
static public final int tDOT = 50;
static public final int tDIVASSIGN = 51;
static public final int tDIV = 52;
static public final int tBACKSLASH= 53;
/** @deprecated use {@link #tAND} */
static public final int t_and = 54;
/** @deprecated use {@link #tAMPERASSIGN} */
static public final int t_and_eq = 55;
static public final int t_asm = 56;
static public final int t_auto = 57;
/** @deprecated use {@link #tAMPER} */
static public final int t_bitand = 58;
/** @deprecated use {@link #tBITOR} */
static public final int t_bitor = 59;
static public final int t_bool = 60;
static public final int t_break = 61;
static public final int t_case = 62;
static public final int t_catch = 63;
static public final int t_char = 64;
static public final int t_class = 65;
/** @deprecated use {@link #tBITCOMPLEMENT} */
static public final int tCOMPL= tBITCOMPLEMENT;
/** @deprecated use {@link #tBITCOMPLEMENT} */
static public final int t_compl = 66;
static public final int t_const = 67;
static public final int t_const_cast = 69;
static public final int t_continue = 70;
static public final int t_default = 71;
static public final int t_delete = 72;
static public final int t_do = 73;
static public final int t_double = 74;
static public final int t_dynamic_cast = 75;
static public final int t_else = 76;
static public final int t_enum = 77;
static public final int t_explicit = 78;
static public final int t_export = 79;
static public final int t_extern = 80;
static public final int t_false = 81;
static public final int t_float = 82;
static public final int t_for = 83;
static public final int t_friend = 84;
static public final int t_goto = 85;
static public final int t_if = 86;
static public final int t_inline = 87;
static public final int t_int = 88;
static public final int t_long = 89;
static public final int t_mutable = 90;
static public final int t_namespace = 91;
static public final int t_new = 92;
/** @deprecated use {@link #tNOT} */
static public final int t_not = 93;
/** @deprecated use {@link #tNOTEQUAL} */
static public final int t_not_eq = 94;
static public final int t_operator = 95;
/** @deprecated use {@link #tOR} */
static public final int t_or = 96;
/** @deprecated use {@link #tBITORASSIGN} */
static public final int t_or_eq = 97;
static public final int t_private = 98;
static public final int t_protected = 99;
static public final int t_public = 100;
static public final int t_register = 101;
static public final int t_reinterpret_cast = 102;
static public final int t_return = 103;
static public final int t_short = 104;
static public final int t_sizeof = 105;
static public final int t_static = 106;
static public final int t_static_cast = 107;
static public final int t_signed = 108;
static public final int t_struct = 109;
static public final int t_switch = 110;
static public final int t_template = 111;
static public final int t_this = 112;
static public final int t_throw = 113;
static public final int t_true = 114;
static public final int t_try = 115;
static public final int t_typedef = 116;
static public final int t_typeid = 117;
static public final int t_typename = 118;
static public final int t_union = 119;
static public final int t_unsigned = 120;
static public final int t_using = 121;
static public final int t_virtual = 122;
static public final int t_void = 123;
static public final int t_volatile = 124;
static public final int t_wchar_t = 125;
static public final int t_while = 126;
/** @deprecated use {@link #tXOR} */
static public final int t_xor = 127;
/** @deprecated use {@link #tXORASSIGN} */
static public final int t_xor_eq = 128;
static public final int tFLOATINGPT = 129;
static public final int tSTRING = 130;
static public final int tLSTRING = 131;
static public final int tCHAR = 132;
static public final int tLCHAR = 133;
static public final int t__Bool = 134;
static public final int t__Complex = 135;
static public final int t__Imaginary = 136;
static public final int t_restrict = 137;
/** @deprecated don't use it */
static public final int tMACROEXP = 138;
static public final int tPOUND= 138;
static public final int tPOUNDPOUND = 139;
static public final int tCOMPLETION = 140;
static public final int tEOC = 141; // End of Completion
/** @deprecated don't use it */
static public final int tCOMMENT = 142;
/** @deprecated don't use it */
static public final int tBLOCKCOMMENT = 143;
/** @deprecated don't use it */
static public final int tLAST = 143;
int FIRST_RESERVED_IGCCToken = 144;
int LAST_RESERVED_IGCCToken = 199;
int FIRST_RESERVED_IExtensionToken = 243;
int LAST_RESERVED_IExtensionToken = 299;
}

View file

@ -19,8 +19,6 @@ public class Keywords {
public static final String CAST = "cast"; //$NON-NLS-1$
public static final String ALIGNOF = "alignof"; //$NON-NLS-1$
public static final String TYPEOF = "typeof"; //$NON-NLS-1$
public static final String cpMIN = "<?"; //$NON-NLS-1$
public static final String cpMAX = ">?"; //$NON-NLS-1$
public static final String _BOOL = "_Bool"; //$NON-NLS-1$
public static final String _COMPLEX = "_Complex"; //$NON-NLS-1$
@ -231,9 +229,14 @@ public class Keywords {
public static final char[] cpDOT = ".".toCharArray(); //$NON-NLS-1$
public static final char[] cpDIVASSIGN = "/=".toCharArray(); //$NON-NLS-1$
public static final char[] cpDIV = "/".toCharArray(); //$NON-NLS-1$
public static final char[] cpBACKSLASH = "\\".toCharArray(); //$NON-NLS-1$
public static final char[] cpPOUND = "#".toCharArray(); //$NON-NLS-1$
public static final char[] cpPOUNDPOUND = "##".toCharArray(); //$NON-NLS-1$
// gcc extensions
public static final char[] cpMIN = "<?".toCharArray(); //$NON-NLS-1$
public static final char[] cpMAX = ">?".toCharArray(); //$NON-NLS-1$
// preprocessor keywords
public static final char[] cIFDEF = "ifdef".toCharArray(); //$NON-NLS-1$
public static final char[] cIFNDEF = "ifndef".toCharArray(); //$NON-NLS-1$

View file

@ -0,0 +1,24 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
/**
 * Exception that carries a token. According to the documentation of Lexer.nextToken() it is
 * thrown when completion is requested in a literal or a header-name.
 */
class CompletionTokenException extends Exception {
    // assigned once in the constructor, never changed afterwards
    private final Token fToken;

    /**
     * @param token the token to be handed to whoever catches the exception
     */
    public CompletionTokenException(Token token) {
        fToken= token;
    }

    /**
     * Returns the token that was passed to the constructor.
     */
    public Token getToken() {
        return fToken;
    }
}

View file

@ -0,0 +1,21 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
/**
 * Token whose image is looked up from its type via TokenUtil.getDigraphImage(int).
 * NOTE(review): presumably used for operators that were spelled as digraphs (e.g. "<:")
 * so that the image reflects the original spelling - confirm against the lexer.
 */
class DigraphToken extends Token {
/** Creates the token; all arguments are passed on to the Token constructor. */
public DigraphToken(int kind, int offset, int endOffset) {
super(kind, offset, endOffset);
}
/** Returns the digraph image that TokenUtil provides for the type of this token. */
public char[] getTokenImage() {
return TokenUtil.getDigraphImage(getType());
}
}

View file

@ -0,0 +1,18 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
/**
 * Callback interface through which a lexer reports problems and comments. Each report
 * identifies a range of offsets within a character array.
 */
public interface ILexerLog {
/**
 * Reports a problem found in the source.
 * @param problemID identifies the kind of problem
 * @param source the characters the offsets refer to
 * @param offset start of the problematic range
 * @param endOffset end of the problematic range
 *   (NOTE(review): TestLexerLog treats it as exclusive - confirm)
 */
void handleProblem(int problemID, char[] source, int offset, int endOffset);
/**
 * Reports a comment found in the source.
 * @param isBlockComment distinguishes block comments from line comments
 * @param source the characters the offsets refer to
 * @param offset start of the comment
 * @param endOffsetLast end of the comment
 *   (NOTE(review): TestLexerLog treats it as exclusive, like endOffset of handleProblem - confirm)
 */
void handleComment(boolean isBlockComment, char[] source, int offset, int endOffsetLast);
}

View file

@ -0,0 +1,942 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
import org.eclipse.cdt.core.dom.ast.IASTProblem;
import org.eclipse.cdt.core.parser.IGCCToken;
import org.eclipse.cdt.core.parser.IProblem;
import org.eclipse.cdt.core.parser.IToken;
/**
* In short this class converts line endings (to '\n') and trigraphs
* (to their corresponding character),
* removes line-splices, comments and whitespace other than newline.
* Returns preprocessor tokens.
* <p>
* In addition to the preprocessor tokens the following tokens may also be returned:
* {@link #tEND_OF_INPUT}, {@link IToken#tCOMPLETION}.
* <p>
* Number literals are split up into {@link IToken#tINTEGER} and {@link IToken#tFLOATINGPT}.
* No checks are done on the number literals.
* <p>
* Universal character names (UCNs) are accepted; however, characters from outside of the basic source character set are
* not converted to UCNs. Instead, they are tested with
* {@link Character#isUnicodeIdentifierPart(char)} and may be accepted as part of an
* identifier.
* <p>
* The characters in string literals and char-literals are left as they are found, no conversion to
* an execution character-set is performed.
*/
final public class Lexer {
public static final int tNEWLINE = IToken.FIRST_RESERVED_SCANNER + 1;
public static final int tEND_OF_INPUT = IToken.FIRST_RESERVED_SCANNER + 2;
public static final int tQUOTE_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 3;
public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4;
private static final int END_OF_INPUT = -1;
private static final int LINE_SPLICE_SEQUENCE = -2;
/**
 * Options that configure a lexer. The fields are public and are assigned directly
 * by the code that creates the lexer.
 */
public static class LexerOptions {
// when set, fetchToken() lexes a '$' as the start of an identifier
public boolean fSupportDollarInitializers= true;
// NOTE(review): presumably enables the gcc operators "<?" and ">?"; the code reading this
// option is not part of the visible source - confirm
public boolean fSupportMinAndMax= true;
// NOTE(review): presumably enables the creation of completion tokens; the code reading this
// option is not part of the visible source - confirm
public boolean fSupportContentAssist= false;
}
// configuration
private final LexerOptions fOptions;
private final ILexerLog fLog;
// the input to the lexer
private final char[] fInput;
private final int fLimit;
// after phase 3 (newline, trigraph, line-splice)
private int fOffset;
private int fEndOffset;
private int fCharPhase3;
private boolean fInsideIncludeDirective= false;
private Token fToken;
// for the few cases where we have to lookahead more than one character
private int fMarkOffset;
private int fMarkEndOffset;
private int fMarkPrefetchedChar;
/**
 * Creates a lexer for the entire input array, i.e. with the length of the array as limit.
 * @param input the characters to be lexed
 * @param options the configuration of the lexer
 * @param log the log that is stored for reporting
 */
public Lexer(char[] input, LexerOptions options, ILexerLog log) {
    this(input, input.length, options, log);
}
/**
 * Creates a lexer for the given input with an explicit limit.
 * @param input the characters to be lexed
 * @param limit stored as the limit of the input
 *   (NOTE(review): presumably the offset at which lexing stops - confirm against nextCharPhase3())
 * @param options the configuration of the lexer
 * @param log the log that is stored for reporting
 */
public Lexer(char[] input, int limit, LexerOptions options, ILexerLog log) {
fInput= input;
fLimit= limit;
fOptions= options;
fLog= log;
// fetchToken() expects the first character to be available in fCharPhase3
// (presumably set by this call - confirm)
nextCharPhase3();
}
/**
 * Call this before consuming the name-token in the include directive. It causes the
 * header-name tokens to be created instead of string literals. The mode is left again
 * when a newline token is created.
 */
public void setInsideIncludeDirective() {
fInsideIncludeDirective= true;
}
/**
 * Returns the current preprocessor token, does not advance. The result is the token
 * returned by the most recent call to {@link #nextToken()} or {@link #nextDirective()}.
 */
public Token currentToken() {
return fToken;
}
/**
 * Advances to the next token, skipping whitespace other than newline. The token becomes
 * the current token.
 * @return the new current token
 * @throws CompletionTokenException when completion is requested in a literal or a header-name.
 */
public Token nextToken() throws CompletionTokenException {
return fToken= fetchToken();
}
/**
 * Advances to the next pound token that starts a preprocessor directive, i.e. a pound token
 * that is the first token of the input or the first token after a newline token. All tokens
 * in between are discarded. The token found becomes the current token.
 * @return pound token of the directive or end-of-input.
 * @throws CompletionTokenException when completion is requested in a literal or a header-name.
 */
public Token nextDirective() throws CompletionTokenException {
    Token t= fToken;
    // a directive can only start at the beginning of the input or right after a newline
    boolean atLineStart= t == null || t.getType() == tNEWLINE;
    while (true) {
        // fetch exactly one token per iteration so that no token is skipped unexamined
        t= fetchToken();
        final int type= t.getType();
        if (type == tEND_OF_INPUT || (atLineStart && type == IToken.tPOUND)) {
            break;
        }
        // consecutive newlines keep the lexer at the start of a line
        atLineStart= type == tNEWLINE;
    }
    fToken= t;
    return t;
}
/**
 * Computes the next token.
 * <p>
 * On entry <code>fCharPhase3</code> holds the first unconsumed character. It is taken as
 * <code>c</code> and the following character is prefetched into <code>d</code>, such that
 * two-character tokens can be decided without further lookahead. Whenever <code>d</code>
 * belongs to the token, it is consumed with another call to <code>nextCharPhase3()</code>
 * before the token is created, because the end of a token is the offset of the prefetched
 * character. Whitespace and comments are skipped; characters that cannot start a token are
 * reported as problem and skipped as well.
 * @return the next token, a token of type <code>tEND_OF_INPUT</code> at the end of the input.
 * @throws CompletionTokenException when completion is requested in a literal or an header-name.
 */
private Token fetchToken() throws CompletionTokenException {
    while(true) {
        final int start= fOffset;
        final int c= fCharPhase3;
        final int d= nextCharPhase3();
        switch(c) {
        case END_OF_INPUT:
            return newToken(Lexer.tEND_OF_INPUT, start);
        case '\n':
            // an include directive ends with the line
            fInsideIncludeDirective= false;
            return newToken(Lexer.tNEWLINE, start);
        case ' ':
        case '\t':
        case 0xb: // vertical tab
        case '\f':
        case '\r':
            continue;
        case 'L':
            // prefix of a wide string or character literal, otherwise start of an identifier
            switch(d) {
            case '"':
                nextCharPhase3();
                return stringLiteral(start, true);
            case '\'':
                nextCharPhase3();
                return charLiteral(start, true);
            }
            return identifier(start, 1);
        case '"':
            if (fInsideIncludeDirective) {
                return headerName(start, true);
            }
            return stringLiteral(start, false);
        case '\'':
            return charLiteral(start, false);
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
        case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
        case '_':
            return identifier(start, 1);
        case '$':
            if (fOptions.fSupportDollarInitializers) {
                return identifier(start, 1);
            }
            // otherwise reported as bad character below
            break;
        case '\\':
            // a universal character name starts an identifier
            switch(d) {
            case 'u': case 'U':
                nextCharPhase3();
                return identifier(start, 2);
            }
            return newToken(IToken.tBACKSLASH, start);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return number(start, 1, false);
        case '.':
            switch(d) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                nextCharPhase3();
                return number(start, 2, true);
            case '.':
                // need to look at a third character, remember the state to be able to go back
                markPhase3();
                if (nextCharPhase3() == '.') {
                    nextCharPhase3();
                    return newToken(IToken.tELLIPSIS, start);
                }
                restorePhase3();
                break;
            case '*':
                nextCharPhase3();
                return newToken(IToken.tDOTSTAR, start);
            }
            return newToken(IToken.tDOT, start);
        case '#':
            if (d == '#') {
                nextCharPhase3();
                return newToken(IToken.tPOUNDPOUND, start);
            }
            return newToken(IToken.tPOUND, start);
        case '{':
            return newToken(IToken.tLBRACE, start);
        case '}':
            return newToken(IToken.tRBRACE, start);
        case '[':
            return newToken(IToken.tLBRACKET, start);
        case ']':
            return newToken(IToken.tRBRACKET, start);
        case '(':
            return newToken(IToken.tLPAREN, start);
        case ')':
            return newToken(IToken.tRPAREN, start);
        case ';':
            return newToken(IToken.tSEMI, start);
        case ':':
            switch(d) {
            case ':':
                nextCharPhase3();
                return newToken(IToken.tCOLONCOLON, start);
            case '>':
                // digraph :>
                nextCharPhase3();
                return newDigraphToken(IToken.tRBRACKET, start);
            }
            return newToken(IToken.tCOLON, start);
        case '?':
            return newToken(IToken.tQUESTION, start);
        case '+':
            switch (d) {
            case '+':
                nextCharPhase3();
                return newToken(IToken.tINCR, start);
            case '=':
                nextCharPhase3();
                return newToken(IToken.tPLUSASSIGN, start);
            }
            return newToken(IToken.tPLUS, start);
        case '-':
            switch (d) {
            case '>':
                int e= nextCharPhase3();
                if (e == '*') {
                    nextCharPhase3();
                    return newToken(IToken.tARROWSTAR, start);
                }
                return newToken(IToken.tARROW, start);
            case '-':
                nextCharPhase3();
                return newToken(IToken.tDECR, start);
            case '=':
                nextCharPhase3();
                return newToken(IToken.tMINUSASSIGN, start);
            }
            return newToken(IToken.tMINUS, start);
        case '*':
            if (d == '=') {
                nextCharPhase3();
                return newToken(IToken.tSTARASSIGN, start);
            }
            return newToken(IToken.tSTAR, start);
        case '/':
            switch (d) {
            case '=':
                nextCharPhase3();
                return newToken(IToken.tDIVASSIGN, start);
            case '/':
                // comments do not produce a token, continue with whatever follows
                nextCharPhase3();
                lineComment(start);
                continue;
            case '*':
                nextCharPhase3();
                blockComment(start);
                continue;
            }
            return newToken(IToken.tDIV, start);
        case '%':
            switch (d) {
            case '=':
                nextCharPhase3();
                return newToken(IToken.tMODASSIGN, start);
            case '>':
                // digraph %>
                nextCharPhase3();
                return newDigraphToken(IToken.tRBRACE, start);
            case ':':
                // digraph %: or, if followed by another %:, the digraph %:%:
                final int e= nextCharPhase3();
                if (e == '%') {
                    markPhase3();
                    if (nextCharPhase3() == ':') {
                        nextCharPhase3();
                        return newDigraphToken(IToken.tPOUNDPOUND, start);
                    }
                    restorePhase3();
                }
                return newDigraphToken(IToken.tPOUND, start);
            }
            return newToken(IToken.tMOD, start);
        case '^':
            if (d == '=') {
                nextCharPhase3();
                return newToken(IToken.tXORASSIGN, start);
            }
            return newToken(IToken.tXOR, start);
        case '&':
            switch (d) {
            case '&':
                nextCharPhase3();
                return newToken(IToken.tAND, start);
            case '=':
                nextCharPhase3();
                return newToken(IToken.tAMPERASSIGN, start);
            }
            return newToken(IToken.tAMPER, start);
        case '|':
            switch (d) {
            case '|':
                nextCharPhase3();
                return newToken(IToken.tOR, start);
            case '=':
                nextCharPhase3();
                return newToken(IToken.tBITORASSIGN, start);
            }
            return newToken(IToken.tBITOR, start);
        case '~':
            return newToken(IToken.tBITCOMPLEMENT, start);
        case '!':
            if (d == '=') {
                nextCharPhase3();
                return newToken(IToken.tNOTEQUAL, start);
            }
            return newToken(IToken.tNOT, start);
        case '=':
            if (d == '=') {
                nextCharPhase3();
                return newToken(IToken.tEQUAL, start);
            }
            return newToken(IToken.tASSIGN, start);
        case '<':
            if (fInsideIncludeDirective) {
                return headerName(start, false);
            }
            switch(d) {
            case '=':
                nextCharPhase3();
                return newToken(IToken.tLTEQUAL, start);
            case '<':
                final int e= nextCharPhase3();
                if (e == '=') {
                    nextCharPhase3();
                    return newToken(IToken.tSHIFTLASSIGN, start);
                }
                return newToken(IToken.tSHIFTL, start);
            case '?':
                // the operator <? is created only if the option is set
                if (fOptions.fSupportMinAndMax) {
                    nextCharPhase3();
                    return newToken(IGCCToken.tMIN, start);
                }
                break;
            case ':':
                // digraph <:
                nextCharPhase3();
                return newDigraphToken(IToken.tLBRACKET, start);
            case '%':
                // digraph <%
                nextCharPhase3();
                return newDigraphToken(IToken.tLBRACE, start);
            }
            return newToken(IToken.tLT, start);
        case '>':
            switch(d) {
            case '=':
                nextCharPhase3();
                return newToken(IToken.tGTEQUAL, start);
            case '>':
                final int e= nextCharPhase3();
                if (e == '=') {
                    nextCharPhase3();
                    return newToken(IToken.tSHIFTRASSIGN, start);
                }
                return newToken(IToken.tSHIFTR, start);
            case '?':
                // the operator >? is created only if the option is set
                if (fOptions.fSupportMinAndMax) {
                    nextCharPhase3();
                    return newToken(IGCCToken.tMAX, start);
                }
                break;
            }
            return newToken(IToken.tGT, start);
        case ',':
            return newToken(IToken.tCOMMA, start);
        default:
            // in case we have some other letter to start an identifier
            if (Character.isUnicodeIdentifierStart((char) c)) {
                return identifier(start, 1);
            }
            break;
        }
        // reached only by the cases that left the switch without returning a token
        handleProblem(IASTProblem.SCANNER_BAD_CHARACTER, start);
        // loop is continued, character is treated as white-space.
    }
}
/**
 * Creates a token without an image of its own. The token ends at the offset of the
 * prefetched character, so all characters of the token have to be consumed beforehand.
 * @param kind the type of the token.
 * @param offset the offset of the first character of the token.
 */
private Token newToken(int kind, int offset) {
    return new SimpleToken(kind, offset, fOffset);
}
/**
 * Creates a token for a punctuator that was spelled as a digraph. The token ends at the
 * offset of the prefetched character.
 * @param kind the type of the token, the same as for the regular spelling.
 * @param offset the offset of the first character of the token.
 */
private Token newDigraphToken(int kind, int offset) {
    return new DigraphToken(kind, offset, fOffset);
}
/**
 * Creates a token that obtains its image from this lexer on demand. The token ends at the
 * offset of the prefetched character.
 * @param kind the type of the token.
 * @param offset the offset of the first character of the token.
 * @param length the number of characters of the image, which is the length after trigraphs
 * have been replaced and line-splices have been removed.
 */
private Token newToken(int kind, int offset, int length) {
    return new TokenWithImage(kind, this, offset, fOffset, length);
}
/**
 * Reports a problem to the log. The range reported ends at the offset of the prefetched
 * character.
 * @param problemID the id of the problem.
 * @param offset the offset in the input where the problematic range starts.
 */
private void handleProblem(int problemID, int offset) {
    fLog.handleProblem(problemID, fInput, offset, fOffset);
}
/**
 * Scans a header-name. The opening delimiter has been consumed already, the name extends
 * up to and including the matching closing delimiter. A header-name that is not closed
 * before the end of the line or the end of the input is reported as a problem.
 * @param start the offset of the opening delimiter.
 * @param expectQuotes <code>true</code> for the form "name", <code>false</code> for &lt;name&gt;.
 * @throws CompletionTokenException when the end of the input is reached and content assist
 * is supported, the exception carries the incomplete header-name.
 */
private Token headerName(final int start, final boolean expectQuotes) throws CompletionTokenException {
    final int kind= expectQuotes ? tQUOTE_HEADER_NAME : tSYSTEM_HEADER_NAME;
    final int terminator= expectQuotes ? '"' : '>';
    int length= 1; // the opening delimiter
    int c= fCharPhase3;
    while (true) {
        if (c == END_OF_INPUT) {
            if (fOptions.fSupportContentAssist) {
                throw new CompletionTokenException(newToken(kind, start, length));
            }
            handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
            break;
        }
        if (c == '\n') {
            handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
            break;
        }
        // the character belongs to the header-name, consume it
        final boolean closed= c == terminator;
        length++;
        c= nextCharPhase3();
        if (closed) {
            break;
        }
    }
    return newToken(kind, start, length);
}
/**
 * Skips a block comment and reports it to the log. The opening sequence has been consumed
 * by the caller, such that the prefetched character is the first one inside of the comment.
 * A comment that is not closed extends to the end of the input.
 * @param start the offset of the opening sequence of the comment.
 */
private void blockComment(final int start) {
    // start with the prefetched character; fetching another one here would drop the first
    // character of the comment, and an empty comment would no longer be recognized as closed
    int c= fCharPhase3;
    while (true) {
        switch (c) {
        case END_OF_INPUT:
            fLog.handleComment(true, fInput, start, fOffset);
            return;
        case '*':
            c= nextCharPhase3();
            if (c == '/') {
                // consume the '/', the comment ends behind it
                nextCharPhase3();
                fLog.handleComment(true, fInput, start, fOffset);
                return;
            }
            // c is examined again, it may be another '*' or the end of the input
            break;
        default:
            c= nextCharPhase3();
            break;
        }
    }
}
/**
 * Skips a line comment and reports it to the log. The comment extends up to, but does not
 * include, the next newline; without a newline it extends to the end of the input.
 * @param start the offset of the opening sequence of the comment.
 */
private void lineComment(final int start) {
    int current= fCharPhase3;
    while (current != END_OF_INPUT && current != '\n') {
        current= nextCharPhase3();
    }
    fLog.handleComment(false, fInput, start, fOffset);
}
/**
 * Scans a string literal. The opening quote, and the prefix of a wide literal, have been
 * consumed already. The literal extends up to and including the first double quote that
 * is not escaped by a backslash. A literal that is not closed before the end of the line
 * or the end of the input is reported as a problem.
 * @param start the offset of the first character of the literal.
 * @param wide whether the literal is a wide string literal.
 * @throws CompletionTokenException when the end of the input is reached and content assist
 * is supported, the exception carries the incomplete literal.
 */
private Token stringLiteral(final int start, final boolean wide) throws CompletionTokenException {
    final int kind= wide ? IToken.tLSTRING : IToken.tSTRING;
    int length= wide ? 2 : 1; // the characters consumed by the caller
    boolean afterBackslash= false;
    int c= fCharPhase3;
    while (true) {
        if (c == END_OF_INPUT) {
            if (fOptions.fSupportContentAssist) {
                throw new CompletionTokenException(newToken(kind, start, length));
            }
            handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
            break;
        }
        if (c == '\n') {
            handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
            break;
        }
        final boolean closing= c == '"' && !afterBackslash;
        // a backslash that is escaped itself does not escape the next character
        afterBackslash= c == '\\' && !afterBackslash;
        length++;
        c= nextCharPhase3();
        if (closing) {
            break;
        }
    }
    return newToken(kind, start, length);
}
/**
 * Scans a character literal. The opening quote, and the prefix of a wide literal, have been
 * consumed already. The literal extends up to and including the first single quote that
 * is not escaped by a backslash. A literal that is not closed before the end of the line
 * or the end of the input is reported as a problem.
 * @param start the offset of the first character of the literal.
 * @param wide whether the literal is a wide character literal.
 * @throws CompletionTokenException when the end of the input is reached and content assist
 * is supported, the exception carries the incomplete literal.
 */
private Token charLiteral(final int start, boolean wide) throws CompletionTokenException {
    final int kind= wide ? IToken.tLCHAR : IToken.tCHAR;
    int length= wide ? 2 : 1; // the characters consumed by the caller
    boolean afterBackslash= false;
    int c= fCharPhase3;
    while (true) {
        if (c == END_OF_INPUT) {
            if (fOptions.fSupportContentAssist) {
                throw new CompletionTokenException(newToken(kind, start, length));
            }
            handleProblem(IProblem.SCANNER_BAD_CHARACTER, start);
            break;
        }
        if (c == '\n') {
            handleProblem(IProblem.SCANNER_BAD_CHARACTER, start);
            break;
        }
        final boolean closing= c == '\'' && !afterBackslash;
        // a backslash that is escaped itself does not escape the next character
        afterBackslash= c == '\\' && !afterBackslash;
        length++;
        c= nextCharPhase3();
        if (closing) {
            break;
        }
    }
    return newToken(kind, start, length);
}
/**
 * Scans the remainder of an identifier, the prefetched character is the first candidate.
 * An identifier that is ended by the end of the input becomes a completion token in case
 * content assist is supported.
 * @param start the offset of the first character of the identifier.
 * @param length the number of characters of the identifier that have been consumed already.
 */
private Token identifier(int start, int length) {
    int kind= IToken.tIDENTIFIER;
    int c= fCharPhase3;
    scan: while (true) {
        final boolean plain= (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9') || c == '_';
        if (!plain) {
            switch (c) {
            case '\\': // universal character name
                markPhase3();
                final int next= nextCharPhase3();
                if (next != 'u' && next != 'U') {
                    // the backslash is not part of the identifier, undo the lookahead
                    restorePhase3();
                    break scan;
                }
                // accounts for the backslash, the letter is counted below
                length++;
                break;
            case END_OF_INPUT:
                if (fOptions.fSupportContentAssist) {
                    kind= IToken.tCOMPLETION;
                }
                break scan;
            case '$':
                if (!fOptions.fSupportDollarInitializers) {
                    break scan;
                }
                break;
            // whitespace
            case ' ': case '\t': case 0xb: case '\f': case '\r': case '\n':
            // punctuation
            case '{': case '}': case '[': case ']': case '#': case '(': case ')': case '<': case '>':
            case '%': case ':': case ';': case '.': case '?': case '*': case '+': case '-': case '/':
            case '^': case '&': case '|': case '~': case '!': case '=': case ',': case '"': case '\'':
                break scan;
            default:
                if (!Character.isUnicodeIdentifierPart((char) c)) {
                    break scan;
                }
                break;
            }
        }
        length++;
        c= nextCharPhase3();
    }
    return newToken(kind, start, length);
}
/**
 * Scans the remainder of a preprocessing number, the prefetched character is the first
 * candidate. Digits, non-digits, periods, universal character names and the letters
 * 'e', 'E', 'p' and 'P' optionally followed by a sign are accepted. The number is
 * classified as floating point when it contains a period, or one of the four letters
 * directly followed by a sign or a digit.
 * @param start the offset of the first character of the number.
 * @param length the number of characters of the number that have been consumed already.
 * @param isFloat whether the part consumed already makes it a floating point number.
 * @throws CompletionTokenException when the end of the input is reached and content assist
 * is supported, the exception carries the incomplete number.
 */
private Token number(final int start, int length, boolean isFloat) throws CompletionTokenException {
    boolean isPartOfNumber= true;
    int c= fCharPhase3;
    while (true) {
        switch(c) {
        // non-digit
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': case 'h': case 'i':
        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': case 'H': case 'I':
        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
        case '_':
        // digit
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            break;

        // period
        case '.':
            isFloat= true;
            break;

        // sign
        case 'p':
        case 'P':
        case 'e':
        case 'E':
            length++;
            c= nextCharPhase3();
            switch (c) {
            case '+': case '-':
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
                isFloat= true;
                length++;
                c= nextCharPhase3();
                break;
            }
            // c has been advanced already, examine it without fetching another character
            continue;

        // universal character name (non-digit)
        case '\\':
            markPhase3();
            switch(nextCharPhase3()) {
            case 'u': case 'U':
                // accounts for the backslash, the letter is counted at the end of the loop
                length++;
                break;
            default:
                restorePhase3();
                isPartOfNumber= false;
                break;
            }
            break;

        // c is a character, it has to be compared with the character constant
        // END_OF_INPUT rather than with the token type tEND_OF_INPUT
        case END_OF_INPUT:
            if (fOptions.fSupportContentAssist) {
                throw new CompletionTokenException(
                        newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length));
            }
            isPartOfNumber= false;
            break;

        default:
            isPartOfNumber= false;
            break;
        }
        if (!isPartOfNumber) {
            break;
        }
        c= nextCharPhase3();
        length++;
    }
    return newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length);
}
/**
 * Saves the current state of phase3, necessary for '...', '%:%:' and UNCs.
 * The state consists of the offsets of the prefetched character and the character itself.
 * There is a single slot only, each call overwrites the state saved before.
 */
private void markPhase3() {
    fMarkOffset= fOffset;
    fMarkEndOffset= fEndOffset;
    fMarkPrefetchedChar= fCharPhase3;
}
/**
 * Restores a previously saved state of phase3, which is the one stored by the most
 * recent call to <code>markPhase3()</code>.
 */
private void restorePhase3() {
    fOffset= fMarkOffset;
    fEndOffset= fMarkEndOffset;
    fCharPhase3= fMarkPrefetchedChar;
}
/**
 * Consumes the prefetched character and prefetches the next one, while performing
 * phase 1-3: Replace \r\n with \n, handle trigraphs, skip line-splices.
 * Changes fOffset, fEndOffset and fCharPhase3.
 * @return the new prefetched character, or END_OF_INPUT.
 */
private int nextCharPhase3() {
    int charStart= fEndOffset;
    int ch= fetchCharPhase3(charStart); // changes fEndOffset
    while (ch == LINE_SPLICE_SEQUENCE) {
        // the character starts behind the line-splice
        charStart= fEndOffset;
        ch= fetchCharPhase3(charStart);
    }
    fOffset= charStart;
    fCharPhase3= ch;
    return ch;
}
/**
 * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
 * Changes <code>fEndOffset</code>, but is stateless otherwise.
 * @param pos the offset in the input to read from.
 * @return the character found at the offset after the replacements, END_OF_INPUT if the
 * offset is not below the limit, or LINE_SPLICE_SEQUENCE in case the offset denotes a
 * backslash that is followed by a newline. In every case <code>fEndOffset</code> is set
 * to the offset behind the characters of the input that were used up.
 */
private int fetchCharPhase3(int pos) {
    if (pos >= fLimit) {
        fEndOffset= fLimit;
        return END_OF_INPUT;
    }
    final char c= fInput[pos++];
    switch(c) {
    // windows line-ending
    case '\r':
        if (pos < fLimit && fInput[pos] == '\n') {
            fEndOffset= pos+1;
            return '\n';
        }
        // a carriage return on its own is returned as is
        fEndOffset= pos;
        return c;
    // trigraph sequences
    case '?':
        // a trigraph needs two more characters, the first of which is another '?'
        if (pos+1 >= fLimit || fInput[pos] != '?') {
            fEndOffset= pos;
            return c;
        }
        final char trigraph= checkTrigraph(fInput[pos+1]);
        if (trigraph == 0) {
            fEndOffset= pos;
            return c;
        }
        if (trigraph != '\\') {
            fEndOffset= pos+2;
            return trigraph;
        }
        // the trigraph encodes a backslash, which may start a line-splice
        pos+= 2;
        // no break, handle backslash
    case '\\':
        final int lsPos= findEndOfLineSpliceSequence(pos);
        if (lsPos > pos) {
            fEndOffset= lsPos;
            return LINE_SPLICE_SEQUENCE;
        }
        fEndOffset= pos;
        return '\\'; // don't return c, it may be a '?'
    default:
        fEndOffset= pos;
        return c;
    }
}
/**
 * Looks up the character that is encoded by a trigraph.
 * @param c the third character of the trigraph, the one behind the two question marks.
 * @return the character encoded, or 0 if the character does not complete a trigraph.
 */
private char checkTrigraph(char c) {
    // the characters at the same index of the two strings correspond to each other
    final String thirdChars= "='()!<>-/";
    final String replacements= "#^[]|{}~\\";
    final int idx= thirdChars.indexOf(c);
    if (idx < 0) {
        return 0;
    }
    return replacements.charAt(idx);
}
/**
 * Returns the endoffset for a line-splice sequence, or -1 if there is none.
 * A line-splice is a backslash followed by a newline, whitespace in between the two is
 * tolerated. Line-splices that directly follow each other are treated as one sequence,
 * the backslash of the subsequent ones may be spelled as the trigraph for a backslash.
 * @param pos the offset behind the backslash that may start the sequence.
 * @return the offset behind the newline of the last line-splice found, or -1.
 */
private int findEndOfLineSpliceSequence(int pos) {
    // the caller has seen a backslash already
    boolean haveBackslash= true;
    int result= -1;
    loop: while(pos < fLimit) {
        switch(fInput[pos++]) {
        case '\n':
            if (haveBackslash) {
                // found a line-splice, look for another one that may follow
                result= pos;
                haveBackslash= false;
                continue loop;
            }
            return result;

        case '\r': case ' ': case '\f': case '\t': case 0xb: // vertical tab
            if (haveBackslash) {
                continue loop;
            }
            return result;

        case '?':
            // only the trigraph that encodes a backslash is of interest
            if (pos+1 >= fLimit || fInput[pos] != '?' || fInput[pos+1] != '/') {
                return result;
            }
            // skip the rest of the trigraph; leaving the offset at the '/' would make
            // the next iteration stop at that character
            pos+= 2;
            // fall through to backslash handling
        case '\\':
            if (!haveBackslash) {
                haveBackslash= true;
                continue loop;
            }
            return result;

        default:
            return result;
        }
    }
    return result;
}
/**
 * Copies a range of the input as it is, trigraphs and line-splices are not touched.
 * @param offset the offset of the first character to copy.
 * @param endOffset the offset behind the last character to copy.
 * @return a new array holding the characters of the range.
 */
public char[] getInputChars(int offset, int endOffset) {
    final char[] copy= new char[endOffset-offset];
    System.arraycopy(fInput, offset, copy, 0, copy.length);
    return copy;
}
/**
 * Computes the image for a range of the input, with trigraphs replaced and line-splices
 * removed.
 * @param offset the offset of the first character of the range.
 * @param endOffset the offset behind the last character of the range.
 * @param imageLength the number of characters of the image.
 * @return a new array holding the image.
 */
char[] getTokenImage(int offset, int endOffset, int imageLength) {
    final char[] image= new char[imageLength];
    if (endOffset-offset == imageLength) {
        // nothing was replaced or removed, the image is the input itself
        System.arraycopy(fInput, offset, image, 0, imageLength);
        return image;
    }

    // read the range once more; the state of phase3 is saved and put back afterwards
    markPhase3();
    fEndOffset= offset;
    int filled= 0;
    while (filled < imageLength) {
        final int c= fetchCharPhase3(fEndOffset);
        if (c == LINE_SPLICE_SEQUENCE) {
            continue;
        }
        image[filled++]= (char) c;
    }
    restorePhase3();
    return image;
}
}

View file

@ -0,0 +1,21 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
/**
 * A token that does not store an image, the image is derived from the type of the token.
 */
class SimpleToken extends Token {
    /**
     * @param kind the type of the token.
     * @param offset the offset of the first character of the token.
     * @param endOffset the offset behind the last character of the token.
     */
    public SimpleToken(int kind, int offset, int endOffset) {
        super(kind, offset, endOffset);
    }

    /**
     * Returns the image that <code>TokenUtil</code> provides for the type of this token.
     */
    public char[] getTokenImage() {
        return TokenUtil.getImage(getType());
    }
}

View file

@ -0,0 +1,111 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
import org.eclipse.cdt.core.parser.IToken;
/**
 * Base class for the tokens created by the lexer. A token stores its type, the range it
 * covers and a reference to the token that follows it. Many methods of {@link IToken} are
 * not implemented yet and throw an <code>UnsupportedOperationException</code>.
 */
public abstract class Token implements IToken {
    // the type of the token, can be changed via setType()
    private int fKind;
    // the offset of the first character of the token
    int fOffset;
    // the offset behind the last character of the token
    int fEndOffset;
    // the token chained to this one via setNext(), may be null
    private IToken fNextGrammarToken;

    /**
     * @param kind the type of the token.
     * @param offset the offset of the first character of the token.
     * @param endOffset the offset behind the last character of the token.
     */
    Token(int kind, int offset, int endOffset) {
        fKind= kind;
        fOffset= offset;
        fEndOffset= endOffset;
    }

    /** Returns the type of the token. */
    public int getType() {
        return fKind;
    }

    /** Returns the offset of the first character of the token. */
    public int getOffset() {
        return fOffset;
    }

    /** Returns the offset behind the last character of the token. */
    public int getEndOffset() {
        return fEndOffset;
    }

    /** Returns the difference between the end offset and the offset of the token. */
    public int getLength() {
        return fEndOffset-fOffset;
    }

    /** Returns the token that was set via {@link #setNext(IToken)}, or <code>null</code>. */
    public IToken getNext() {
        return fNextGrammarToken;
    }

    /** Returns the image of the token, it is up to the subclass how to obtain it. */
    public abstract char[] getTokenImage();

    // for the preprocessor to classify preprocessor tokens
    public void setType(int kind) {
        // mstodo make non-public
        fKind= kind;
    }

    // for the preprocessor to chain the tokens
    public void setNext(IToken t) {
        // mstodo make non-public
        fNextGrammarToken= t;
    }

    /** Returns whether <code>TokenUtil</code> classifies the type of the token as operator. */
    public boolean isOperator() {
        // mstodo
        return TokenUtil.isOperator(fKind);
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public char[] getCharImage() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public String getImage() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public char[] getFilename() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public boolean looksLikeExpression() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public boolean canBeAPrefix() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public int getLineNumber() {
        // mstodo
        throw new UnsupportedOperationException();
    }

    /** Not implemented, always throws an <code>UnsupportedOperationException</code>. */
    public boolean isPointer() {
        // mstodo
        throw new UnsupportedOperationException();
    }
}

View file

@ -0,0 +1,146 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
import org.eclipse.cdt.core.parser.IGCCToken;
import org.eclipse.cdt.core.parser.IToken;
import org.eclipse.cdt.core.parser.Keywords;
/**
 * Static helpers that classify token types and map them to their images.
 */
public class TokenUtil {
    // returned for token types without a fixed image
    private static final char[] IMAGE_EMPTY = new char[0];
    private static final char[] IMAGE_POUND_POUND = "##".toCharArray(); //$NON-NLS-1$
    private static final char[] IMAGE_POUND = "#".toCharArray(); //$NON-NLS-1$

    // the alternative spellings of the punctuators
    private static final char[] DIGRAPH_LBRACE= "<%".toCharArray(); //$NON-NLS-1$
    private static final char[] DIGRAPH_RBRACE= "%>".toCharArray(); //$NON-NLS-1$
    private static final char[] DIGRAPH_LBRACKET= "<:".toCharArray(); //$NON-NLS-1$
    private static final char[] DIGRAPH_RBRACKET= ":>".toCharArray(); //$NON-NLS-1$
    private static final char[] DIGRAPH_POUND = "%:".toCharArray(); //$NON-NLS-1$
    private static final char[] DIGRAPH_POUNDPOUND = "%:%:".toCharArray(); //$NON-NLS-1$

    /**
     * Returns whether the token type is one of the operators listed below.
     * @param kind the type of a token.
     */
    public static boolean isOperator(int kind) {
        switch (kind) {
        case IToken.t_delete: case IToken.t_new:

        // bit operations
        case IToken.tAMPER: case IToken.tAMPERASSIGN:
        case IToken.tARROW: case IToken.tARROWSTAR:
        case IToken.tBITOR: case IToken.tBITORASSIGN:
        case IToken.tBITCOMPLEMENT:
        case IToken.tSHIFTL: case IToken.tSHIFTLASSIGN:
        case IToken.tSHIFTR: case IToken.tSHIFTRASSIGN:
        case IToken.tXOR: case IToken.tXORASSIGN:

        // logical operations
        case IToken.tNOT: case IToken.tAND: case IToken.tOR:

        // arithmetic
        case IToken.tDECR: case IToken.tINCR:
        case IToken.tDIV: case IToken.tDIVASSIGN:
        case IToken.tMINUS: case IToken.tMINUSASSIGN:
        case IToken.tMOD: case IToken.tMODASSIGN:
        case IToken.tPLUS: case IToken.tPLUSASSIGN:
        case IToken.tSTAR: case IToken.tSTARASSIGN:
        case IGCCToken.tMAX: case IGCCToken.tMIN:

        // comparison
        case IToken.tEQUAL: case IToken.tNOTEQUAL:
        case IToken.tGT: case IToken.tGTEQUAL:
        case IToken.tLT: case IToken.tLTEQUAL:

        // other
        case IToken.tASSIGN: case IToken.tCOMMA:
            return true;
        }

        return false;
    }

    /**
     * Returns the image for token types that have a fixed image. The arrays returned are
     * shared, they are not copied for the caller.
     * @param type the type of a token.
     * @return the image, or an empty array for a type that is not handled. With assertions
     * enabled an unhandled type makes the assertion fail.
     */
    public static char[] getImage(int type) {
        switch (type) {
        case IToken.tPOUND: return IMAGE_POUND;
        case IToken.tPOUNDPOUND: return IMAGE_POUND_POUND;
        case IToken.tCOLONCOLON: return Keywords.cpCOLONCOLON;
        case IToken.tCOLON: return Keywords.cpCOLON;
        case IToken.tSEMI: return Keywords.cpSEMI;
        case IToken.tCOMMA: return Keywords.cpCOMMA;
        case IToken.tQUESTION: return Keywords.cpQUESTION;
        case IToken.tLPAREN : return Keywords.cpLPAREN;
        case IToken.tRPAREN : return Keywords.cpRPAREN;
        case IToken.tLBRACKET: return Keywords.cpLBRACKET;
        case IToken.tRBRACKET: return Keywords.cpRBRACKET;
        case IToken.tLBRACE: return Keywords.cpLBRACE;
        case IToken.tRBRACE: return Keywords.cpRBRACE;
        case IToken.tPLUSASSIGN: return Keywords.cpPLUSASSIGN;
        case IToken.tINCR: return Keywords.cpINCR;
        case IToken.tPLUS: return Keywords.cpPLUS;
        case IToken.tMINUSASSIGN: return Keywords.cpMINUSASSIGN;
        case IToken.tDECR: return Keywords.cpDECR;
        case IToken.tARROWSTAR: return Keywords.cpARROWSTAR;
        case IToken.tARROW: return Keywords.cpARROW;
        case IToken.tMINUS: return Keywords.cpMINUS;
        case IToken.tSTARASSIGN: return Keywords.cpSTARASSIGN;
        case IToken.tSTAR: return Keywords.cpSTAR;
        case IToken.tMODASSIGN: return Keywords.cpMODASSIGN;
        case IToken.tMOD: return Keywords.cpMOD;
        case IToken.tXORASSIGN: return Keywords.cpXORASSIGN;
        case IToken.tXOR: return Keywords.cpXOR;
        case IToken.tAMPERASSIGN: return Keywords.cpAMPERASSIGN;
        case IToken.tAND: return Keywords.cpAND;
        case IToken.tAMPER: return Keywords.cpAMPER;
        case IToken.tBITORASSIGN: return Keywords.cpBITORASSIGN;
        case IToken.tOR: return Keywords.cpOR;
        case IToken.tBITOR: return Keywords.cpBITOR;
        case IToken.tBITCOMPLEMENT: return Keywords.cpCOMPL;
        case IToken.tNOTEQUAL: return Keywords.cpNOTEQUAL;
        case IToken.tNOT: return Keywords.cpNOT;
        case IToken.tEQUAL: return Keywords.cpEQUAL;
        case IToken.tASSIGN: return Keywords.cpASSIGN;
        case IToken.tSHIFTL: return Keywords.cpSHIFTL;
        case IToken.tLTEQUAL: return Keywords.cpLTEQUAL;
        case IToken.tLT: return Keywords.cpLT;
        case IToken.tSHIFTRASSIGN: return Keywords.cpSHIFTRASSIGN;
        case IToken.tSHIFTR: return Keywords.cpSHIFTR;
        case IToken.tGTEQUAL: return Keywords.cpGTEQUAL;
        case IToken.tGT: return Keywords.cpGT;
        case IToken.tSHIFTLASSIGN: return Keywords.cpSHIFTLASSIGN;
        case IToken.tELLIPSIS: return Keywords.cpELLIPSIS;
        case IToken.tDOTSTAR: return Keywords.cpDOTSTAR;
        case IToken.tDOT: return Keywords.cpDOT;
        case IToken.tDIVASSIGN: return Keywords.cpDIVASSIGN;
        case IToken.tDIV: return Keywords.cpDIV;
        case IToken.tBACKSLASH: return Keywords.cpBACKSLASH;

        case IGCCToken.tMIN: return Keywords.cpMIN;
        case IGCCToken.tMAX: return Keywords.cpMAX;

        default:
            assert false: type;
            return IMAGE_EMPTY;
        }
    }

    /**
     * Returns the digraph spelling for the token types that have one. The arrays returned
     * are shared, they are not copied for the caller.
     * @param type the type of a token.
     * @return the digraph, or an empty array for a type that is not handled. With assertions
     * enabled an unhandled type makes the assertion fail.
     */
    public static char[] getDigraphImage(int type) {
        switch (type) {
        case IToken.tPOUND: return DIGRAPH_POUND;
        case IToken.tPOUNDPOUND: return DIGRAPH_POUNDPOUND;
        case IToken.tLBRACKET: return DIGRAPH_LBRACKET;
        case IToken.tRBRACKET: return DIGRAPH_RBRACKET;
        case IToken.tLBRACE: return DIGRAPH_LBRACE;
        case IToken.tRBRACE: return DIGRAPH_RBRACE;

        default:
            assert false: type;
            return IMAGE_EMPTY;
        }
    }
}

View file

@ -0,0 +1,38 @@
/*******************************************************************************
* Copyright (c) 2007 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.internal.core.parser.scanner;
/**
 * A token with an image of its own. The image is either supplied on construction, or it is
 * obtained from the lexer the first time it is asked for and kept from then on.
 */
class TokenWithImage extends Token {
    // the lexer to obtain the image from, null if the image was supplied
    private final Lexer fLexer;
    // the length of the image to obtain from the lexer
    private final int fImageLength;
    // the image, null as long as it has not been obtained
    private char[] fImage;

    /**
     * Creates a token that asks the lexer for its image on demand.
     * @param kind the type of the token.
     * @param source the lexer that provides the image.
     * @param offset the offset of the first character of the token.
     * @param endOffset the offset behind the last character of the token.
     * @param imageLength the length of the image, passed on to the lexer.
     */
    public TokenWithImage(int kind, Lexer source, int offset, int endOffset, int imageLength) {
        super(kind, offset, endOffset);
        fImageLength= imageLength;
        fLexer= source;
    }

    /**
     * Creates a token with the given image.
     * @param kind the type of the token.
     * @param offset the offset of the first character of the token.
     * @param endOffset the offset behind the last character of the token.
     * @param image the image of the token.
     */
    public TokenWithImage(int kind, int offset, int endOffset, char[] image) {
        super(kind, offset, endOffset);
        fImage= image;
        fImageLength= 0;
        fLexer= null;
    }

    /**
     * Returns the image of the token, which is fetched from the lexer if it is not yet known.
     */
    public char[] getTokenImage() {
        char[] image= fImage;
        if (image == null) {
            image= fLexer.getTokenImage(fOffset, fEndOffset, fImageLength);
            fImage= image;
        }
        return image;
    }
}

View file

@ -4059,7 +4059,7 @@ abstract class BaseScanner implements IScanner {
private static final MacroExpansionToken EXPANSION_TOKEN = new MacroExpansionToken();
static {
CharArrayIntMap words = new CharArrayIntMap(IToken.tLAST, -1);
CharArrayIntMap words = new CharArrayIntMap(40, -1);
// Common keywords
words.put(Keywords.cAUTO, IToken.t_auto);