From 285d224021ad46177c105a5092ecebb5b0406658 Mon Sep 17 00:00:00 2001 From: Markus Schorn Date: Tue, 9 Oct 2007 15:09:59 +0000 Subject: [PATCH] For bug 205272, the lexer with JUnit tests. --- .../parser/tests/ast2/DOMParserTestSuite.java | 1 + .../core/parser/tests/ast2/LexerTests.java | 545 ++++++++++ .../core/parser/tests/ast2/TestLexerLog.java | 61 ++ .../org.eclipse.cdt.core/META-INF/MANIFEST.MF | 1 + .../cdt/core/dom/parser/IExtensionToken.java | 4 +- .../eclipse/cdt/core/parser/IGCCToken.java | 12 +- .../org/eclipse/cdt/core/parser/IScanner.java | 4 +- .../org/eclipse/cdt/core/parser/IToken.java | 165 +-- .../org/eclipse/cdt/core/parser/Keywords.java | 7 +- .../scanner/CompletionTokenException.java | 24 + .../core/parser/scanner/DigraphToken.java | 21 + .../core/parser/scanner/ILexerLog.java | 18 + .../internal/core/parser/scanner/Lexer.java | 942 ++++++++++++++++++ .../core/parser/scanner/SimpleToken.java | 21 + .../internal/core/parser/scanner/Token.java | 111 +++ .../core/parser/scanner/TokenUtil.java | 146 +++ .../core/parser/scanner/TokenWithImage.java | 38 + .../core/parser/scanner2/BaseScanner.java | 2 +- 18 files changed, 1978 insertions(+), 145 deletions(-) create mode 100644 core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java create mode 100644 core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/SimpleToken.java 
create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Token.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenUtil.java create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenWithImage.java diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java index 140baf93b41..843cc532e74 100644 --- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java @@ -26,6 +26,7 @@ public class DOMParserTestSuite extends TestCase { public static Test suite() { TestSuite suite= new TestSuite(ParserTestSuite.class.getName()); + suite.addTest(LexerTests.suite()); suite.addTest(DOMScannerTests.suite()); suite.addTest(AST2Tests.suite()); suite.addTestSuite( GCCTests.class ); diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java new file mode 100644 index 00000000000..51da1384302 --- /dev/null +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java @@ -0,0 +1,545 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.core.parser.tests.ast2; + +import junit.framework.TestSuite; + +import org.eclipse.cdt.core.parser.IGCCToken; +import org.eclipse.cdt.core.parser.IProblem; +import org.eclipse.cdt.core.parser.IToken; +import org.eclipse.cdt.core.testplugin.util.BaseTestCase; +import org.eclipse.cdt.internal.core.parser.scanner.Lexer; +import org.eclipse.cdt.internal.core.parser.scanner.Token; +import org.eclipse.cdt.internal.core.parser.scanner.Lexer.LexerOptions; + + +public class LexerTests extends BaseTestCase { + static String TRIGRAPH_REPLACES_CHARS= "#^[]|{}~\\"; + static String TRIGRAPH_CHARS= "='()!<>-/"; + + public static TestSuite suite() { + return suite(LexerTests.class); + } + + private Lexer fLexer; + private TestLexerLog fLog= new TestLexerLog(); + private int fLastEndOffset; + + public LexerTests() { + super(); + } + + public LexerTests(String name) { + super(name); + } + + private void init(String input) throws Exception { + fLog.clear(); + fLexer= new Lexer(input.toCharArray(), new LexerOptions(), fLog); + fLexer.nextToken(); + fLastEndOffset= 0; + } + + private void init(String input, boolean dollar, boolean minmax) throws Exception { + fLog.clear(); + final LexerOptions lexerOptions = new LexerOptions(); + lexerOptions.fSupportDollarInitializers= dollar; + lexerOptions.fSupportMinAndMax= minmax; + fLexer= new Lexer(input.toCharArray(), lexerOptions, fLog); + fLexer.nextToken(); + fLastEndOffset= 0; + } + + private void token(int tokenType) throws Exception { + token(tokenType, null); + } + + private void token(int tokenType, String image) throws Exception { + 
Token t= fLexer.currentToken(); + assertEquals(tokenType, t.getType()); + assertEquals(fLastEndOffset, t.getOffset()); + fLastEndOffset= t.getEndOffset(); + if (image != null) { + assertEquals(image, new String(t.getTokenImage())); + } + fLexer.nextToken(); + } + + private void integer(String expectedImage) throws Exception { + token(IToken.tINTEGER, expectedImage); + } + + private void floating(String expectedImage) throws Exception { + token(IToken.tFLOATINGPT, expectedImage); + } + + private void id(String expectedImage) throws Exception { + token(IToken.tIDENTIFIER, expectedImage); + } + + private void str(String expectedImage) throws Exception { + token(IToken.tSTRING, "\"" + expectedImage + "\""); + } + + private void wstr(String expectedImage) throws Exception { + token(IToken.tLSTRING, "L\"" + expectedImage + "\""); + } + + private void ch(String expectedImage) throws Exception { + token(IToken.tCHAR, expectedImage); + } + + private void wch(String expectedImage) throws Exception { + token(IToken.tLCHAR, expectedImage); + } + + private void eof() throws Exception { + IToken t= fLexer.nextToken(); + assertEquals("superfluous token " + t, Lexer.tEND_OF_INPUT, t.getType()); + assertEquals(0, fLog.getProblemCount()); + assertEquals(0, fLog.getCommentCount()); + } + + private void nl() throws Exception { + token(Lexer.tNEWLINE); + } + + private void ws() throws Exception { + int offset= fLexer.currentToken().getOffset(); + assertTrue(offset > fLastEndOffset); + fLastEndOffset= offset; + } + + private void problem(int kind, String img) throws Exception { + assertEquals(fLog.createString(kind, img), fLog.removeFirstProblem()); + } + + private void comment(String img) throws Exception { + ws(); + assertEquals(img, fLog.removeFirstComment()); + } + + public void testTrigraphSequences() throws Exception { + init("\"??=??/??'??(??)??!????-\""); + str("#\\^[]|{}~"); + eof(); + + init("??=??'??(??)??!????-"); + token(IToken.tPOUND); + token(IToken.tXOR); + 
token(IToken.tLBRACKET); + token(IToken.tRBRACKET); + token(IToken.tBITOR); + token(IToken.tLBRACE); + token(IToken.tRBRACE); + token(IToken.tCOMPL); + eof(); + + init("a??/\nb"); + id("ab"); + eof(); + } + + public void testWindowsLineEnding() throws Exception { + init("\n\n"); + nl(); nl(); eof(); + init("\r\n\r\n"); + nl(); nl(); eof(); + } + + public void testLineSplicingTrigraph() throws Exception { + // a trigraph cannot be spliced + init("??\\\n="); + token(IToken.tQUESTION); + token(IToken.tQUESTION); + token(IToken.tASSIGN); + eof(); + + init("??\\\r\n="); + token(IToken.tQUESTION); + token(IToken.tQUESTION); + token(IToken.tASSIGN); + eof(); + + // trigraph can be used to splice a line + init("a??/\nb"); + id("ab"); + eof(); + } + + public void testLineSplicingStringLiteral() throws Exception { + // splicing in string literal + init("\"a\\\nb\""); + str("ab"); + eof(); + + init("\"a\\\r\nb\""); + str("ab"); + eof(); + } + + public void testLineSplicingCharLiteral() throws Exception { + init("'a\\\nb'"); + ch("'ab'"); + eof(); + + init("'a\\\r\nb'"); + ch("'ab'"); + eof(); + } + + public void testLineSplicingHeaderName() throws Exception { + init("p\"a\\\nb\""); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tQUOTE_HEADER_NAME, "\"ab\""); + eof(); + + init("p\"a\\\r\nb\""); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tQUOTE_HEADER_NAME, "\"ab\""); + eof(); + + init("p"); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tSYSTEM_HEADER_NAME, ""); + eof(); + + init("p"); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tSYSTEM_HEADER_NAME, ""); + eof(); + } + + public void testLineSplicingComment() throws Exception { + init("// a\\\nb\n"); + comment("// a\\\nb"); + nl(); + eof(); + + init("// a\\\nb\n"); + comment("// a\\\nb"); + nl(); + eof(); + + init("/\\\n\\\n/ ab\n"); + comment("/\\\n\\\n/ ab"); + nl(); + eof(); + + init("/\\\n* a\\\nb*\\\n/"); + comment("/\\\n* a\\\nb*\\\n/"); + eof(); + } + 
+ public void testLineSplicingIdentifier() throws Exception { + init("a\\\nb"); + id("ab"); + eof(); + + init("a\\\r\nb"); + id("ab"); + eof(); + } + + public void testLineSplicingNumber() throws Exception { + init(".\\\n1"); + floating(".1"); + eof(); + + init(".\\\r\n1"); + floating(".1"); + eof(); + } + + public void testComments() throws Exception { + init("// /*\na"); + comment("// /*"); + nl(); + id("a"); + eof(); + + init("/* // /* \n xxx*/a"); + comment("/* // /* \n xxx*/"); + id("a"); + eof(); + } + + public void testHeaderName() throws Exception { + init("p\"'/*//\\\""); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tQUOTE_HEADER_NAME, "\"'/*//\\\""); + eof(); + + init("p<'\"/*//>"); + fLexer.setInsideIncludeDirective(); + id("p"); + token(Lexer.tSYSTEM_HEADER_NAME, "<'\"/*//>"); + eof(); + } + + public void testIdentifier() throws Exception { + final String ident= "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$\\u1234\\U123456780123456789"; + int unc1= ident.indexOf('\\'); + for (int i = 0; i <= unc1; i++) { + String id= ident.substring(i); + init(id); + id(id); + eof(); + } + String id= ident.substring(ident.indexOf('\\', unc1+1)); + init(id); + id(id); + eof(); + + for (int i= 0; i <10; i++) { + String nonid= ident.substring(ident.length()-i-1); + init(nonid); + integer(nonid); + eof(); + } + + init(ident, false, true); + final int idxDollar = ident.indexOf('$'); + id(ident.substring(0, idxDollar)); + problem(IProblem.SCANNER_BAD_CHARACTER, "$"); + ws(); + id(ident.substring(idxDollar+1)); + } + + public void testNumber() throws Exception { + final String number= ".0123456789.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + + "\\uaaaa\\Uaaaaaaaae+e-E+E-"; + for (int i = 0; i < 11; i++) { + String n= number.substring(i); + init(n); + floating(n); + eof(); + } + int idxPlus= number.indexOf('+'); + for (int i = 11; i < number.length(); i++) { + String n= number.substring(i); + init(n); + int startString= 0; + if (i==11) 
{token(IToken.tDOT); startString=1;} + if (i><<=>>===!=<=>=&&||++--,->*->?\\"; + final int[] tokens= new int[] { + IToken.tLBRACE, IToken.tRBRACE, IToken.tLBRACKET, IToken.tRBRACKET, IToken.tPOUNDPOUND, + IToken.tPOUND, IToken.tLPAREN, IToken.tRPAREN, IToken.tLBRACKET, IToken.tRBRACKET, + IToken.tLBRACE, IToken.tRBRACE, IToken.tPOUNDPOUND, IToken.tPOUND, IToken.tSEMI, + IToken.tCOLON, IToken.tELLIPSIS, IToken.tQUESTION, IToken.tDOT, IToken.tCOLONCOLON, IToken.tDOT, + IToken.tDOTSTAR, IToken.tPLUS, IToken.tMINUS, IToken.tSTAR, IToken.tDIV, IToken.tMOD, + IToken.tXOR, IToken.tAMPER, IToken.tBITOR, IToken.tCOMPL, IToken.tASSIGN, IToken.tNOT, + IToken.tLT, IToken.tGT, IToken.tPLUSASSIGN, IToken.tMINUSASSIGN, IToken.tSTARASSIGN, + IToken.tDIVASSIGN, IToken.tMODASSIGN, IToken.tXORASSIGN, IToken.tAMPERASSIGN, + IToken.tBITORASSIGN, IToken.tSHIFTL, IToken.tSHIFTR, IToken.tSHIFTLASSIGN, + IToken.tSHIFTRASSIGN, IToken.tEQUAL, IToken.tNOTEQUAL, IToken.tLTEQUAL, IToken.tGTEQUAL, + IToken.tAND, IToken.tOR, IToken.tINCR, IToken.tDECR, IToken.tCOMMA, IToken.tARROWSTAR, + IToken.tARROW, IGCCToken.tMIN, IGCCToken.tMAX, IToken.tBACKSLASH, + }; + + for (int splices=0; splices<9; splices++) { + for (int trigraphs= 0; trigraphs<6; trigraphs++) { + StringBuffer buf= new StringBuffer(); + String input= useTrigraphs(ops.toCharArray(), trigraphs); + init(instertLineSplices(input, splices)); + for (int i = 0; i < tokens.length; i++) { + Token token= fLexer.currentToken(); + buf.append(token.getTokenImage()); + token(tokens[i]); + } + eof(); + assertEquals(ops, buf.toString()); // check token image + + init(input, true, false); + for (int i = 0; i < tokens.length; i++) { + switch (tokens[i]) { + case IGCCToken.tMIN: + token(IToken.tLT); + token(IToken.tQUESTION); + break; + case IGCCToken.tMAX: + token(IToken.tGT); + token(IToken.tQUESTION); + break; + default: + token(tokens[i]); + break; + } + } + eof(); + } + } + } + + private String instertLineSplices(String input, int splices) { + int 
m1= splices%3; + int m2= (splices-m1)/3; + char[] c= input.toCharArray(); + StringBuffer result= new StringBuffer(); + for (int i = 0; i < c.length; i++) { + result.append(c[i]); + if (c[i]=='?' && i+2 < c.length && c[i+1] == '?' && TRIGRAPH_CHARS.indexOf(c[i+2]) >= 0) { + result.append(c[++i]); + result.append(c[++i]); + } + switch(m1) { + case 1: + result.append("\\\n"); + break; + case 2: + result.append("\\ \n"); + break; + } + switch(m2) { + case 1: + result.append("\\\r\n"); + break; + case 2: + result.append("\\\t\r\n"); + break; + } + } + return result.toString(); + } + + private String useTrigraphs(char[] input, int mode) { + if (mode == 0) { + return new String(input); + } + + boolean yes= mode > 1; + StringBuffer result= new StringBuffer(); + for (int i = 0; i < input.length; i++) { + char c = input[i]; + int idx= TRIGRAPH_REPLACES_CHARS.indexOf(c); + if (idx > 0) { + if (yes) { + result.append("??"); + result.append(TRIGRAPH_CHARS.charAt(idx)); + } + else { + result.append(c); + } + if (mode < 3) { + yes= !yes; + } + } + else { + result.append(c); + } + } + return result.toString(); + } + + public void testLineSplicingOperator() throws Exception { + // splicing in operator + init("|\\\n|"); + token(IToken.tOR); + eof(); + + init("|\\\r\n|"); + token(IToken.tOR); + eof(); + } +} diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java new file mode 100644 index 00000000000..6dd59a25e42 --- /dev/null +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.core.parser.tests.ast2; + +import java.util.ArrayList; + +import org.eclipse.cdt.internal.core.parser.scanner.ILexerLog; + +/** ILexerLog implementation for tests: keeps every reported comment and problem as a String, in arrival order, so a test can count them and pop them one at a time. */ public class TestLexerLog implements ILexerLog { + + /** Text of each reported comment, oldest first. */ private ArrayList fComments= new ArrayList(); + /** One createString(problemID, image) entry per reported problem, oldest first. */ private ArrayList fProblems= new ArrayList(); + + /** Records the characters source[offset..endOffset) as one comment; isBlockComment is not used. */ public void handleComment(boolean isBlockComment, char[] source, int offset, int endOffset) { + fComments.add(new String(source, offset, endOffset-offset)); + } + + /** Records a problem as createString(problemID, text), where text is source[offset..endOffset). */ public void handleProblem(int problemID, char[] source, int offset, int endOffset) { + fProblems.add(createString(problemID, new String(source, offset, endOffset-offset))); + } + + /** Returns the decimal problemID, a colon, then image; this is the format in which problems are stored, so callers can build the expected entry with the same method. */ public String createString(int problemID, String image) { + return String.valueOf(problemID) + ":" + image; + } + + /** Discards all recorded comments and problems. */ public void clear() { + fComments.clear(); + fProblems.clear(); + } + + /** Returns the number of problems currently recorded. */ public int getProblemCount() { + return fProblems.size(); + } + + /** Returns the number of comments currently recorded. */ public int getCommentCount() { + return fComments.size(); + } + + /** Removes and returns the oldest recorded problem; when none is recorded, returns a fixed explanatory message instead of throwing. */ public String removeFirstProblem() { + if (fProblems.isEmpty()) { + return "no problems have been reported"; + } + return (String) fProblems.remove(0); + } + + /** Removes and returns the oldest recorded comment; when none is recorded, returns a fixed explanatory message instead of throwing. */ public String removeFirstComment() { + if (fComments.isEmpty()) { + return "no comments have been reported"; + } + return (String) fComments.remove(0); + } + +} diff --git a/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF b/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF index edbc90cf653..f651ce7870e 100644 --- a/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF +++ b/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF @@ -61,6 +61,7 @@ Export-Package: org.eclipse.cdt.core, 
org.eclipse.cdt.internal.core.parser.ast.quick;x-internal:=true, org.eclipse.cdt.internal.core.parser.problem;x-internal:=true, org.eclipse.cdt.internal.core.parser.pst;x-internal:=true, + org.eclipse.cdt.internal.core.parser.scanner;x-internal:=true, org.eclipse.cdt.internal.core.parser.scanner2;x-internal:=true, org.eclipse.cdt.internal.core.parser.token;x-friends:="org.eclipse.cdt.ui", org.eclipse.cdt.internal.core.parser.util;x-internal:=true, diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java index 17601d55bb8..fc440a2e784 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java @@ -28,7 +28,7 @@ import org.eclipse.cdt.core.parser.IToken; */ public interface IExtensionToken { - int t__otherDeclSpecModifierFirst= IToken.tLAST + 100; - int t__otherDeclSpecModifierLast= IToken.tLAST + 110; + int t__otherDeclSpecModifierFirst= IToken.FIRST_RESERVED_IExtensionToken; + int t__otherDeclSpecModifierLast= IToken.FIRST_RESERVED_IExtensionToken + 10; } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java index 19c96c4e5df..47b24f5294a 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java @@ -18,11 +18,11 @@ package org.eclipse.cdt.core.parser; */ public interface IGCCToken extends IToken { - public static final int t_typeof = tLAST + 1; - public static final int t___alignof__ = tLAST + 2; - public static final int tMAX = tLAST + 3; - public static final int tMIN = tLAST + 4; - public static final int t__attribute__ = tLAST + 5; - public static final int t__declspec = tLAST + 6; + 
public static final int t_typeof = FIRST_RESERVED_IGCCToken; + public static final int t___alignof__ = FIRST_RESERVED_IGCCToken + 1; + public static final int tMAX = FIRST_RESERVED_IGCCToken + 2; + public static final int tMIN = FIRST_RESERVED_IGCCToken + 3; + public static final int t__attribute__ = FIRST_RESERVED_IGCCToken + 4; + public static final int t__declspec = FIRST_RESERVED_IGCCToken + 5; } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java index 302537597a1..d838079180a 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java @@ -30,8 +30,8 @@ import org.eclipse.cdt.internal.core.parser.scanner2.ILocationResolver; */ public interface IScanner extends IMacroCollector { - public static final int tPOUNDPOUND = -6; - public static final int tPOUND = -7; + /** @deprecated */ public static final int tPOUNDPOUND = IToken.tPOUNDPOUND; + /** @deprecated */ public static final int tPOUND = IToken.tPOUND; public void setOffsetBoundary( int offset ); public void setContentAssistMode( int offset ); diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java index 014ed3731f5..7aac95532a3 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java @@ -41,287 +41,188 @@ public interface IToken { // Token types + int FIRST_RESERVED_SCANNER= -100; + int LAST_RESERVED_SCANNER= -1; + static public final int tIDENTIFIER = 1; - static public final int tINTEGER = 2; - static public final int tCOLONCOLON = 3; - static public final int tCOLON = 4; - static public final int tSEMI = 5; - static public final int tCOMMA = 6; - static public final int 
tQUESTION = 7; - static public final int tLPAREN = 8; - static public final int tRPAREN = 9; - static public final int tLBRACKET = 10; - static public final int tRBRACKET = 11; - static public final int tLBRACE = 12; - static public final int tRBRACE = 13; - static public final int tPLUSASSIGN = 14; - static public final int tINCR = 15; - static public final int tPLUS = 16; - static public final int tMINUSASSIGN = 17; - static public final int tDECR = 18; - static public final int tARROWSTAR = 19; - static public final int tARROW = 20; - static public final int tMINUS = 21; - static public final int tSTARASSIGN = 22; - static public final int tSTAR = 23; - static public final int tMODASSIGN = 24; - static public final int tMOD = 25; - static public final int tXORASSIGN = 26; - static public final int tXOR = 27; - static public final int tAMPERASSIGN = 28; - static public final int tAND = 29; - static public final int tAMPER = 30; - static public final int tBITORASSIGN = 31; - static public final int tOR = 32; - static public final int tBITOR = 33; - - static public final int tCOMPL = 34; - + static public final int tBITCOMPLEMENT = 34; static public final int tNOTEQUAL = 35; - static public final int tNOT = 36; - static public final int tEQUAL = 37; - static public final int tASSIGN = 38; - static public final int tSHIFTL = 40; - static public final int tLTEQUAL = 41; - static public final int tLT = 42; - static public final int tSHIFTRASSIGN = 43; - static public final int tSHIFTR = 44; - static public final int tGTEQUAL = 45; - static public final int tGT = 46; - static public final int tSHIFTLASSIGN = 47; - static public final int tELLIPSIS = 48; - static public final int tDOTSTAR = 49; - static public final int tDOT = 50; - static public final int tDIVASSIGN = 51; - static public final int tDIV = 52; - + static public final int tBACKSLASH= 53; + + /** @deprecated use {@link #tAND} */ static public final int t_and = 54; - + /** @deprecated use {@link 
#tAMPERASSIGN} */ static public final int t_and_eq = 55; static public final int t_asm = 56; - static public final int t_auto = 57; + /** @deprecated use {@link #tAMPER} */ static public final int t_bitand = 58; - + /** @deprecated use {@link #tBITOR} */ static public final int t_bitor = 59; static public final int t_bool = 60; - static public final int t_break = 61; - static public final int t_case = 62; - static public final int t_catch = 63; - static public final int t_char = 64; - static public final int t_class = 65; - + + /** @deprecated use {@link #tBITCOMPLEMENT} */ + static public final int tCOMPL= tBITCOMPLEMENT; + /** @deprecated use {@link #tBITCOMPLEMENT} */ static public final int t_compl = 66; static public final int t_const = 67; - static public final int t_const_cast = 69; - static public final int t_continue = 70; - static public final int t_default = 71; - static public final int t_delete = 72; - static public final int t_do = 73; - static public final int t_double = 74; - static public final int t_dynamic_cast = 75; - static public final int t_else = 76; - static public final int t_enum = 77; - static public final int t_explicit = 78; - static public final int t_export = 79; - static public final int t_extern = 80; - static public final int t_false = 81; - static public final int t_float = 82; - static public final int t_for = 83; - static public final int t_friend = 84; - static public final int t_goto = 85; - static public final int t_if = 86; - static public final int t_inline = 87; - static public final int t_int = 88; - static public final int t_long = 89; - static public final int t_mutable = 90; - static public final int t_namespace = 91; - static public final int t_new = 92; + /** @deprecated use {@link #tNOT} */ static public final int t_not = 93; - + /** @deprecated use {@link #tNOTEQUAL} */ static public final int t_not_eq = 94; static public final int t_operator = 95; - + + /** @deprecated use {@link #tOR} */ static public final int 
t_or = 96; - + /** @deprecated use {@link #tBITORASSIGN} */ static public final int t_or_eq = 97; static public final int t_private = 98; - static public final int t_protected = 99; - static public final int t_public = 100; - static public final int t_register = 101; - static public final int t_reinterpret_cast = 102; - static public final int t_return = 103; - static public final int t_short = 104; - static public final int t_sizeof = 105; - static public final int t_static = 106; - static public final int t_static_cast = 107; - static public final int t_signed = 108; - static public final int t_struct = 109; - static public final int t_switch = 110; - static public final int t_template = 111; - static public final int t_this = 112; - static public final int t_throw = 113; - static public final int t_true = 114; - static public final int t_try = 115; - static public final int t_typedef = 116; - static public final int t_typeid = 117; - static public final int t_typename = 118; - static public final int t_union = 119; - static public final int t_unsigned = 120; - static public final int t_using = 121; - static public final int t_virtual = 122; - static public final int t_void = 123; - static public final int t_volatile = 124; - static public final int t_wchar_t = 125; - static public final int t_while = 126; - + + /** @deprecated use {@link #tXOR} */ static public final int t_xor = 127; - + /** @deprecated use {@link #tXORASSIGN} */ static public final int t_xor_eq = 128; static public final int tFLOATINGPT = 129; - static public final int tSTRING = 130; - static public final int tLSTRING = 131; - static public final int tCHAR = 132; - static public final int tLCHAR = 133; - static public final int t__Bool = 134; - static public final int t__Complex = 135; - static public final int t__Imaginary = 136; - static public final int t_restrict = 137; - + + /** @deprecated don't use it */ static public final int tMACROEXP = 138; + static public final int tPOUND= 138; 
static public final int tPOUNDPOUND = 139; - static public final int tCOMPLETION = 140; - static public final int tEOC = 141; // End of Completion + /** @deprecated don't use it */ static public final int tCOMMENT = 142; - + /** @deprecated don't use it */ static public final int tBLOCKCOMMENT = 143; - - + /** @deprecated don't use it */ static public final int tLAST = 143; + int FIRST_RESERVED_IGCCToken = 144; + int LAST_RESERVED_IGCCToken = 199; + + int FIRST_RESERVED_IExtensionToken = 243; + int LAST_RESERVED_IExtensionToken = 299; } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java index 7463e803192..b299c751172 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java @@ -19,8 +19,6 @@ public class Keywords { public static final String CAST = "cast"; //$NON-NLS-1$ public static final String ALIGNOF = "alignof"; //$NON-NLS-1$ public static final String TYPEOF = "typeof"; //$NON-NLS-1$ - public static final String cpMIN = "?"; //$NON-NLS-1$ public static final String _BOOL = "_Bool"; //$NON-NLS-1$ public static final String _COMPLEX = "_Complex"; //$NON-NLS-1$ @@ -231,9 +229,14 @@ public class Keywords { public static final char[] cpDOT = ".".toCharArray(); //$NON-NLS-1$ public static final char[] cpDIVASSIGN = "/=".toCharArray(); //$NON-NLS-1$ public static final char[] cpDIV = "/".toCharArray(); //$NON-NLS-1$ + public static final char[] cpBACKSLASH = "\\".toCharArray(); //$NON-NLS-1$ public static final char[] cpPOUND = "#".toCharArray(); //$NON-NLS-1$ public static final char[] cpPOUNDPOUND = "##".toCharArray(); //$NON-NLS-1$ + // gcc extensions + public static final char[] cpMIN = "?".toCharArray(); //$NON-NLS-1$ + // preprocessor keywords public static final char[] cIFDEF = "ifdef".toCharArray(); //$NON-NLS-1$ public static final 
char[] cIFNDEF = "ifndef".toCharArray(); //$NON-NLS-1$ diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java new file mode 100644 index 00000000000..506f7898ee2 --- /dev/null +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.internal.core.parser.scanner; + +class CompletionTokenException extends Exception { + + private Token fToken; + + public CompletionTokenException(Token token) { + fToken= token; + } + + public Token getToken() { + return fToken; + } +} diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java new file mode 100644 index 00000000000..d83c81e8c14 --- /dev/null +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java @@ -0,0 +1,21 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.internal.core.parser.scanner; + +class DigraphToken extends Token { + public DigraphToken(int kind, int offset, int endOffset) { + super(kind, offset, endOffset); + } + + public char[] getTokenImage() { + return TokenUtil.getDigraphImage(getType()); + } +} diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java new file mode 100644 index 00000000000..8c5c9043b5a --- /dev/null +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java @@ -0,0 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.internal.core.parser.scanner; + +public interface ILexerLog { + + void handleProblem(int problemID, char[] source, int offset, int endOffset); + + void handleComment(boolean isBlockComment, char[] source, int offset, int endOffsetLast); +} diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java new file mode 100644 index 00000000000..eabddb459b8 --- /dev/null +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java @@ -0,0 +1,942 @@ +/******************************************************************************* + * Copyright (c) 2007 Wind River Systems, Inc. and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.internal.core.parser.scanner; + +import org.eclipse.cdt.core.dom.ast.IASTProblem; +import org.eclipse.cdt.core.parser.IGCCToken; +import org.eclipse.cdt.core.parser.IProblem; +import org.eclipse.cdt.core.parser.IToken; + +/** + * In short this class converts line endings (to '\n') and trigraphs + * (to their corresponding character), + * removes line-splices, comments and whitespace other than newline. 
+ * Returns preprocessor tokens. + *

+ * In addition to the preprocessor tokens the following tokens may also be returned: + * {@link #tEND_OF_INPUT}, {@link IToken#tCOMPLETION}. + *

+ * Number literals are split up into {@link IToken#tINTEGER} and {@link IToken#tFLOATINGPT}. + * No checks are done on the number literals. + *

+ * Universal character names (UCNs) are accepted, however characters from outside of the basic source character set are + * not converted to UCNs. Instead they are tested with + * {@link Character#isUnicodeIdentifierPart(char)} and may be accepted as part of an + * identifier. + *

+ * The characters in string literals and char-literals are left as they are found, no conversion to + * an execution character-set is performed. + */ +final public class Lexer { + public static final int tNEWLINE = IToken.FIRST_RESERVED_SCANNER + 1; + public static final int tEND_OF_INPUT = IToken.FIRST_RESERVED_SCANNER + 2; + public static final int tQUOTE_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 3; + public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4; + + private static final int END_OF_INPUT = -1; + private static final int LINE_SPLICE_SEQUENCE = -2; + + public static class LexerOptions { + public boolean fSupportDollarInitializers= true; + public boolean fSupportMinAndMax= true; + public boolean fSupportContentAssist= false; + } + + // configuration + private final LexerOptions fOptions; + private final ILexerLog fLog; + + // the input to the lexer + private final char[] fInput; + private final int fLimit; + + // after phase 3 (newline, trigraph, line-splice) + private int fOffset; + private int fEndOffset; + private int fCharPhase3; + + private boolean fInsideIncludeDirective= false; + private Token fToken; + + // for the few cases where we have to lookahead more than one character + private int fMarkOffset; + private int fMarkEndOffset; + private int fMarkPrefetchedChar; + + + public Lexer(char[] input, LexerOptions options, ILexerLog log) { + fInput= input; + fLimit= input.length; + fOptions= options; + fLog= log; + nextCharPhase3(); + } + + public Lexer(char[] input, int limit, LexerOptions options, ILexerLog log) { + fInput= input; + fLimit= limit; + fOptions= options; + fLog= log; + nextCharPhase3(); + } + + /** + * Call this before consuming the name-token in the include directive. It causes the header-file + * tokens to be created. + */ + public void setInsideIncludeDirective() { + fInsideIncludeDirective= true; + } + + /** + * Returns the current preprocessor token, does not advance. 
+     */
+    public Token currentToken() {
+        return fToken;
+    }
+
+    /**
+     * Advances to the next token, skipping whitespace other than newline.
+     * @throws CompletionTokenException when completion is requested in a literal or a header-name.
+     */
+    public Token nextToken() throws CompletionTokenException {
+        return fToken= fetchToken();
+    }
+
+    /**
+     * Advances to the next pound token that starts a preprocessor directive, i.e. a pound
+     * token that is the first token of the input or directly follows a newline token.
+     * All tokens in between are discarded. The token returned also becomes the current token.
+     * @return pound token of the directive or end-of-input.
+     * @throws CompletionTokenException when completion is requested in a literal or a header-name.
+     */
+    public Token nextDirective() throws CompletionTokenException {
+        // a directive can start right away if nothing was read yet or a line was just completed
+        boolean atLineStart= fToken == null || fToken.getType() == tNEWLINE;
+        Token t;
+        while (true) {
+            // fetch exactly one token per iteration, such that every token is inspected
+            t= fetchToken();
+            final int type= t.getType();
+            if (type == tEND_OF_INPUT || (atLineStart && type == IToken.tPOUND)) {
+                break;
+            }
+            // consecutive newlines (empty lines) keep us at the start of a line
+            atLineStart= type == tNEWLINE;
+        }
+        fToken= t;
+        return t;
+    }
+
+    /**
+     * Computes the next token. 
+ */ + private Token fetchToken() throws CompletionTokenException { + while(true) { + final int start= fOffset; + final int c= fCharPhase3; + final int d= nextCharPhase3(); + switch(c) { + case END_OF_INPUT: + return newToken(Lexer.tEND_OF_INPUT, start); + case '\n': + fInsideIncludeDirective= false; + return newToken(Lexer.tNEWLINE, start); + case ' ': + case '\t': + case 0xb: // vertical tab + case '\f': + case '\r': + continue; + + case 'L': + switch(d) { + case '"': + nextCharPhase3(); + return stringLiteral(start, true); + case '\'': + nextCharPhase3(); + return charLiteral(start, true); + } + return identifier(start, 1); + + case '"': + if (fInsideIncludeDirective) { + return headerName(start, true); + } + return stringLiteral(start, false); + + case '\'': + return charLiteral(start, false); + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': + case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': + case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': + case '_': + return identifier(start, 1); + + case '$': + if (fOptions.fSupportDollarInitializers) { + return identifier(start, 1); + } + break; + + case '\\': + switch(d) { + case 'u': case 'U': + nextCharPhase3(); + return identifier(start, 2); + } + return newToken(IToken.tBACKSLASH, start); + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return number(start, 1, false); + + case '.': + switch(d) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + nextCharPhase3(); + return number(start, 2, true); + + case '.': + markPhase3(); + if 
(nextCharPhase3() == '.') { + nextCharPhase3(); + return newToken(IToken.tELLIPSIS, start); + } + restorePhase3(); + break; + + case '*': + nextCharPhase3(); + return newToken(IToken.tDOTSTAR, start); + } + return newToken(IToken.tDOT, start); + + case '#': + if (d == '#') { + nextCharPhase3(); + return newToken(IToken.tPOUNDPOUND, start); + } + return newToken(IToken.tPOUND, start); + + case '{': + return newToken(IToken.tLBRACE, start); + case '}': + return newToken(IToken.tRBRACE, start); + case '[': + return newToken(IToken.tLBRACKET, start); + case ']': + return newToken(IToken.tRBRACKET, start); + case '(': + return newToken(IToken.tLPAREN, start); + case ')': + return newToken(IToken.tRPAREN, start); + case ';': + return newToken(IToken.tSEMI, start); + + case ':': + switch(d) { + case ':': + nextCharPhase3(); + return newToken(IToken.tCOLONCOLON, start); + case '>': + nextCharPhase3(); + return newDigraphToken(IToken.tRBRACKET, start); + } + return newToken(IToken.tCOLON, start); + + case '?': + return newToken(IToken.tQUESTION, start); + + case '+': + switch (d) { + case '+': + nextCharPhase3(); + return newToken(IToken.tINCR, start); + case '=': + nextCharPhase3(); + return newToken(IToken.tPLUSASSIGN, start); + } + return newToken(IToken.tPLUS, start); + + case '-': + switch (d) { + case '>': + int e= nextCharPhase3(); + if (e == '*') { + nextCharPhase3(); + return newToken(IToken.tARROWSTAR, start); + } + return newToken(IToken.tARROW, start); + + case '-': + nextCharPhase3(); + return newToken(IToken.tDECR, start); + case '=': + nextCharPhase3(); + return newToken(IToken.tMINUSASSIGN, start); + } + return newToken(IToken.tMINUS, start); + + case '*': + if (d == '=') { + nextCharPhase3(); + return newToken(IToken.tSTARASSIGN, start); + } + return newToken(IToken.tSTAR, start); + + case '/': + switch (d) { + case '=': + nextCharPhase3(); + return newToken(IToken.tDIVASSIGN, start); + case '/': + nextCharPhase3(); + lineComment(start); + continue; + case 
'*': + nextCharPhase3(); + blockComment(start); + continue; + } + return newToken(IToken.tDIV, start); + + case '%': + switch (d) { + case '=': + nextCharPhase3(); + return newToken(IToken.tMODASSIGN, start); + case '>': + nextCharPhase3(); + return newDigraphToken(IToken.tRBRACE, start); + case ':': + final int e= nextCharPhase3(); + if (e == '%') { + markPhase3(); + if (nextCharPhase3() == ':') { + nextCharPhase3(); + return newDigraphToken(IToken.tPOUNDPOUND, start); + } + restorePhase3(); + } + return newDigraphToken(IToken.tPOUND, start); + } + return newToken(IToken.tMOD, start); + + case '^': + if (d == '=') { + nextCharPhase3(); + return newToken(IToken.tXORASSIGN, start); + } + return newToken(IToken.tXOR, start); + + case '&': + switch (d) { + case '&': + nextCharPhase3(); + return newToken(IToken.tAND, start); + case '=': + nextCharPhase3(); + return newToken(IToken.tAMPERASSIGN, start); + } + return newToken(IToken.tAMPER, start); + + case '|': + switch (d) { + case '|': + nextCharPhase3(); + return newToken(IToken.tOR, start); + case '=': + nextCharPhase3(); + return newToken(IToken.tBITORASSIGN, start); + } + return newToken(IToken.tBITOR, start); + + case '~': + return newToken(IToken.tBITCOMPLEMENT, start); + + case '!': + if (d == '=') { + nextCharPhase3(); + return newToken(IToken.tNOTEQUAL, start); + } + return newToken(IToken.tNOT, start); + + case '=': + if (d == '=') { + nextCharPhase3(); + return newToken(IToken.tEQUAL, start); + } + return newToken(IToken.tASSIGN, start); + + case '<': + if (fInsideIncludeDirective) { + return headerName(start, false); + } + + switch(d) { + case '=': + nextCharPhase3(); + return newToken(IToken.tLTEQUAL, start); + case '<': + final int e= nextCharPhase3(); + if (e == '=') { + nextCharPhase3(); + return newToken(IToken.tSHIFTLASSIGN, start); + } + return newToken(IToken.tSHIFTL, start); + case '?': + if (fOptions.fSupportMinAndMax) { + nextCharPhase3(); + return newToken(IGCCToken.tMIN, start); + } + break; + 
case ':': + nextCharPhase3(); + return newDigraphToken(IToken.tLBRACKET, start); + case '%': + nextCharPhase3(); + return newDigraphToken(IToken.tLBRACE, start); + } + return newToken(IToken.tLT, start); + + case '>': + switch(d) { + case '=': + nextCharPhase3(); + return newToken(IToken.tGTEQUAL, start); + case '>': + final int e= nextCharPhase3(); + if (e == '=') { + nextCharPhase3(); + return newToken(IToken.tSHIFTRASSIGN, start); + } + return newToken(IToken.tSHIFTR, start); + case '?': + if (fOptions.fSupportMinAndMax) { + nextCharPhase3(); + return newToken(IGCCToken.tMAX, start); + } + break; + } + return newToken(IToken.tGT, start); + + case ',': + return newToken(IToken.tCOMMA, start); + + default: + // in case we have some other letter to start an identifier + if (Character.isUnicodeIdentifierStart((char) c)) { + return identifier(start, 1); + } + break; + } + + handleProblem(IASTProblem.SCANNER_BAD_CHARACTER, start); + // loop is continued, character is treated as white-space. + } + } + + private Token newToken(int kind, int offset) { + return new SimpleToken(kind, offset, fOffset); + } + + private Token newDigraphToken(int kind, int offset) { + return new DigraphToken(kind, offset, fOffset); + } + + private Token newToken(int kind, int offset, int length) { + return new TokenWithImage(kind, this, offset, fOffset, length); + } + + private void handleProblem(int problemID, int offset) { + fLog.handleProblem(problemID, fInput, offset, fOffset); + } + + private Token headerName(final int start, final boolean expectQuotes) throws CompletionTokenException { + int length= 1; + boolean done = false; + int c= fCharPhase3; + loop: while (!done) { + switch (c) { + case END_OF_INPUT: + if (fOptions.fSupportContentAssist) { + throw new CompletionTokenException( + newToken((expectQuotes ? 
tQUOTE_HEADER_NAME : tSYSTEM_HEADER_NAME), start, length));
+                }
+                // no break;
+            case '\n':
+                handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
+                break loop;
+
+            case '"':
+                done= expectQuotes;
+                break;
+            case '>':
+                done= !expectQuotes;
+                break;
+            }
+            length++;
+            c= nextCharPhase3();
+        }
+        return newToken((expectQuotes ? tQUOTE_HEADER_NAME : tSYSTEM_HEADER_NAME), start, length);
+    }
+
+    /**
+     * Skips a block comment and reports it to the log.
+     * The caller has already consumed the opening slash and star, therefore the prefetched
+     * character is the first character of the comment body. Scanning has to start with that
+     * character, otherwise a comment that is closed immediately would not be recognized.
+     * A comment that is not closed is reported up to the end of the input.
+     * @param start offset of the slash that opens the comment.
+     */
+    private void blockComment(final int start) {
+        int c= fCharPhase3;
+        while(true) {
+            switch (c) {
+            case END_OF_INPUT:
+                fLog.handleComment(true, fInput, start, fOffset);
+                return;
+            case '*':
+                c= nextCharPhase3();
+                if (c == '/') {
+                    // consume the slash, such that fOffset denotes the end of the comment
+                    nextCharPhase3();
+                    fLog.handleComment(true, fInput, start, fOffset);
+                    return;
+                }
+                // c is inspected again, it may be another star
+                break;
+            default:
+                c= nextCharPhase3();
+                break;
+            }
+        }
+    }
+
+    /**
+     * Skips a line comment and reports it to the log. The comment extends up to, but does not
+     * include, the next newline or the end of the input.
+     * @param start offset of the first slash of the comment.
+     */
+    private void lineComment(final int start) {
+        int c= fCharPhase3;
+        while(true) {
+            switch (c) {
+            case END_OF_INPUT:
+            case '\n':
+                fLog.handleComment(false, fInput, start, fOffset);
+                return;
+            }
+            c= nextCharPhase3();
+        }
+    }
+
+    /**
+     * Scans the remainder of a string literal, the opening quote (and the prefix of a wide
+     * literal) has already been consumed and is accounted for in the initial image length.
+     * A literal that is ended by a newline or the end of the input is reported as a problem and
+     * the token for the characters read so far is returned.
+     * @param start offset of the first character of the literal.
+     * @param wide whether the literal carries the prefix for wide literals.
+     * @throws CompletionTokenException when the end of the input is reached within the literal
+     * and content assist is supported.
+     */
+    private Token stringLiteral(final int start, final boolean wide) throws CompletionTokenException {
+        boolean escaped = false;
+        boolean done = false;
+        int length= wide ? 2 : 1;
+        int c= fCharPhase3;
+
+        loop: while (!done) {
+            switch(c) {
+            case END_OF_INPUT:
+                if (fOptions.fSupportContentAssist) {
+                    throw new CompletionTokenException(newToken(wide ? IToken.tLSTRING : IToken.tSTRING, start, length));
+                }
+                // no break;
+            case '\n':
+                handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
+                break loop;
+
+            case '\\':
+                // a backslash escapes the next character, unless it is escaped itself
+                escaped= !escaped;
+                break;
+            case '"':
+                if (!escaped) {
+                    done= true;
+                }
+                escaped= false;
+                break;
+            default:
+                escaped= false;
+                break;
+            }
+            length++;
+            c= nextCharPhase3();
+        }
+        return newToken(wide ? IToken.tLSTRING : IToken.tSTRING, start, length);
+    }
+
+    private Token charLiteral(final int start, boolean wide) throws CompletionTokenException {
+        boolean escaped = false;
+        boolean done = false;
+        int length= wide ? 
2 : 1; + int c= fCharPhase3; + + loop: while (!done) { + switch(c) { + case END_OF_INPUT: + if (fOptions.fSupportContentAssist) { + throw new CompletionTokenException(newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length)); + } + // no break; + case '\n': + handleProblem(IProblem.SCANNER_BAD_CHARACTER, start); + break loop; + case '\\': + escaped= !escaped; + break; + case '\'': + if (!escaped) { + done= true; + } + escaped= false; + break; + default: + escaped= false; + break; + } + length++; + c= nextCharPhase3(); + } + return newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length); + } + + private Token identifier(int start, int length) { + int tokenKind= IToken.tIDENTIFIER; + boolean isPartOfIdentifier= true; + int c= fCharPhase3; + while (true) { + switch(c) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': + case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': + case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': + case '_': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + + case '\\': // universal character name + markPhase3(); + switch(nextCharPhase3()) { + case 'u': case 'U': + length++; + break; + default: + restorePhase3(); + isPartOfIdentifier= false; + break; + } + break; + + case END_OF_INPUT: + if (fOptions.fSupportContentAssist) { + tokenKind= IToken.tCOMPLETION; + } + isPartOfIdentifier= false; + break; + case ' ': case '\t': case 0xb: case '\f': case '\r': case '\n': + isPartOfIdentifier= false; + break; + + case '$': + isPartOfIdentifier= fOptions.fSupportDollarInitializers; + break; + + case '{': case 
'}': case '[': case ']': case '#': case '(': case ')': case '<': case '>':
+            case '%': case ':': case ';': case '.': case '?': case '*': case '+': case '-': case '/':
+            case '^': case '&': case '|': case '~': case '!': case '=': case ',': case '"': case '\'':
+                isPartOfIdentifier= false;
+                break;
+
+            default:
+                isPartOfIdentifier= Character.isUnicodeIdentifierPart((char) c);
+                break;
+            }
+
+            if (!isPartOfIdentifier) {
+                break;
+            }
+
+            length++;
+            c= nextCharPhase3();
+        }
+
+        return newToken(tokenKind, start, length);
+    }
+
+    /**
+     * Scans the remainder of a number literal. No checks are done on the literal, it is merely
+     * classified as integer or floating point.
+     * @param start offset of the first character of the literal.
+     * @param length number of characters of the image that have been consumed by the caller.
+     * @param isFloat whether the literal is already known to be a floating point literal.
+     * @throws CompletionTokenException when the end of the input is reached within the literal
+     * and content assist is supported.
+     */
+    private Token number(final int start, int length, boolean isFloat) throws CompletionTokenException {
+        boolean isPartOfNumber= true;
+        int c= fCharPhase3;
+        while (true) {
+            switch(c) {
+            // non-digit
+            case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': case 'h': case 'i':
+            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'q': case 'r':
+            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+            case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': case 'H': case 'I':
+            case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'Q': case 'R':
+            case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+            case '_':
+
+            // digit
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                break;
+
+            // period
+            case '.':
+                isFloat= true;
+                break;
+
+            // sign
+            case 'p':
+            case 'P':
+            case 'e':
+            case 'E':
+                length++;
+                c= nextCharPhase3();
+                switch (c) {
+                case '+': case '-':
+                case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                    isFloat= true;
+                    length++;
+                    c= nextCharPhase3();
+                    break;
+                }
+                // c has been prefetched already, inspect it without advancing
+                continue;
+
+            // universal character name (non-digit)
+            case '\\':
+                markPhase3();
+                switch(nextCharPhase3()) {
+                case 'u': case 'U':
+                    length++;
+                    break;
+                default:
+                    restorePhase3();
+                    isPartOfNumber= false;
+                    break;
+                }
+                break;
+
+            // compare with the character constant END_OF_INPUT, not with the token kind
+            // tEND_OF_INPUT, which is never delivered by phase 3
+            case END_OF_INPUT:
+                if (fOptions.fSupportContentAssist) {
+                    throw new CompletionTokenException(
+                            newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length));
+                }
+                isPartOfNumber= false;
+                break;
+
+            default:
+                isPartOfNumber= false;
+                break;
+            }
+            if (!isPartOfNumber) {
+                break;
+            }
+
+            c= nextCharPhase3();
+            length++;
+        }
+
+        return newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length);
+    }
+
+
+    /**
+     * Saves the current state of phase3, necessary for '...', '%:%:' and universal character names (UCNs).
+     */
+    private void markPhase3() {
+        fMarkOffset= fOffset;
+        fMarkEndOffset= fEndOffset;
+        fMarkPrefetchedChar= fCharPhase3;
+    }
+
+    /**
+     * Restores a previously saved state of phase3.
+     */
+    private void restorePhase3() {
+        fOffset= fMarkOffset;
+        fEndOffset= fMarkEndOffset;
+        fCharPhase3= fMarkPrefetchedChar;
+    }
+
+    /**
+     * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
+     * Changes fOffset, fEndOffset and fCharPhase3.
+     */
+    private int nextCharPhase3() {
+        int offset;
+        int c;
+        do {
+            offset= fEndOffset;
+            c= fetchCharPhase3(offset); // changes fEndOffset
+        }
+        while(c == LINE_SPLICE_SEQUENCE);
+
+        fOffset= offset;
+        fCharPhase3= c;
+        return c;
+    }
+
+    /**
+     * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
+     * Changes fEndOffset, but is stateless otherwise. 
+ */ + private int fetchCharPhase3(int pos) { + if (pos >= fLimit) { + fEndOffset= fLimit; + return END_OF_INPUT; + } + final char c= fInput[pos++]; + switch(c) { + // windows line-ending + case '\r': + if (pos < fLimit && fInput[pos] == '\n') { + fEndOffset= pos+1; + return '\n'; + } + fEndOffset= pos; + return c; + + // trigraph sequences + case '?': + if (pos+1 >= fLimit || fInput[pos] != '?') { + fEndOffset= pos; + return c; + } + final char trigraph= checkTrigraph(fInput[pos+1]); + if (trigraph == 0) { + fEndOffset= pos; + return c; + } + if (trigraph != '\\') { + fEndOffset= pos+2; + return trigraph; + } + pos+= 2; + // no break, handle backslash + + case '\\': + final int lsPos= findEndOfLineSpliceSequence(pos); + if (lsPos > pos) { + fEndOffset= lsPos; + return LINE_SPLICE_SEQUENCE; + } + fEndOffset= pos; + return '\\'; // don't return c, it may be a '?' + + default: + fEndOffset= pos; + return c; + } + } + + /** + * Maps a trigraph to the character it encodes. + * @param c trigraph without leading question marks. + * @return the character encoded or 0. + */ + private char checkTrigraph(char c) { + switch(c) { + case '=': return '#'; + case '\'':return '^'; + case '(': return '['; + case ')': return ']'; + case '!': return '|'; + case '<': return '{'; + case '>': return '}'; + case '-': return '~'; + case '/': return '\\'; + } + return 0; + } + + /** + * Returns the endoffset for a line-splice sequence, or -1 if there is none. + */ + private int findEndOfLineSpliceSequence(int pos) { + boolean haveBackslash= true; + int result= -1; + loop: while(pos < fLimit) { + switch(fInput[pos++]) { + case '\n': + if (haveBackslash) { + result= pos; + haveBackslash= false; + continue loop; + } + return result; + + case '\r': case ' ': case '\f': case '\t': case 0xb: // vertical tab + if (haveBackslash) { + continue loop; + } + return result; + + case '?': + if (pos+1 >= fLimit || fInput[pos] != '?' 
|| fInput[++pos] != '/') { + return result; + } + // fall through to backslash handling + + case '\\': + if (!haveBackslash) { + haveBackslash= true; + continue loop; + } + return result; + + default: + return result; + } + } + return result; + } + + /** + * Returns the image from the input without any modification. + */ + public char[] getInputChars(int offset, int endOffset) { + final int length= endOffset-offset; + final char[] result= new char[length]; + System.arraycopy(fInput, offset, result, 0, length); + return result; + } + + /** + * Returns the image with trigraphs replaced and line-splices removed. + */ + char[] getTokenImage(int offset, int endOffset, int imageLength) { + final int length= endOffset-offset; + final char[] result= new char[imageLength]; + if (length == imageLength) { + System.arraycopy(fInput, offset, result, 0, length); + } + else { + markPhase3(); + fEndOffset= offset; + int idx= 0; + while (idx