From 285d224021ad46177c105a5092ecebb5b0406658 Mon Sep 17 00:00:00 2001
From: Markus Schorn <markus.schorn@windriver.com>
Date: Tue, 9 Oct 2007 15:09:59 +0000
Subject: [PATCH] For bug 205272, the lexer with JUnit tests.

---
 .../parser/tests/ast2/DOMParserTestSuite.java |   1 +
 .../core/parser/tests/ast2/LexerTests.java    | 545 ++++++++++
 .../core/parser/tests/ast2/TestLexerLog.java  |  61 ++
 .../org.eclipse.cdt.core/META-INF/MANIFEST.MF |   1 +
 .../cdt/core/dom/parser/IExtensionToken.java  |   4 +-
 .../eclipse/cdt/core/parser/IGCCToken.java    |  12 +-
 .../org/eclipse/cdt/core/parser/IScanner.java |   4 +-
 .../org/eclipse/cdt/core/parser/IToken.java   | 165 +--
 .../org/eclipse/cdt/core/parser/Keywords.java |   7 +-
 .../scanner/CompletionTokenException.java     |  24 +
 .../core/parser/scanner/DigraphToken.java     |  21 +
 .../core/parser/scanner/ILexerLog.java        |  18 +
 .../internal/core/parser/scanner/Lexer.java   | 942 ++++++++++++++++++
 .../core/parser/scanner/SimpleToken.java      |  21 +
 .../internal/core/parser/scanner/Token.java   | 111 +++
 .../core/parser/scanner/TokenUtil.java        | 146 +++
 .../core/parser/scanner/TokenWithImage.java   |  38 +
 .../core/parser/scanner2/BaseScanner.java     |   2 +-
 18 files changed, 1978 insertions(+), 145 deletions(-)
 create mode 100644 core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java
 create mode 100644 core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/SimpleToken.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Token.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenUtil.java
 create mode 100644 core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenWithImage.java

diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java
index 140baf93b41..843cc532e74 100644
--- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/DOMParserTestSuite.java
@@ -26,6 +26,7 @@ public class DOMParserTestSuite extends TestCase {
 
 	public static Test suite() { 
 		TestSuite suite= new TestSuite(ParserTestSuite.class.getName());
+		suite.addTest(LexerTests.suite());
 		suite.addTest(DOMScannerTests.suite());
 		suite.addTest(AST2Tests.suite());
 		suite.addTestSuite( GCCTests.class );
diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java
new file mode 100644
index 00000000000..51da1384302
--- /dev/null
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/LexerTests.java
@@ -0,0 +1,545 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.cdt.core.parser.tests.ast2;
+
+import junit.framework.TestSuite;
+
+import org.eclipse.cdt.core.parser.IGCCToken;
+import org.eclipse.cdt.core.parser.IProblem;
+import org.eclipse.cdt.core.parser.IToken;
+import org.eclipse.cdt.core.testplugin.util.BaseTestCase;
+import org.eclipse.cdt.internal.core.parser.scanner.Lexer;
+import org.eclipse.cdt.internal.core.parser.scanner.Token;
+import org.eclipse.cdt.internal.core.parser.scanner.Lexer.LexerOptions;
+
+
+public class LexerTests extends BaseTestCase {
+	static String TRIGRAPH_REPLACES_CHARS= "#^[]|{}~\\";
+	static String TRIGRAPH_CHARS= "='()!<>-/";
+
+	public static TestSuite suite() {
+		return suite(LexerTests.class);
+	}
+
+	private Lexer fLexer;
+	private TestLexerLog fLog= new TestLexerLog();
+	private int fLastEndOffset;
+
+	public LexerTests() {
+		super();
+	}
+
+	public LexerTests(String name) {
+		super(name);
+	}
+
+	private void init(String input) throws Exception { // starts a fresh lexer over input with default LexerOptions
+		fLog.clear(); // drop problems/comments recorded for a previous input
+		fLexer= new Lexer(input.toCharArray(), new LexerOptions(), fLog);
+		fLexer.nextToken(); // prime the lexer: token() reads the pending token via currentToken()
+		fLastEndOffset= 0; // token() expects the first token to start at offset 0
+	}
+
+	private void init(String input, boolean dollar, boolean minmax) throws Exception { // like init(String), with explicit lexer options
+		fLog.clear(); // drop problems/comments recorded for a previous input
+		final LexerOptions lexerOptions = new LexerOptions();
+		lexerOptions.fSupportDollarInitializers= dollar; // testIdentifier expects a bad-character problem for '$' when this is false
+		lexerOptions.fSupportMinAndMax= minmax; // testOperatorAndPunctuators expects "<?" and ">?" to split into two tokens when false
+		fLexer= new Lexer(input.toCharArray(), lexerOptions, fLog);
+		fLexer.nextToken(); // prime the lexer: token() reads the pending token via currentToken()
+		fLastEndOffset= 0; // token() expects the first token to start at offset 0
+	}
+
+	private void token(int tokenType) throws Exception {
+		token(tokenType, null);
+	}
+	
+	private void token(int tokenType, String image) throws Exception { // checks the pending token, then advances the lexer
+		Token t= fLexer.currentToken(); // the token fetched by init() or by the previous token() call
+		assertEquals(tokenType, t.getType());
+		assertEquals(fLastEndOffset, t.getOffset()); // must start exactly where the last checked token (or ws() gap) ended
+		fLastEndOffset= t.getEndOffset();
+		if (image != null) { // a null image means: do not check the token text
+			assertEquals(image, new String(t.getTokenImage()));
+		}
+		fLexer.nextToken(); // leave the following token pending for the next check
+	}
+	
+	private void integer(String expectedImage) throws Exception {
+		token(IToken.tINTEGER, expectedImage);
+	}
+
+	private void floating(String expectedImage) throws Exception {
+		token(IToken.tFLOATINGPT, expectedImage);
+	}
+
+	private void id(String expectedImage) throws Exception {
+		token(IToken.tIDENTIFIER, expectedImage);
+	}
+
+	private void str(String expectedImage) throws Exception {
+		token(IToken.tSTRING, "\"" + expectedImage + "\"");
+	}
+
+	private void wstr(String expectedImage) throws Exception {
+		token(IToken.tLSTRING, "L\"" + expectedImage + "\"");
+	}
+
+	private void ch(String expectedImage) throws Exception {
+		token(IToken.tCHAR, expectedImage);
+	}
+
+	private void wch(String expectedImage) throws Exception {
+		token(IToken.tLCHAR, expectedImage);
+	}
+
+	private void eof() throws Exception { // asserts that the input is exhausted and that no logged problem/comment is left unchecked
+		Token t= fLexer.currentToken(); // init()/token() already fetched the pending token; calling nextToken() here would skip it unchecked
+		assertEquals("superfluous token " + t, Lexer.tEND_OF_INPUT, t.getType());
+		assertEquals(0, fLog.getProblemCount()); // problem() removes what it checks, so leftovers are unexpected problems
+		assertEquals(0, fLog.getCommentCount()); // comment() removes what it checks, so leftovers are unexpected comments
+	}
+	
+	private void nl() throws Exception {
+		token(Lexer.tNEWLINE);
+	}
+
+	private void ws() throws Exception { // asserts a gap between the last checked token and the pending one, and skips over it
+		int offset= fLexer.currentToken().getOffset();
+		assertTrue(offset > fLastEndOffset); // the pending token must start strictly after the previous end offset
+		fLastEndOffset= offset; // so that the next token() call accepts the pending token's offset
+	}
+
+	private void problem(int kind, String img) throws Exception {
+		assertEquals(fLog.createString(kind, img), fLog.removeFirstProblem());
+	}
+
+	private void comment(String img) throws Exception {
+		ws();
+		assertEquals(img, fLog.removeFirstComment());
+	}
+
+	public void testTrigraphSequences() throws Exception {
+		init("\"??=??/??'??(??)??!??<??>??-\"");
+		str("#\\^[]|{}~");
+		eof();
+		
+		init("??=??'??(??)??!??<??>??-");
+		token(IToken.tPOUND);
+		token(IToken.tXOR);
+		token(IToken.tLBRACKET);
+		token(IToken.tRBRACKET);
+		token(IToken.tBITOR);
+		token(IToken.tLBRACE);
+		token(IToken.tRBRACE);
+		token(IToken.tCOMPL);
+		eof();
+		
+		init("a??/\nb");
+		id("ab");
+		eof();
+	}
+	
+	public void testWindowsLineEnding() throws Exception {
+		init("\n\n");
+		nl(); nl(); eof();
+		init("\r\n\r\n");
+		nl(); nl(); eof();
+	}
+	
+	public void testLineSplicingTrigraph() throws Exception {
+		// a trigraph cannot be spliced
+		init("??\\\n="); 
+		token(IToken.tQUESTION);
+		token(IToken.tQUESTION);
+		token(IToken.tASSIGN);
+		eof();
+
+		init("??\\\r\n="); 
+		token(IToken.tQUESTION);
+		token(IToken.tQUESTION);
+		token(IToken.tASSIGN);
+		eof();
+		
+		// trigraph can be used to splice a line
+		init("a??/\nb");
+		id("ab");
+		eof();
+	}
+		
+	public void testLineSplicingStringLiteral() throws Exception {
+		// splicing in string literal
+		init("\"a\\\nb\""); 
+		str("ab"); 
+		eof();
+
+		init("\"a\\\r\nb\"");
+		str("ab"); 
+		eof();
+	}
+
+	public void testLineSplicingCharLiteral() throws Exception {
+		init("'a\\\nb'"); 
+		ch("'ab'"); 
+		eof();
+
+		init("'a\\\r\nb'");
+		ch("'ab'"); 
+		eof();
+	}
+
+	public void testLineSplicingHeaderName() throws Exception {
+		init("p\"a\\\nb\""); 
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tQUOTE_HEADER_NAME, "\"ab\""); 
+		eof();
+
+		init("p\"a\\\r\nb\"");
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tQUOTE_HEADER_NAME, "\"ab\""); 
+		eof();
+
+		init("p<a\\\nb>"); 
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tSYSTEM_HEADER_NAME, "<ab>"); 
+		eof();
+
+		init("p<a\\\r\nb>");
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tSYSTEM_HEADER_NAME, "<ab>"); 
+		eof();
+	}
+
+	public void testLineSplicingComment() throws Exception {
+		init("// a\\\nb\n");
+		comment("// a\\\nb");
+		nl();
+		eof();
+
+		init("// a\\\nb\n");
+		comment("// a\\\nb");
+		nl();
+		eof();
+
+		init("/\\\n\\\n/ ab\n");
+		comment("/\\\n\\\n/ ab");
+		nl();
+		eof();
+
+		init("/\\\n* a\\\nb*\\\n/");
+		comment("/\\\n* a\\\nb*\\\n/");
+		eof();
+	}
+
+	public void testLineSplicingIdentifier() throws Exception {
+		init("a\\\nb");
+		id("ab");
+		eof();
+
+		init("a\\\r\nb");
+		id("ab");
+		eof();
+	}
+
+	public void testLineSplicingNumber() throws Exception {
+		init(".\\\n1");
+		floating(".1");
+		eof();
+
+		init(".\\\r\n1");
+		floating(".1");
+		eof();
+	}
+
+	public void testComments() throws Exception {
+		init("// /*\na");
+		comment("// /*");
+		nl();
+		id("a");
+		eof();
+		
+		init("/* // /* \n xxx*/a");
+		comment("/* // /* \n xxx*/");
+		id("a");
+		eof();
+	}
+	
+	public void testHeaderName() throws Exception {
+		init("p\"'/*//\\\"");
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tQUOTE_HEADER_NAME, "\"'/*//\\\"");
+		eof();
+
+		init("p<'\"/*//>");
+		fLexer.setInsideIncludeDirective();
+		id("p");
+		token(Lexer.tSYSTEM_HEADER_NAME, "<'\"/*//>");
+		eof();
+	}
+	
+	public void testIdentifier() throws Exception {
+		final String ident= "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$\\u1234\\U123456780123456789";
+		int unc1= ident.indexOf('\\');
+		for (int i = 0; i <= unc1; i++) {
+			String id= ident.substring(i);
+			init(id); 
+			id(id);
+			eof();
+		}
+		String id= ident.substring(ident.indexOf('\\', unc1+1));
+		init(id); 
+		id(id);
+		eof();
+		
+		for (int i= 0; i <10; i++) {
+			String nonid= ident.substring(ident.length()-i-1);
+			init(nonid);
+			integer(nonid);
+			eof();
+		}
+		
+		init(ident, false, true); 
+		final int idxDollar = ident.indexOf('$');
+		id(ident.substring(0, idxDollar));
+		problem(IProblem.SCANNER_BAD_CHARACTER, "$");
+		ws();
+		id(ident.substring(idxDollar+1));
+	}
+	
+	public void testNumber() throws Exception {
+		final String number= ".0123456789.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" +
+			"\\uaaaa\\Uaaaaaaaae+e-E+E-";
+		for (int i = 0; i < 11; i++) {
+			String n= number.substring(i);
+			init(n); 
+			floating(n);
+			eof();
+		}
+		int idxPlus= number.indexOf('+');
+		for (int i = 11; i < number.length(); i++) {
+			String n= number.substring(i);
+			init(n);
+			int startString= 0;
+			if (i==11) {token(IToken.tDOT); startString=1;}
+			if (i<idxPlus) id(n.substring(startString, idxPlus-i));
+			if (i<idxPlus+1) token(IToken.tPLUS);
+			if (i<idxPlus+2) id("e");
+			if (i<idxPlus+3) token(IToken.tMINUS);
+			if (i<idxPlus+4) id("E");
+			if (i<idxPlus+5) token(IToken.tPLUS);
+			if (i<idxPlus+6) id("E");
+			token(IToken.tMINUS);
+			eof();
+		}
+	}
+	
+	public void testCharLiteral() throws Exception {
+		String lit= "'abc0123\\'\".:; \\\\'";
+		init(lit);
+		ch(lit);
+		eof();
+
+		lit= 'L'+lit;
+		init(lit);
+		wch(lit);
+		eof();
+
+		lit= "'ut\n";
+		init(lit);
+		problem(IProblem.SCANNER_BAD_CHARACTER, "'ut");
+		ch("'ut");
+		nl();
+		eof();
+
+		lit= 'L'+lit;
+		init(lit);
+		problem(IProblem.SCANNER_BAD_CHARACTER, "L'ut");
+		wch("L'ut");
+		nl();
+		eof();
+		
+		lit= "'ut\\'";
+		init(lit);
+		problem(IProblem.SCANNER_BAD_CHARACTER, lit);
+		ch("'ut\\'");
+		eof();
+
+		lit= 'L'+lit;
+		init(lit);
+		problem(IProblem.SCANNER_BAD_CHARACTER, lit);
+		wch("L'ut\\'");
+		eof();
+	}
+
+	public void testStringLiteral() throws Exception {
+		String lit= "abc0123\\\"'.:; \\\\";
+		init('"' + lit + '"');
+		str(lit);
+		eof();
+
+		init("L\"" + lit + '"');
+		wstr(lit);
+		eof();
+
+		lit= "ut\n";
+		init('"' + lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, "\"ut");
+		token(IToken.tSTRING, "\"ut");
+		nl();
+		eof();
+
+		init("L\"" + lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, "L\"ut");
+		token(IToken.tLSTRING, "L\"ut");
+		nl();
+		eof();
+		
+		lit= "\"ut\\\"";
+		init(lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
+		token(IToken.tSTRING, "\"ut\\\"");
+		eof();
+
+		lit= 'L'+lit;
+		init(lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
+		token(IToken.tLSTRING, "L\"ut\\\"");
+		eof();
+	}
+
+	public void testOperatorAndPunctuators() throws Exception {
+		final String ops= "{}[]###()<::><%%>%:%:%:;:...?.::..*+-*/%^&|~=!<>+=-=*=/=%=" +
+		"^=&=|=<<>><<=>>===!=<=>=&&||++--,->*-><?>?\\";
+		final int[] tokens= new int[] {
+				IToken.tLBRACE, IToken.tRBRACE, IToken.tLBRACKET, IToken.tRBRACKET,	IToken.tPOUNDPOUND, 
+				IToken.tPOUND, IToken.tLPAREN, IToken.tRPAREN, IToken.tLBRACKET, IToken.tRBRACKET, 
+				IToken.tLBRACE, IToken.tRBRACE, IToken.tPOUNDPOUND, IToken.tPOUND, IToken.tSEMI, 
+				IToken.tCOLON, IToken.tELLIPSIS, IToken.tQUESTION, IToken.tDOT, IToken.tCOLONCOLON, IToken.tDOT,
+				IToken.tDOTSTAR, IToken.tPLUS, IToken.tMINUS, IToken.tSTAR, IToken.tDIV, IToken.tMOD,
+				IToken.tXOR, IToken.tAMPER, IToken.tBITOR, IToken.tCOMPL, IToken.tASSIGN, IToken.tNOT, 
+				IToken.tLT, IToken.tGT, IToken.tPLUSASSIGN, IToken.tMINUSASSIGN, IToken.tSTARASSIGN, 
+				IToken.tDIVASSIGN, IToken.tMODASSIGN, IToken.tXORASSIGN, IToken.tAMPERASSIGN, 
+				IToken.tBITORASSIGN, IToken.tSHIFTL, IToken.tSHIFTR, IToken.tSHIFTLASSIGN, 
+				IToken.tSHIFTRASSIGN, IToken.tEQUAL, IToken.tNOTEQUAL, IToken.tLTEQUAL, IToken.tGTEQUAL,
+				IToken.tAND, IToken.tOR, IToken.tINCR, IToken.tDECR, IToken.tCOMMA, IToken.tARROWSTAR,
+				IToken.tARROW, IGCCToken.tMIN, IGCCToken.tMAX, IToken.tBACKSLASH,
+			};
+		
+		for (int splices=0; splices<9; splices++) {
+			for (int trigraphs= 0; trigraphs<6; trigraphs++) {
+				StringBuffer buf= new StringBuffer();
+				String input= useTrigraphs(ops.toCharArray(), trigraphs);
+				init(instertLineSplices(input, splices)); 
+				for (int i = 0; i < tokens.length; i++) {
+					Token token= fLexer.currentToken();
+					buf.append(token.getTokenImage());
+					token(tokens[i]);
+				}
+				eof();
+				assertEquals(ops, buf.toString()); // check token image
+
+				init(input, true, false); 
+				for (int i = 0; i < tokens.length; i++) {
+					switch (tokens[i]) {
+					case IGCCToken.tMIN:
+						token(IToken.tLT);
+						token(IToken.tQUESTION);
+						break;
+					case IGCCToken.tMAX:
+						token(IToken.tGT);
+						token(IToken.tQUESTION);
+						break;
+					default:
+						token(tokens[i]);
+					break;
+					}
+				}
+				eof();
+			}
+		}
+	}
+
+	private String instertLineSplices(String input, int splices) { // appends line splices after every char of input; splices selects the variants
+		int m1= splices%3; // selects the '\n'-terminated splice variant (0 = none)
+		int m2= (splices-m1)/3; // selects the "\r\n"-terminated splice variant (0 = none); the caller passes splices in 0..8
+		char[] c= input.toCharArray();
+		StringBuffer result= new StringBuffer();
+		for (int i = 0; i < c.length; i++) {
+			result.append(c[i]);
+			if (c[i]=='?' && i+2 < c.length && c[i+1] == '?' && TRIGRAPH_CHARS.indexOf(c[i+2]) >= 0) {
+				result.append(c[++i]); // keep the three chars of a trigraph together: no splice is inserted inside it
+				result.append(c[++i]);
+			}
+			switch(m1) {
+			case 1:
+				result.append("\\\n"); // backslash, newline
+				break;
+			case 2:
+				result.append("\\ \n"); // backslash, space, newline
+				break;
+			}
+			switch(m2) {
+			case 1:
+				result.append("\\\r\n"); // backslash, CR, LF
+				break;
+			case 2:
+				result.append("\\\t\r\n"); // backslash, tab, CR, LF
+				break;
+			}
+		}
+		return result.toString();
+	}
+
+	private String useTrigraphs(char[] input, int mode) { // rewrites chars that have a trigraph form as "??x"; mode selects which occurrences
+		if (mode == 0) { // mode 0: return the input unchanged
+			return new String(input);
+		}
+
+		boolean yes= mode > 1; // mode 1 keeps the first candidate as is, modes >= 2 replace it
+		StringBuffer result= new StringBuffer();
+		for (int i = 0; i < input.length; i++) {
+			char c = input[i];
+			int idx= TRIGRAPH_REPLACES_CHARS.indexOf(c);
+			if (idx >= 0) { // indexOf returns -1 when absent; index 0 is '#' (trigraph "??="), which must be covered as well
+				if (yes) {
+					result.append("??");
+					result.append(TRIGRAPH_CHARS.charAt(idx)); // both constants list the pairs in the same order
+				}
+				else {
+					result.append(c);
+				}
+				if (mode < 3) { // modes 1 and 2 alternate between keeping and replacing; modes >= 3 replace every candidate
+					yes= !yes;
+				}
+			}
+			else {
+				result.append(c);
+			}
+		}
+		return result.toString();
+	}
+	
+	public void testLineSplicingOperator() throws Exception {
+		// splicing in operator
+		init("|\\\n|");
+		token(IToken.tOR);
+		eof();
+		
+		init("|\\\r\n|");
+		token(IToken.tOR); 
+		eof();
+	}
+}
diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java
new file mode 100644
index 00000000000..6dd59a25e42
--- /dev/null
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/ast2/TestLexerLog.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.core.parser.tests.ast2;
+
+import java.util.ArrayList;
+
+import org.eclipse.cdt.internal.core.parser.scanner.ILexerLog;
+
+public class TestLexerLog implements ILexerLog { // ILexerLog for tests: records reported comments and problems as strings
+
+	private ArrayList fComments= new ArrayList(); // comment texts (String), in reporting order
+	private ArrayList fProblems= new ArrayList(); // "<problemID>:<text>" entries (String), in reporting order
+	
+	public void handleComment(boolean isBlockComment, char[] source, int offset, int endOffset) { // isBlockComment is not recorded
+		fComments.add(new String(source, offset, endOffset-offset)); // text of source in [offset, endOffset)
+	}
+
+	public void handleProblem(int problemID, char[] source, int offset, int endOffset) {
+		fProblems.add(createString(problemID, new String(source, offset, endOffset-offset))); // same format the tests build for comparison
+	}
+
+	public String createString(int problemID, String image) { // formats a problem as "<problemID>:<image>"
+		return String.valueOf(problemID) + ":" + image;
+	}
+	
+	public void clear() { // forgets all recorded comments and problems
+		fComments.clear();
+		fProblems.clear();
+	}
+
+	public int getProblemCount() { // number of problems recorded and not yet removed
+		return fProblems.size();
+	}
+
+	public int getCommentCount() { // number of comments recorded and not yet removed
+		return fComments.size();
+	}
+
+	public String removeFirstProblem() { // removes and returns the oldest recorded problem
+		if (fProblems.isEmpty()) {
+			return "no problems have been reported"; // placeholder text is returned instead of throwing on an empty list
+		}
+		return (String) fProblems.remove(0);
+	}
+
+	public String removeFirstComment() { // removes and returns the oldest recorded comment
+		if (fComments.isEmpty()) {
+			return "no comments have been reported"; // placeholder text is returned instead of throwing on an empty list
+		}
+		return (String) fComments.remove(0);
+	}
+
+}
diff --git a/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF b/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF
index edbc90cf653..f651ce7870e 100644
--- a/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF
+++ b/core/org.eclipse.cdt.core/META-INF/MANIFEST.MF
@@ -61,6 +61,7 @@ Export-Package: org.eclipse.cdt.core,
  org.eclipse.cdt.internal.core.parser.ast.quick;x-internal:=true,
  org.eclipse.cdt.internal.core.parser.problem;x-internal:=true,
  org.eclipse.cdt.internal.core.parser.pst;x-internal:=true,
+ org.eclipse.cdt.internal.core.parser.scanner;x-internal:=true,
  org.eclipse.cdt.internal.core.parser.scanner2;x-internal:=true,
  org.eclipse.cdt.internal.core.parser.token;x-friends:="org.eclipse.cdt.ui",
  org.eclipse.cdt.internal.core.parser.util;x-internal:=true,
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java
index 17601d55bb8..fc440a2e784 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IExtensionToken.java
@@ -28,7 +28,7 @@ import org.eclipse.cdt.core.parser.IToken;
  */
 public interface IExtensionToken {
 
-	int t__otherDeclSpecModifierFirst= IToken.tLAST + 100;
-	int t__otherDeclSpecModifierLast= IToken.tLAST + 110;
+	int t__otherDeclSpecModifierFirst= IToken.FIRST_RESERVED_IExtensionToken;
+	int t__otherDeclSpecModifierLast= IToken.FIRST_RESERVED_IExtensionToken + 10;
 
 }
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java
index 19c96c4e5df..47b24f5294a 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IGCCToken.java
@@ -18,11 +18,11 @@ package org.eclipse.cdt.core.parser;
  */
 public interface IGCCToken extends IToken {
 	
-	public static final int t_typeof = tLAST + 1;
-	public static final int t___alignof__ = tLAST + 2;
-	public static final int tMAX = tLAST + 3;
-	public static final int tMIN = tLAST + 4;
-	public static final int t__attribute__ = tLAST + 5;
-	public static final int t__declspec = tLAST + 6;
+	public static final int t_typeof = FIRST_RESERVED_IGCCToken;
+	public static final int t___alignof__ = FIRST_RESERVED_IGCCToken + 1;
+	public static final int tMAX = FIRST_RESERVED_IGCCToken + 2;
+	public static final int tMIN = FIRST_RESERVED_IGCCToken + 3;
+	public static final int t__attribute__ = FIRST_RESERVED_IGCCToken + 4;
+	public static final int t__declspec = FIRST_RESERVED_IGCCToken + 5;
 	
 }
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java
index 302537597a1..d838079180a 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IScanner.java
@@ -30,8 +30,8 @@ import org.eclipse.cdt.internal.core.parser.scanner2.ILocationResolver;
  */
 public interface IScanner extends IMacroCollector {
 	
-	public static final int tPOUNDPOUND = -6;
-	public static final int tPOUND      = -7;
+	/** @deprecated */ public static final int tPOUNDPOUND = IToken.tPOUNDPOUND;
+	/** @deprecated */ public static final int tPOUND      = IToken.tPOUND;
 
     public void setOffsetBoundary( int offset );
 	public void setContentAssistMode( int offset );
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java
index 014ed3731f5..7aac95532a3 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java
@@ -41,287 +41,188 @@ public interface IToken {
 	
 	
 	// Token types
+	int FIRST_RESERVED_SCANNER= -100;
+	int LAST_RESERVED_SCANNER= -1;
+	
 	static public final int tIDENTIFIER = 1;
-
 	static public final int tINTEGER = 2;
-
 	static public final int tCOLONCOLON = 3;
-
 	static public final int tCOLON = 4;
-
 	static public final int tSEMI = 5;
-
 	static public final int tCOMMA = 6;
-
 	static public final int tQUESTION = 7;
-
 	static public final int tLPAREN = 8;
-
 	static public final int tRPAREN = 9;
-
 	static public final int tLBRACKET = 10;
-
 	static public final int tRBRACKET = 11;
-
 	static public final int tLBRACE = 12;
-
 	static public final int tRBRACE = 13;
-
 	static public final int tPLUSASSIGN = 14;
-
 	static public final int tINCR = 15;
-
 	static public final int tPLUS = 16;
-
 	static public final int tMINUSASSIGN = 17;
-
 	static public final int tDECR = 18;
-
 	static public final int tARROWSTAR = 19;
-
 	static public final int tARROW = 20;
-
 	static public final int tMINUS = 21;
-
 	static public final int tSTARASSIGN = 22;
-
 	static public final int tSTAR = 23;
-
 	static public final int tMODASSIGN = 24;
-
 	static public final int tMOD = 25;
-
 	static public final int tXORASSIGN = 26;
-
 	static public final int tXOR = 27;
-
 	static public final int tAMPERASSIGN = 28;
-
 	static public final int tAND = 29;
-
 	static public final int tAMPER = 30;
-
 	static public final int tBITORASSIGN = 31;
-
 	static public final int tOR = 32;
-
 	static public final int tBITOR = 33;
-
-	static public final int tCOMPL = 34;
-
+	static public final int tBITCOMPLEMENT = 34;
 	static public final int tNOTEQUAL = 35;
-
 	static public final int tNOT = 36;
-
 	static public final int tEQUAL = 37;
-
 	static public final int tASSIGN = 38;
-
 	static public final int tSHIFTL = 40;
-
 	static public final int tLTEQUAL = 41;
-
 	static public final int tLT = 42;
-
 	static public final int tSHIFTRASSIGN = 43;
-
 	static public final int tSHIFTR = 44;
-
 	static public final int tGTEQUAL = 45;
-
 	static public final int tGT = 46;
-
 	static public final int tSHIFTLASSIGN = 47;
-
 	static public final int tELLIPSIS = 48;
-
 	static public final int tDOTSTAR = 49;
-
 	static public final int tDOT = 50;
-
 	static public final int tDIVASSIGN = 51;
-
 	static public final int tDIV = 52;
-
+	static public final int tBACKSLASH= 53;
+	
+	/** @deprecated use {@link #tAND} */
 	static public final int t_and = 54;
-
+	/** @deprecated use {@link #tAMPERASSIGN} */
 	static public final int t_and_eq = 55;
 
 	static public final int t_asm = 56;
-
 	static public final int t_auto = 57;
 
+	/** @deprecated use {@link #tAMPER} */
 	static public final int t_bitand = 58;
-
+	/** @deprecated use {@link #tBITOR} */
 	static public final int t_bitor = 59;
 
 	static public final int t_bool = 60;
-
 	static public final int t_break = 61;
-
 	static public final int t_case = 62;
-
 	static public final int t_catch = 63;
-
 	static public final int t_char = 64;
-
 	static public final int t_class = 65;
-
+	
+	/** @deprecated use {@link #tBITCOMPLEMENT} */
+	static public final int tCOMPL= tBITCOMPLEMENT;
+	/** @deprecated use {@link #tBITCOMPLEMENT} */
 	static public final int t_compl = 66;
 
 	static public final int t_const = 67;
-
 	static public final int t_const_cast = 69;
-
 	static public final int t_continue = 70;
-
 	static public final int t_default = 71;
-
 	static public final int t_delete = 72;
-
 	static public final int t_do = 73;
-
 	static public final int t_double = 74;
-
 	static public final int t_dynamic_cast = 75;
-
 	static public final int t_else = 76;
-
 	static public final int t_enum = 77;
-
 	static public final int t_explicit = 78;
-
 	static public final int t_export = 79;
-
 	static public final int t_extern = 80;
-
 	static public final int t_false = 81;
-
 	static public final int t_float = 82;
-
 	static public final int t_for = 83;
-
 	static public final int t_friend = 84;
-
 	static public final int t_goto = 85;
-
 	static public final int t_if = 86;
-
 	static public final int t_inline = 87;
-
 	static public final int t_int = 88;
-
 	static public final int t_long = 89;
-
 	static public final int t_mutable = 90;
-
 	static public final int t_namespace = 91;
-
 	static public final int t_new = 92;
 
+	/** @deprecated use {@link #tNOT} */
 	static public final int t_not = 93;
-
+	/** @deprecated use {@link #tNOTEQUAL} */	
 	static public final int t_not_eq = 94;
 
 	static public final int t_operator = 95;
-
+	
+	/** @deprecated use {@link #tOR} */
 	static public final int t_or = 96;
-
+	/** @deprecated use {@link #tBITORASSIGN} */
 	static public final int t_or_eq = 97;
 
 	static public final int t_private = 98;
-
 	static public final int t_protected = 99;
-
 	static public final int t_public = 100;
-
 	static public final int t_register = 101;
-
 	static public final int t_reinterpret_cast = 102;
-
 	static public final int t_return = 103;
-
 	static public final int t_short = 104;
-
 	static public final int t_sizeof = 105;
-
 	static public final int t_static = 106;
-
 	static public final int t_static_cast = 107;
-
 	static public final int t_signed = 108;
-
 	static public final int t_struct = 109;
-
 	static public final int t_switch = 110;
-
 	static public final int t_template = 111;
-
 	static public final int t_this = 112;
-
 	static public final int t_throw = 113;
-
 	static public final int t_true = 114;
-
 	static public final int t_try = 115;
-
 	static public final int t_typedef = 116;
-
 	static public final int t_typeid = 117;
-
 	static public final int t_typename = 118;
-
 	static public final int t_union = 119;
-
 	static public final int t_unsigned = 120;
-
 	static public final int t_using = 121;
-
 	static public final int t_virtual = 122;
-
 	static public final int t_void = 123;
-
 	static public final int t_volatile = 124;
-
 	static public final int t_wchar_t = 125;
-
 	static public final int t_while = 126;
-
+	
+	/** @deprecated use {@link #tXOR} */
 	static public final int t_xor = 127;
-
+	/** @deprecated use {@link #tXORASSIGN} */
 	static public final int t_xor_eq = 128;
 	
 	static public final int tFLOATINGPT = 129;
-
 	static public final int tSTRING = 130;
-	
 	static public final int tLSTRING = 131;
-
 	static public final int tCHAR = 132;
-	
 	static public final int tLCHAR = 133;
-
 	static public final int t__Bool = 134;
-
 	static public final int t__Complex = 135;
-
 	static public final int t__Imaginary = 136;
-
 	static public final int t_restrict = 137;
-
+	
+	/** @deprecated don't use it */
 	static public final int tMACROEXP = 138;
 	
+	static public final int tPOUND= 	  138;
 	static public final int tPOUNDPOUND = 139;
-	
 	static public final int tCOMPLETION = 140;
-	
 	static public final int tEOC = 141; // End of Completion
 	
+	/** @deprecated don't use it */
 	static public final int tCOMMENT = 142;
-	
+	/** @deprecated don't use it */
 	static public final int tBLOCKCOMMENT = 143;
-
-
+	/** @deprecated don't use it */
 	static public final int tLAST = 143;
 	
+	int FIRST_RESERVED_IGCCToken		= 144;
+	int LAST_RESERVED_IGCCToken			= 199;
+	
+	int FIRST_RESERVED_IExtensionToken	= 243;
+	int LAST_RESERVED_IExtensionToken	= 299;
 }
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java
index 7463e803192..b299c751172 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/Keywords.java
@@ -19,8 +19,6 @@ public class Keywords {
 	public static final String CAST = "cast"; //$NON-NLS-1$
 	public static final String ALIGNOF = "alignof"; //$NON-NLS-1$
 	public static final String TYPEOF = "typeof"; //$NON-NLS-1$
-	public static final String cpMIN = "<?"; //$NON-NLS-1$
-	public static final String cpMAX = ">?"; //$NON-NLS-1$
 	
 	public static final String _BOOL = "_Bool"; //$NON-NLS-1$
 	public static final String _COMPLEX = "_Complex"; //$NON-NLS-1$
@@ -231,9 +229,14 @@ public class Keywords {
 	public static final char[] cpDOT = 	".".toCharArray(); //$NON-NLS-1$
 	public static final char[] cpDIVASSIGN =	"/=".toCharArray(); //$NON-NLS-1$
 	public static final char[] cpDIV = 	"/".toCharArray(); //$NON-NLS-1$
+	public static final char[] cpBACKSLASH = "\\".toCharArray(); //$NON-NLS-1$
 	public static final char[] cpPOUND = "#".toCharArray(); //$NON-NLS-1$
 	public static final char[] cpPOUNDPOUND = "##".toCharArray(); //$NON-NLS-1$
 	
+	// gcc extensions
+	public static final char[] cpMIN = "<?".toCharArray(); //$NON-NLS-1$
+	public static final char[] cpMAX = ">?".toCharArray(); //$NON-NLS-1$
+	
 	// preprocessor keywords
 	public static final char[] cIFDEF = "ifdef".toCharArray(); //$NON-NLS-1$
 	public static final char[] cIFNDEF = "ifndef".toCharArray(); //$NON-NLS-1$
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java
new file mode 100644
index 00000000000..506f7898ee2
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CompletionTokenException.java
@@ -0,0 +1,24 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+class CompletionTokenException extends Exception {
+	private static final long serialVersionUID= 1L; // Exception is Serializable
+	private final Token fToken; // assigned once, in the constructor
+	/** Wraps the token the lexer was scanning when it ran into the end of the input. */
+	public CompletionTokenException(Token token) {
+		fToken= token;
+	}
+	/** Returns the token this exception was created with. */
+	public Token getToken() {
+		return fToken;
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java
new file mode 100644
index 00000000000..d83c81e8c14
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/DigraphToken.java
@@ -0,0 +1,21 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+class DigraphToken extends Token { // created by Lexer.newDigraphToken()
+	public DigraphToken(int kind, int offset, int endOffset) {
+		super(kind, offset, endOffset);
+	}
+	/** Returns the image that {@code TokenUtil.getDigraphImage()} supplies for the type of this token. */
+	public char[] getTokenImage() {
+		return TokenUtil.getDigraphImage(getType());
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java
new file mode 100644
index 00000000000..8c5c9043b5a
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ILexerLog.java
@@ -0,0 +1,18 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+public interface ILexerLog {
+	/** Reports a problem with the given id that was detected between offset and endOffset of the source. */
+	void handleProblem(int problemID, char[] source, int offset, int endOffset);
+	/** Reports a comment found in the source; isBlockComment tells block comments from line comments. */
+	void handleComment(boolean isBlockComment, char[] source, int offset, int endOffsetLast);
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java
new file mode 100644
index 00000000000..eabddb459b8
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java
@@ -0,0 +1,942 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+import org.eclipse.cdt.core.dom.ast.IASTProblem;
+import org.eclipse.cdt.core.parser.IGCCToken;
+import org.eclipse.cdt.core.parser.IProblem;
+import org.eclipse.cdt.core.parser.IToken;
+
+/**
+ * In short this class converts line endings (to '\n') and trigraphs 
+ * (to their corresponding character), 
+ * removes line-splices, comments and whitespace other than newline.
+ * Returns preprocessor tokens.
+ * <p>
+ * In addition to the preprocessor tokens the following tokens may also be returned:
+ * {@link #tEND_OF_INPUT}, {@link IToken#tCOMPLETION}.
+ * <p>
+ * Number literals are split up into {@link IToken#tINTEGER} and {@link IToken#tFLOATINGPT}. 
+ * No checks are done on the number literals.
+ * <p>
+ * Universal character names (UCNs) are accepted, however characters from outside of the basic
+ * source character set are not converted to UCNs. Rather than that they are tested with 
+ * {@link Character#isUnicodeIdentifierPart(char)} and may be accepted as part of an 
+ * identifier.
+ * <p>
+ * The characters in string literals and char-literals are left as they are found, no conversion to
+ * an execution character-set is performed.
+ */
+final public class Lexer {
+	public static final int tNEWLINE		= IToken.FIRST_RESERVED_SCANNER + 1;	// end of a line
+	public static final int tEND_OF_INPUT	= IToken.FIRST_RESERVED_SCANNER + 2;	// no more input
+	public static final int tQUOTE_HEADER_NAME    = IToken.FIRST_RESERVED_SCANNER + 3;	// "..." in an include directive
+	public static final int tSYSTEM_HEADER_NAME   = IToken.FIRST_RESERVED_SCANNER + 4;	// <...> in an include directive
+	// pseudo characters returned by fetchCharPhase3(); these are not token types (compare tEND_OF_INPUT)
+	private static final int END_OF_INPUT = -1;
+	private static final int LINE_SPLICE_SEQUENCE = -2;
+	
+	public static class LexerOptions {
+		public boolean fSupportDollarInitializers= true;	// '$' may start and continue an identifier
+		public boolean fSupportMinAndMax= true;	// '<?' and '>?' are returned as IGCCToken.tMIN and tMAX
+		public boolean fSupportContentAssist= false;	// end of input yields tCOMPLETION or a CompletionTokenException
+	}
+
+	// configuration
+	private final LexerOptions fOptions;
+	private final ILexerLog fLog;	// receives problems and comments
+	
+	// the input to the lexer
+	private final char[] fInput;
+	private final int fLimit;	// scanning stops in front of this offset
+
+	// after phase 3 (newline, trigraph, line-splice)
+	private int fOffset;	// offset at which the pre-fetched character starts
+	private int fEndOffset;	// offset at which the next fetch continues
+	private int fCharPhase3;	// the pre-fetched character, or END_OF_INPUT
+	
+	private boolean fInsideIncludeDirective= false;	// set by the client, cleared at the next newline
+	private Token fToken;	// result of the last nextToken() or nextDirective(), initially null
+	
+	// for the few cases where we have to lookahead more than one character
+	private int fMarkOffset;
+	private int fMarkEndOffset;
+	private int fMarkPrefetchedChar;
+	
+	
+	public Lexer(char[] input, LexerOptions options, ILexerLog log) {
+		// Scans the entire array: this is the same as using the constructor
+		// that takes a limit with the length of the input as limit. That
+		// constructor stores the arguments and pre-fetches the first
+		// character.
+		this(input, input.length, options, log);
+	}
+
+	public Lexer(char[] input, int limit, LexerOptions options, ILexerLog log) {
+		fOptions= options;	// configuration
+		fLog= log;
+		fInput= input;		// scanning stops in front of the offset 'limit'
+		fLimit= limit;
+		nextCharPhase3();	// pre-fetch the first character for fetchToken()
+	}
+	
+	/**
+	 * Call this before consuming the name-token in the include directive. It causes the header-file 
+	 * tokens to be created. 
+	 */
+	public void setInsideIncludeDirective() {
+		fInsideIncludeDirective= true; // cleared again by fetchToken() when it reaches a newline
+	}
+	
+	/** 
+	 * Returns the current preprocessor token, does not advance.
+	 */
+	public Token currentToken() {
+		return fToken; // null as long as neither nextToken() nor nextDirective() has been called
+	}
+	
+	/**
+	 * Advances to the next token, skipping whitespace other than newline.
+	 * @throws CompletionTokenException when completion is requested in a literal or an header-name.
+	 */
+	public Token nextToken() throws CompletionTokenException {
+		return fToken= fetchToken(); // remembered for currentToken() and nextDirective()
+	}
+
+	/** 
+	 * Advances to the next pound token that starts a preprocessor directive. 
+	 * @return pound token of the directive or end-of-input.
+	 * @throws CompletionTokenException when completion is requested in a literal or an header-name.
+	 */
+	public Token nextDirective() throws CompletionTokenException {
+		// A pound starts a directive only when it is the first token of a line:
+		// either no token has been returned so far, or the previous token was a
+		// newline.
+		Token t= fToken;
+		boolean haveNL= t==null || t.getType() == tNEWLINE;
+		while(true) {
+			// Fetch exactly one token per iteration, so that every token gets
+			// inspected. Fetching a second token at the end of the loop body would
+			// silently drop tokens, including the pound that is searched for.
+			t= fetchToken();
+			final int type= t.getType();
+			if (type == tEND_OF_INPUT) {
+				// nothing left to scan, the end-of-input token is the result
+				break;
+			}
+			if (haveNL && type == IToken.tPOUND) {
+				break;
+			}
+			// A newline (re-)establishes the start of a line, which also covers
+			// consecutive newlines, i.e. empty lines in front of a directive. Any
+			// other token means that a following pound is not a directive.
+			haveNL= type == tNEWLINE;
+		} 
+		fToken= t;
+		return t;
+	}
+	
+	/**
+	 * Computes the next token.
+	 */
+	private Token fetchToken() throws CompletionTokenException {
+		while(true) { // repeated only after white-space, comments and bad characters
+			final int start= fOffset; // offset of c
+			final int c= fCharPhase3; // first character of the token
+			final int d= nextCharPhase3(); // lookahead, now the pre-fetched character
+			switch(c) {
+			case END_OF_INPUT:
+				return newToken(Lexer.tEND_OF_INPUT, start);
+			case '\n':
+				fInsideIncludeDirective= false;
+				return newToken(Lexer.tNEWLINE, start);
+			case ' ':
+			case '\t':
+			case 0xb:  // vertical tab
+			case '\f': 
+			case '\r':
+				continue;
+			// 'L' introduces a wide string or char literal, otherwise it starts an identifier
+			case 'L':
+				switch(d) {
+				case '"':
+					nextCharPhase3();
+					return stringLiteral(start, true);
+				case '\'':
+					nextCharPhase3();
+					return charLiteral(start, true);
+				}
+				return identifier(start, 1);
+			// inside of an include directive a quote starts a header-name
+			case '"':
+				if (fInsideIncludeDirective) {
+					return headerName(start, true);
+				}
+				return stringLiteral(start, false);
+
+			case '\'':
+				return charLiteral(start, false);
+			// all letters but 'L', which is handled above
+			case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': 
+			case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 
+			case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+			case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
+			case 'J': case 'K':           case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 
+			case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+			case '_':
+				return identifier(start, 1);
+			// without the option a '$' is reported as bad character below
+			case '$':
+				if (fOptions.fSupportDollarInitializers) {
+					return identifier(start, 1);
+				}
+				break;
+			// a backslash followed by 'u' or 'U' starts an identifier, two characters are consumed
+			case '\\':
+				switch(d) {
+				case 'u': case 'U':
+					nextCharPhase3();
+					return identifier(start, 2);
+				}
+				return newToken(IToken.tBACKSLASH, start);
+
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				return number(start, 1, false);
+
+			case '.':
+				switch(d) {
+				case '0': case '1': case '2': case '3': case '4':
+				case '5': case '6': case '7': case '8': case '9':
+					nextCharPhase3();
+					return number(start, 2, true);
+				// '...' needs a second character of lookahead, which is undone if it is not a '.'
+				case '.':
+					markPhase3();
+					if (nextCharPhase3() == '.') {
+						nextCharPhase3();
+						return newToken(IToken.tELLIPSIS, start);
+					}
+					restorePhase3();
+					break;
+
+				case '*':
+					nextCharPhase3();
+					return newToken(IToken.tDOTSTAR, start);
+				}
+				return newToken(IToken.tDOT, start);
+
+			case '#':
+				if (d == '#') {
+					nextCharPhase3();
+					return newToken(IToken.tPOUNDPOUND, start);
+				}
+				return newToken(IToken.tPOUND, start);
+
+			case '{':
+				return newToken(IToken.tLBRACE, start);
+			case '}':
+				return newToken(IToken.tRBRACE, start);
+			case '[':
+				return newToken(IToken.tLBRACKET, start);
+			case ']':
+				return newToken(IToken.tRBRACKET, start);
+			case '(':
+				return newToken(IToken.tLPAREN, start);
+			case ')':
+				return newToken(IToken.tRPAREN, start);
+			case ';':
+				return newToken(IToken.tSEMI, start);
+			// '::', the digraph ':>' for ']', or a plain colon
+			case ':':
+				switch(d) {
+				case ':':
+					nextCharPhase3();
+					return newToken(IToken.tCOLONCOLON, start);
+				case '>': 
+					nextCharPhase3();
+					return newDigraphToken(IToken.tRBRACKET, start);
+				}
+				return newToken(IToken.tCOLON, start);
+
+			case '?':
+				return newToken(IToken.tQUESTION, start);
+
+			case '+':
+				switch (d) {
+				case '+':
+					nextCharPhase3();
+					return newToken(IToken.tINCR, start);
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tPLUSASSIGN, start);
+				}
+				return newToken(IToken.tPLUS, start);
+
+			case '-':
+				switch (d) {
+				case '>': 
+					int e= nextCharPhase3();
+					if (e == '*') {
+						nextCharPhase3();
+						return newToken(IToken.tARROWSTAR, start);
+					}
+					return newToken(IToken.tARROW, start);
+
+				case '-':
+					nextCharPhase3();
+					return newToken(IToken.tDECR, start);
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tMINUSASSIGN, start);
+				}
+				return newToken(IToken.tMINUS, start);
+
+			case '*':
+				if (d == '=') {
+					nextCharPhase3();
+					return newToken(IToken.tSTARASSIGN, start);
+				}
+				return newToken(IToken.tSTAR, start);
+			// comments are reported to the log and then skipped like white-space
+			case '/':
+				switch (d) {
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tDIVASSIGN, start);
+				case '/':
+					nextCharPhase3();
+					lineComment(start);
+					continue; 
+				case '*':
+					nextCharPhase3();
+					blockComment(start);
+					continue;
+				}
+				return newToken(IToken.tDIV, start);
+			// '%=', the digraphs '%>' for '}', '%:' for '#' and '%:%:' for '##', or a plain '%'
+			case '%':
+				switch (d) {
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tMODASSIGN, start);
+				case '>':
+					nextCharPhase3();
+					return newDigraphToken(IToken.tRBRACE, start);
+				case ':':
+					final int e= nextCharPhase3();
+					if (e == '%') {
+						markPhase3();
+						if (nextCharPhase3() == ':') {
+							nextCharPhase3();
+							return newDigraphToken(IToken.tPOUNDPOUND, start);
+						}
+						restorePhase3();
+					}
+					return newDigraphToken(IToken.tPOUND, start);
+				}
+				return newToken(IToken.tMOD, start);
+
+			case '^':
+				if (d == '=') {
+					nextCharPhase3();
+					return newToken(IToken.tXORASSIGN, start);
+				}
+				return newToken(IToken.tXOR, start);
+
+			case '&':
+				switch (d) {
+				case '&':
+					nextCharPhase3();
+					return newToken(IToken.tAND, start);
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tAMPERASSIGN, start);
+				}
+				return newToken(IToken.tAMPER, start);
+
+			case '|':
+				switch (d) {
+				case '|':
+					nextCharPhase3();
+					return newToken(IToken.tOR, start);
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tBITORASSIGN, start);
+				}
+				return newToken(IToken.tBITOR, start);
+
+			case '~':
+				return newToken(IToken.tBITCOMPLEMENT, start);
+
+			case '!':
+				if (d == '=') {
+					nextCharPhase3();
+					return newToken(IToken.tNOTEQUAL, start);
+				}
+				return newToken(IToken.tNOT, start);
+
+			case '=':
+				if (d == '=') {
+					nextCharPhase3();
+					return newToken(IToken.tEQUAL, start);
+				}
+				return newToken(IToken.tASSIGN, start);
+			// inside of an include directive a '<' starts a header-name
+			case '<':
+				if (fInsideIncludeDirective) {
+					return headerName(start, false);
+				}
+				// otherwise an operator, or one of the digraphs '<:' for '[' and '<%' for '{'
+				switch(d) {
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tLTEQUAL, start);
+				case '<':
+					final int e= nextCharPhase3();
+					if (e == '=') {
+						nextCharPhase3();
+						return newToken(IToken.tSHIFTLASSIGN, start);
+					} 
+					return newToken(IToken.tSHIFTL, start);
+				case '?':
+					if (fOptions.fSupportMinAndMax) {
+						nextCharPhase3();
+						return newToken(IGCCToken.tMIN, start);
+					} 
+					break;
+				case ':':
+					nextCharPhase3();
+					return newDigraphToken(IToken.tLBRACKET, start);
+				case '%':
+					nextCharPhase3();
+					return newDigraphToken(IToken.tLBRACE, start);
+				}
+				return newToken(IToken.tLT, start);
+
+			case '>':
+				switch(d) {
+				case '=':
+					nextCharPhase3();
+					return newToken(IToken.tGTEQUAL, start);
+				case '>':
+					final int e= nextCharPhase3();
+					if (e == '=') {
+						nextCharPhase3();
+						return newToken(IToken.tSHIFTRASSIGN, start);
+					} 
+					return newToken(IToken.tSHIFTR, start);
+				case '?':
+					if (fOptions.fSupportMinAndMax) {
+						nextCharPhase3();
+						return newToken(IGCCToken.tMAX, start);
+					} 
+					break;
+				}
+				return newToken(IToken.tGT, start);
+
+			case ',':
+				return newToken(IToken.tCOMMA, start);
+
+			default:
+				// in case we have some other letter to start an identifier
+				if (Character.isUnicodeIdentifierStart((char) c)) {
+					return identifier(start, 1);
+				}
+				break;
+			}
+			// reached by a 'break' out of the outer switch only: c cannot start a token
+			handleProblem(IASTProblem.SCANNER_BAD_CHARACTER, start);
+			// loop is continued, character is treated as white-space.
+		}
+    }
+
+	private Token newToken(int kind, int offset) {
+    	return new SimpleToken(kind, offset, fOffset); // ends where the pre-fetched character starts
+    }
+
+	private Token newDigraphToken(int kind, int offset) {
+    	return new DigraphToken(kind, offset, fOffset); // ends where the pre-fetched character starts
+    }
+
+    private Token newToken(int kind, int offset, int length) {
+    	return new TokenWithImage(kind, this, offset, fOffset, length); // length: characters counted after phase 3; presumably handed back to getTokenImage() - confirm in TokenWithImage
+    }
+
+    private void handleProblem(int problemID, int offset) {
+    	fLog.handleProblem(problemID, fInput, offset, fOffset); // range ends where the pre-fetched character starts
+    }
+
+    private Token headerName(final int start, final boolean expectQuotes) throws CompletionTokenException {
+		// The opening delimiter has been consumed by fetchToken(), it accounts
+		// for the initial image length of one.
+		final int kind= expectQuotes ? tQUOTE_HEADER_NAME : tSYSTEM_HEADER_NAME;
+		final int terminator= expectQuotes ? '"' : '>';
+		int length= 1;
+
+		for (int c= fCharPhase3; ; c= nextCharPhase3()) {
+			if (c == END_OF_INPUT && fOptions.fSupportContentAssist) {
+				// completion inside of a header-name, hand out what we have so far
+				throw new CompletionTokenException(newToken(kind, start, length));
+			}
+			if (c == END_OF_INPUT || c == '\n') {
+				// unterminated header-name: report it and return the incomplete
+				// token, the newline itself is left for the next token
+				handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
+				break;
+			}
+
+			length++;
+			if (c == terminator) {
+				// move behind the closing delimiter, it is part of the token
+				nextCharPhase3();
+				break;
+			}
+		}
+		return newToken(kind, start, length);
+	}
+
+	private void blockComment(final int start) {
+		// fetchToken() has consumed the opening '/*', such that the pre-fetched
+		// character is the first one of the comment body. It has to be inspected
+		// rather than skipped, otherwise the comment '/**/' is not closed.
+		int c= fCharPhase3;
+		while (c != END_OF_INPUT) {
+			if (c != '*') {
+				c= nextCharPhase3();
+				continue;
+			}
+			// a '*' ends the comment only if directly followed by '/'; if not, the
+			// character behind it is inspected again, it may be another '*'
+			c= nextCharPhase3();
+			if (c == '/') {
+				nextCharPhase3();
+				break;
+			}
+		}
+		// an unterminated comment extends to the end of the input
+		fLog.handleComment(true, fInput, start, fOffset);
+	}
+
+	private void lineComment(final int start) {
+		// fetchToken() has consumed the '//'. Advance to the end of the line
+		// without consuming the newline, it is returned as the next token.
+		// Spliced lines have been joined by phase 3 and therefore belong to
+		// the comment.
+		int c= fCharPhase3;
+		while (c != END_OF_INPUT && c != '\n') {
+			c= nextCharPhase3();
+		}
+		// fOffset is the offset of the newline, or the end of the input
+		fLog.handleComment(false, fInput, start, fOffset);
+	}
+
+	private Token stringLiteral(final int start, final boolean wide) throws CompletionTokenException {
+		// The opening quote and, for wide literals, the prefix 'L' have been
+		// consumed by fetchToken(); they make up the initial image length.
+		final int kind= wide ? IToken.tLSTRING : IToken.tSTRING;
+		int length= wide ? 2 : 1;
+		// true, if the previous character is a backslash that is not escaped itself
+		boolean escaped= false;
+
+		for (int c= fCharPhase3; ; c= nextCharPhase3()) {
+			if (c == END_OF_INPUT && fOptions.fSupportContentAssist) {
+				// completion inside of the literal, hand out what we have so far
+				throw new CompletionTokenException(newToken(kind, start, length));
+			}
+			if (c == END_OF_INPUT || c == '\n') {
+				// unterminated literal: report it and return the incomplete token,
+				// the newline itself is left for the next token
+				handleProblem(IProblem.SCANNER_UNBOUNDED_STRING, start);
+				break;
+			}
+
+			// every other character belongs to the literal
+			length++;
+			if (c == '"' && !escaped) {
+				// move behind the closing quote, it is part of the token
+				nextCharPhase3();
+				break;
+			}
+
+			// a backslash escapes the next character, unless it is escaped itself;
+			// any other character ends a pending escape
+			escaped= c == '\\' && !escaped;
+		}
+
+		return newToken(kind, start, length);
+	}
+	
+	private Token charLiteral(final int start, boolean wide) throws CompletionTokenException {
+		// The opening quote and, for wide literals, the prefix 'L' have been
+		// consumed by fetchToken(); they make up the initial image length.
+		final int kind= wide ? IToken.tLCHAR : IToken.tCHAR;
+		int length= wide ? 2 : 1;
+		// true, if the previous character is a backslash that is not escaped itself
+		boolean escaped= false;
+
+		for (int c= fCharPhase3; ; c= nextCharPhase3()) {
+			if (c == END_OF_INPUT && fOptions.fSupportContentAssist) {
+				// completion inside of the literal, hand out what we have so far
+				throw new CompletionTokenException(newToken(kind, start, length));
+			}
+			if (c == END_OF_INPUT || c == '\n') {
+				// unterminated literal: report it and return the incomplete token,
+				// the newline itself is left for the next token
+				handleProblem(IProblem.SCANNER_BAD_CHARACTER, start);
+				break;
+			}
+
+			// every other character belongs to the literal
+			length++;
+			if (c == '\'' && !escaped) {
+				// move behind the closing quote, it is part of the token
+				nextCharPhase3();
+				break;
+			}
+
+			// a backslash escapes the next character, unless it is escaped itself;
+			// any other character ends a pending escape
+			escaped= c == '\\' && !escaped;
+		}
+		return newToken(kind, start, length);
+	}
+	
+	private Token identifier(int start, int length) { // length: characters the caller has consumed already
+		int tokenKind= IToken.tIDENTIFIER;
+    	boolean isPartOfIdentifier= true;
+    	int c= fCharPhase3;
+        while (true) {
+        	switch(c) {
+            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': 
+            case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 
+            case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
+            case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 
+            case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+            case '_': 
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+            	break;
+            	// only '\' followed by 'u' or 'U' continues the identifier, otherwise the lookahead is undone
+            case '\\': // universal character name
+            	markPhase3();
+            	switch(nextCharPhase3()) {
+            	case 'u': case 'U':
+            		length++; // for the backslash, the 'u' is counted at the end of the loop
+            		break;
+            	default:
+            		restorePhase3();
+            		isPartOfIdentifier= false;
+            		break;
+            	}
+            	break;
+
+            case END_OF_INPUT:
+				if (fOptions.fSupportContentAssist) {
+					tokenKind= IToken.tCOMPLETION; // identifier that extends to the end of the input
+				}
+				isPartOfIdentifier= false;
+				break;
+            case ' ': case '\t': case 0xb: case '\f': case '\r': case '\n':
+                isPartOfIdentifier= false;
+            	break;
+
+            case '$':
+            	isPartOfIdentifier= fOptions.fSupportDollarInitializers;
+            	break;
+            	// punctuation, listed explicitly such that it does not reach the default case
+            case '{': case '}': case '[': case ']': case '#': case '(': case ')': case '<': case '>':
+            case '%': case ':': case ';': case '.': case '?': case '*': case '+': case '-': case '/':
+            case '^': case '&': case '|': case '~': case '!': case '=': case ',': case '"': case '\'':
+            	isPartOfIdentifier= false;
+            	break;
+            	
+            default:
+            	isPartOfIdentifier= Character.isUnicodeIdentifierPart((char) c);
+            	break;
+        	}
+        	
+        	if (!isPartOfIdentifier) {
+        		break; // c stays the pre-fetched character, it starts the next token
+        	}
+        	
+        	length++;
+        	c= nextCharPhase3();
+        }
+
+        return newToken(tokenKind, start, length);
+	}
+	
+	private Token number(final int start, int length, boolean isFloat) throws CompletionTokenException {
+		// Scans the rest of a number literal. The caller passes the offset of the first
+		// character and the count of characters it has consumed already.
+		boolean isPartOfNumber= true;
+		int c= fCharPhase3;
+		while (true) {
+			switch(c) {
+			// non-digit
+			case 'a': case 'b': case 'c': case 'd':           case 'f': case 'g': case 'h': case 'i': 
+			case 'j': case 'k': case 'l': case 'm': case 'n': case 'o':           case 'q': case 'r': 
+			case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+			case 'A': case 'B': case 'C': case 'D':           case 'F': case 'G': case 'H': case 'I':
+			case 'J': case 'K': case 'L': case 'M': case 'N': case 'O':           case 'Q': case 'R': 
+			case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+			case '_': 
+			// digit
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				break;
+
+			// period
+			case '.':
+				isFloat= true;
+				break;
+
+			// exponent, may be followed by a sign
+			case 'p': case 'P':
+			case 'e': case 'E':
+				length++;
+				c= nextCharPhase3();
+				switch (c) {
+				case '+': case '-':
+				case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+					isFloat= true;
+					length++;
+					c= nextCharPhase3();
+					break;
+				}
+				// c has not been inspected yet, do so without consuming another character
+				continue;
+
+			// universal character name (non-digit)
+			case '\\':
+				markPhase3();
+				switch(nextCharPhase3()) {
+				case 'u': case 'U':
+					length++;
+					break;
+				default:
+					restorePhase3();
+					isPartOfNumber= false;
+					break;
+				}
+				break;
+
+			// The end of the input is signalled by the pseudo character END_OF_INPUT. The
+			// token type tEND_OF_INPUT is not what fetchCharPhase3() returns in that case.
+			case END_OF_INPUT:
+				if (fOptions.fSupportContentAssist) {
+					throw new CompletionTokenException(
+							newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length));
+				}
+				isPartOfNumber= false;
+				break;
+
+			default:
+				isPartOfNumber= false;
+				break;
+			}
+			if (!isPartOfNumber) {
+				break;
+			}
+			c= nextCharPhase3();
+			length++;
+		}
+		return newToken((isFloat ? IToken.tFLOATINGPT : IToken.tINTEGER), start, length);
+	}
+	
+	
+	/**
+	 * Saves the current state of phase3, necessary for '...', '%:%:' and UCNs.
+	 */
+	private void markPhase3() {
+		fMarkPrefetchedChar= fCharPhase3;	// the pre-fetched character
+		fMarkOffset= fOffset;				// the offset it starts at
+		fMarkEndOffset= fEndOffset;			// the offset the next fetch continues at
+	}
+	
+	/**
+	 * Restores a previously saved state of phase3.
+	 */
+	private void restorePhase3() {
+		fCharPhase3= fMarkPrefetchedChar;	// the pre-fetched character
+		fOffset= fMarkOffset;				// the offset it starts at
+		fEndOffset= fMarkEndOffset;			// the offset the next fetch continues at
+	}
+	
+	/**
+	 * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
+	 * Changes fOffset, fEndOffset and fCharPhase3.
+	 */
+	private int nextCharPhase3() {
+		// fetchCharPhase3() reports a line-splice as a pseudo character; splices
+		// are skipped here by fetching again from behind the splice.
+		int pos= fEndOffset;
+		int ch= fetchCharPhase3(pos);
+		while (ch == LINE_SPLICE_SEQUENCE) {
+			pos= fEndOffset;
+			ch= fetchCharPhase3(pos);
+		}
+		fOffset= pos;	// start of the character that is returned
+		fCharPhase3= ch;
+		return ch;
+	}
+	
+	/**
+	 * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
+	 * Changes <code>fEndOffset</code>, but is stateless otherwise.
+	 */
+	private int fetchCharPhase3(int pos) {
+		if (pos >= fLimit) {
+			fEndOffset= fLimit;
+			return END_OF_INPUT;
+		}
+		final char c= fInput[pos++]; // pos is now the offset behind c
+		switch(c) {
+			// windows line-ending
+			case '\r':
+			if (pos < fLimit && fInput[pos] == '\n') {	
+				fEndOffset= pos+1;
+				return '\n';
+			}
+			fEndOffset= pos;
+			return c; // a '\r' without '\n' is passed on unchanged
+
+		// trigraph sequences
+		case '?':
+			if (pos+1 >= fLimit || fInput[pos] != '?') {
+				fEndOffset= pos;
+				return c;
+			}
+			final char trigraph= checkTrigraph(fInput[pos+1]);
+			if (trigraph == 0) {
+				fEndOffset= pos; // '??' not followed by a trigraph character, return the first '?' only
+				return c;
+			}
+			if (trigraph != '\\') {
+				fEndOffset= pos+2;
+				return trigraph;
+			}
+			pos+= 2; // the trigraph for a backslash may start a line-splice, too
+			// no break, handle backslash
+		
+		case '\\':
+			final int lsPos= findEndOfLineSpliceSequence(pos);
+			if (lsPos > pos) {
+				fEndOffset= lsPos; // continue behind the splice, the caller fetches again
+				return LINE_SPLICE_SEQUENCE;
+			}
+			fEndOffset= pos;
+			return '\\';	// don't return c, it may be a '?'
+			
+		default:
+			fEndOffset= pos;
+			return c;
+		}
+	}
+
+	/**
+	 * Maps a trigraph to the character it encodes.
+	 * @param c trigraph without leading question marks.
+	 * @return the character encoded or 0.
+	 */
+	private char checkTrigraph(char c) {
+		// The first string lists the characters that may follow the two question
+		// marks of a trigraph, the second one holds at the same index the
+		// character the trigraph stands for.
+		final String thirdChars=   "='()!<>-/"; //$NON-NLS-1$
+		final String replacements= "#^[]|{}~\\"; //$NON-NLS-1$
+		final int idx= thirdChars.indexOf(c);
+		if (idx < 0) {
+			// the character does not complete a trigraph
+			return 0;
+		}
+		// the table lookup replaces one return statement per trigraph
+		return replacements.charAt(idx);
+	}
+
+	/**
+	 * Returns the endoffset for a line-splice sequence, or -1 if there is none.
+	 */
+	private int findEndOfLineSpliceSequence(int pos) { // pos: offset behind a backslash
+		boolean haveBackslash= true; // true while a backslash waits for its newline
+		int result= -1;
+		loop: while(pos < fLimit) {
+			switch(fInput[pos++]) {
+			case '\n':	
+				if (haveBackslash) {
+					result= pos; // a splice is complete, go on looking for a directly following one
+					haveBackslash= false;
+					continue loop;
+				}
+				return result; 					
+		
+			case '\r': case ' ': case '\f': case '\t': case 0xb: // vertical tab  
+				if (haveBackslash) {
+					continue loop; // white-space between the backslash and the newline is skipped
+				}
+				return result;
+			
+			case '?':
+				if (pos+1 >= fLimit || fInput[pos] != '?' || fInput[++pos] != '/') {
+					return result;
+				}
+				// fall through to backslash handling
+					
+			case '\\':
+				if (!haveBackslash) {
+					haveBackslash= true;
+					continue loop;
+				}
+				return result;
+
+			default:
+				return result;
+			}
+		}
+		return result;
+	}
+
+	/**
+	 * Returns the image from the input without any modification.
+	 */
+	public char[] getInputChars(int offset, int endOffset) {
+		// plain copy of the input between offset and endOffset, phases 1-3 are not applied
+		final char[] image= new char[endOffset-offset];
+		System.arraycopy(fInput, offset, image, 0, image.length);
+		return image;
+	}
+
+	/**
+	 * Returns the image with trigraphs replaced and line-splices removed.
+	 */
+	char[] getTokenImage(int offset, int endOffset, int imageLength) {
+		final char[] image= new char[imageLength];
+		if (endOffset-offset == imageLength) {
+			// nothing was replaced or removed, the input is the image
+			System.arraycopy(fInput, offset, image, 0, imageLength);
+			return image;
+		}
+		// Run phases 1-3 over the token again. fetchCharPhase3() changes fEndOffset,
+		// for that reason the state of the scanner is saved and restored afterwards.
+		markPhase3();
+		fEndOffset= offset;
+		for (int filled= 0; filled < imageLength; ) {
+			final int c= fetchCharPhase3(fEndOffset);
+			if (c != LINE_SPLICE_SEQUENCE) {
+				image[filled++]= (char) c;
+			}
+		}
+		restorePhase3();
+		return image;
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/SimpleToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/SimpleToken.java
new file mode 100644
index 00000000000..d930f044a16
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/SimpleToken.java
@@ -0,0 +1,21 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+class SimpleToken extends Token { // created by Lexer.newToken(int, int)
+	public SimpleToken(int kind, int offset, int endOffset) {
+		super(kind, offset, endOffset);
+	}
+	/** Returns the image that {@code TokenUtil.getImage()} supplies for the type of this token. */
+	public char[] getTokenImage() {
+		return TokenUtil.getImage(getType());
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Token.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Token.java
new file mode 100644
index 00000000000..0d008e3cf58
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Token.java
@@ -0,0 +1,111 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+import org.eclipse.cdt.core.parser.IToken;
+
+
+public abstract class Token implements IToken {
+	private int fKind;
+	// package-visible: subclasses in this package read the offsets directly
+	int fOffset;
+	int fEndOffset;
+	// successor in the token chain, see setNext()
+	private IToken fNextGrammarToken;
+
+	/** Creates a token of the given kind covering the source range [offset, endOffset). */
+	Token(int kind, int offset, int endOffset) {
+		fKind= kind;
+		fOffset= offset;
+		fEndOffset= endOffset;
+	}
+
+	/** Returns the kind given at construction or by the latest call to setType(). */
+	public int getType() {
+		return fKind;
+	}
+
+	public int getOffset() {
+		return fOffset;
+	}
+
+	public int getEndOffset() {
+		return fEndOffset;
+	}
+
+	/** Returns the length of the source range, i.e. end offset minus offset. */
+	public int getLength() {
+		return fEndOffset-fOffset;
+	}
+
+	/** Returns the token chained to this one via setNext(), or null if there is none. */
+	public IToken getNext() {
+		return fNextGrammarToken;
+	}
+
+	/** Returns the characters of this token; each subclass decides where they come from. */
+	public abstract char[] getTokenImage();
+
+	// for the preprocessor to classify preprocessor tokens
+	public void setType(int kind) {
+		// mstodo make non-public
+		fKind= kind;
+	}
+
+	// for the preprocessor to chain the tokens
+	public void setNext(IToken t) {
+		// mstodo make non-public
+		fNextGrammarToken= t;
+	}
+
+	/** Tells whether the current kind is classified as an operator by TokenUtil. */
+	public boolean isOperator() {
+		// mstodo
+		return TokenUtil.isOperator(fKind);
+	}
+
+	/** Returns the image of this token; this is the array supplied by getTokenImage(). */
+	public char[] getCharImage() {
+		return getTokenImage();
+	}
+
+	/** Returns the image of this token as a string. */
+	public String getImage() {
+		return new String(getTokenImage());
+	}
+
+	// The methods below are not supported, yet.
+	public char[] getFilename() {
+		// mstodo
+		throw new UnsupportedOperationException();
+	}
+
+	public boolean looksLikeExpression() {
+		// mstodo
+		throw new UnsupportedOperationException();
+	}
+
+	public boolean canBeAPrefix() {
+		// mstodo
+		throw new UnsupportedOperationException();
+	}
+
+	public int getLineNumber() {
+		// mstodo
+		throw new UnsupportedOperationException();
+	}
+
+	public boolean isPointer() {
+		// mstodo
+		throw new UnsupportedOperationException();
+	}
+
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenUtil.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenUtil.java
new file mode 100644
index 00000000000..31f4b4248f8
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenUtil.java
@@ -0,0 +1,146 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+import org.eclipse.cdt.core.parser.IGCCToken;
+import org.eclipse.cdt.core.parser.IToken;
+import org.eclipse.cdt.core.parser.Keywords;
+
+
+public class TokenUtil {
+	// Images defined locally rather than taken from Keywords.
+	private static final char[] IMAGE_EMPTY= new char[0];
+	private static final char[] IMAGE_POUND= "#".toCharArray(); //$NON-NLS-1$
+	private static final char[] IMAGE_POUND_POUND= "##".toCharArray(); //$NON-NLS-1$
+
+	// Digraph spellings of brackets, braces and pound signs.
+	private static final char[] DIGRAPH_LBRACKET= "<:".toCharArray(); //$NON-NLS-1$
+	private static final char[] DIGRAPH_RBRACKET= ":>".toCharArray(); //$NON-NLS-1$
+	private static final char[] DIGRAPH_LBRACE= "<%".toCharArray(); //$NON-NLS-1$
+	private static final char[] DIGRAPH_RBRACE= "%>".toCharArray(); //$NON-NLS-1$
+	private static final char[] DIGRAPH_POUND= "%:".toCharArray(); //$NON-NLS-1$
+	private static final char[] DIGRAPH_POUNDPOUND= "%:%:".toCharArray(); //$NON-NLS-1$
+
+	/** Tells whether the given token kind is one of the kinds treated as an operator. */
+	public static boolean isOperator(int kind) {
+		switch (kind) {
+		// allocation
+		case IToken.t_new: case IToken.t_delete:
+		// member access
+		case IToken.tARROW: case IToken.tARROWSTAR:
+		// bitwise
+		case IToken.tAMPER: case IToken.tAMPERASSIGN:
+		case IToken.tBITOR: case IToken.tBITORASSIGN:
+		case IToken.tXOR: case IToken.tXORASSIGN:
+		case IToken.tSHIFTL: case IToken.tSHIFTLASSIGN:
+		case IToken.tSHIFTR: case IToken.tSHIFTRASSIGN:
+		case IToken.tBITCOMPLEMENT:
+		// logical
+		case IToken.tAND: case IToken.tOR: case IToken.tNOT:
+		// arithmetic
+		case IToken.tPLUS: case IToken.tPLUSASSIGN:
+		case IToken.tMINUS: case IToken.tMINUSASSIGN:
+		case IToken.tSTAR: case IToken.tSTARASSIGN:
+		case IToken.tDIV: case IToken.tDIVASSIGN:
+		case IToken.tMOD: case IToken.tMODASSIGN:
+		case IToken.tINCR: case IToken.tDECR:
+		case IGCCToken.tMIN: case IGCCToken.tMAX:
+		// comparison
+		case IToken.tEQUAL: case IToken.tNOTEQUAL:
+		case IToken.tLT: case IToken.tLTEQUAL:
+		case IToken.tGT: case IToken.tGTEQUAL:
+		// assignment and comma
+		case IToken.tASSIGN: case IToken.tCOMMA:
+			return true;
+		default:
+			return false;
+		}
+	}
+
+	/** Returns the shared image for a token kind with a fixed spelling (operators, punctuation). */
+	public static char[] getImage(int type) {
+		switch (type) {
+		case IToken.tPOUND: return IMAGE_POUND;
+		case IToken.tPOUNDPOUND: return IMAGE_POUND_POUND;
+		case IToken.tCOLONCOLON: return Keywords.cpCOLONCOLON;
+		case IToken.tCOLON: return Keywords.cpCOLON;
+		case IToken.tSEMI: return Keywords.cpSEMI;
+		case IToken.tCOMMA: return Keywords.cpCOMMA;
+		case IToken.tQUESTION: return Keywords.cpQUESTION;
+		case IToken.tLPAREN: return Keywords.cpLPAREN;
+		case IToken.tRPAREN: return Keywords.cpRPAREN;
+		case IToken.tLBRACKET: return Keywords.cpLBRACKET;
+		case IToken.tRBRACKET: return Keywords.cpRBRACKET;
+		case IToken.tLBRACE: return Keywords.cpLBRACE;
+		case IToken.tRBRACE: return Keywords.cpRBRACE;
+		case IToken.tPLUSASSIGN: return Keywords.cpPLUSASSIGN;
+		case IToken.tINCR: return Keywords.cpINCR;
+		case IToken.tPLUS: return Keywords.cpPLUS;
+		case IToken.tMINUSASSIGN: return Keywords.cpMINUSASSIGN;
+		case IToken.tDECR: return Keywords.cpDECR;
+		case IToken.tARROWSTAR: return Keywords.cpARROWSTAR;
+		case IToken.tARROW: return Keywords.cpARROW;
+		case IToken.tMINUS: return Keywords.cpMINUS;
+		case IToken.tSTARASSIGN: return Keywords.cpSTARASSIGN;
+		case IToken.tSTAR: return Keywords.cpSTAR;
+		case IToken.tMODASSIGN: return Keywords.cpMODASSIGN;
+		case IToken.tMOD: return Keywords.cpMOD;
+		case IToken.tXORASSIGN: return Keywords.cpXORASSIGN;
+		case IToken.tXOR: return Keywords.cpXOR;
+		case IToken.tAMPERASSIGN: return Keywords.cpAMPERASSIGN;
+		case IToken.tAND: return Keywords.cpAND;
+		case IToken.tAMPER: return Keywords.cpAMPER;
+		case IToken.tBITORASSIGN: return Keywords.cpBITORASSIGN;
+		case IToken.tOR: return Keywords.cpOR;
+		case IToken.tBITOR: return Keywords.cpBITOR;
+		case IToken.tBITCOMPLEMENT: return Keywords.cpCOMPL;
+		case IToken.tNOTEQUAL: return Keywords.cpNOTEQUAL;
+		case IToken.tNOT: return Keywords.cpNOT;
+		case IToken.tEQUAL: return Keywords.cpEQUAL;
+		case IToken.tASSIGN: return Keywords.cpASSIGN;
+		case IToken.tSHIFTL: return Keywords.cpSHIFTL;
+		case IToken.tLTEQUAL: return Keywords.cpLTEQUAL;
+		case IToken.tLT: return Keywords.cpLT;
+		case IToken.tSHIFTRASSIGN: return Keywords.cpSHIFTRASSIGN;
+		case IToken.tSHIFTR: return Keywords.cpSHIFTR;
+		case IToken.tGTEQUAL: return Keywords.cpGTEQUAL;
+		case IToken.tGT: return Keywords.cpGT;
+		case IToken.tSHIFTLASSIGN: return Keywords.cpSHIFTLASSIGN;
+		case IToken.tELLIPSIS: return Keywords.cpELLIPSIS;
+		case IToken.tDOTSTAR: return Keywords.cpDOTSTAR;
+		case IToken.tDOT: return Keywords.cpDOT;
+		case IToken.tDIVASSIGN: return Keywords.cpDIVASSIGN;
+		case IToken.tDIV: return Keywords.cpDIV;
+		case IToken.tBACKSLASH: return Keywords.cpBACKSLASH;
+		// kinds declared in IGCCToken
+		case IGCCToken.tMIN: return Keywords.cpMIN;
+		case IGCCToken.tMAX: return Keywords.cpMAX;
+		}
+		// not a kind with a fixed image: flagged by the assertion, else answered with an empty image
+		assert false: type;
+		return IMAGE_EMPTY;
+	}
+
+	/** Returns the shared digraph spelling for the given kind of bracket, brace or pound token. */
+	public static char[] getDigraphImage(int type) {
+		switch (type) {
+		case IToken.tLBRACKET: return DIGRAPH_LBRACKET;
+		case IToken.tRBRACKET: return DIGRAPH_RBRACKET;
+		case IToken.tLBRACE: return DIGRAPH_LBRACE;
+		case IToken.tRBRACE: return DIGRAPH_RBRACE;
+		case IToken.tPOUND: return DIGRAPH_POUND;
+		case IToken.tPOUNDPOUND: return DIGRAPH_POUNDPOUND;
+		}
+		// no digraph spelling for this kind: flagged by the assertion, else an empty image
+		assert false: type;
+		return IMAGE_EMPTY;
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenWithImage.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenWithImage.java
new file mode 100644
index 00000000000..0f75aea75ed
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/TokenWithImage.java
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * Copyright (c) 2007 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Markus Schorn - initial API and implementation
+ *******************************************************************************/ 
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+class TokenWithImage extends Token {
+	// The image is either handed in ready-made or fetched from the lexer on first request.
+	private final Lexer fLexer;
+	private final int fImageLength;
+	private char[] fImage;
+	/** Creates a token whose image is computed later from the given lexer. */
+	public TokenWithImage(int kind, Lexer source, int offset, int endOffset, int imageLength) {
+		super(kind, offset, endOffset);
+		fImageLength= imageLength;
+		fLexer= source;
+	}
+	/** Creates a token with a ready-made image; no lexer is involved. */
+	public TokenWithImage(int kind, int offset, int endOffset, char[] image) {
+		super(kind, offset, endOffset);
+		fImage= image;
+		fImageLength= 0;
+		fLexer= null;
+	}
+	/** Returns the image, obtaining it from the lexer and caching it if not yet available. */
+	public char[] getTokenImage() {
+		if (fImage != null) {
+			return fImage;
+		}
+		return fImage= fLexer.getTokenImage(fOffset, fEndOffset, fImageLength);
+	}
+}
diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/BaseScanner.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/BaseScanner.java
index d9583ec938c..1f94dd24f2e 100644
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/BaseScanner.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/BaseScanner.java
@@ -4059,7 +4059,7 @@ abstract class BaseScanner implements IScanner {
     private static final MacroExpansionToken EXPANSION_TOKEN = new MacroExpansionToken();
 
     static {
-        CharArrayIntMap words = new CharArrayIntMap(IToken.tLAST, -1);
+        CharArrayIntMap words = new CharArrayIntMap(40, -1);
 
         // Common keywords
         words.put(Keywords.cAUTO, IToken.t_auto);