[271163] support for UTF string literals in scanner

2025-08-09 09:15:38 +02:00 · 2009-04-15 19:19:05 +00:00 · 2009-04-15 19:19:05 +00:00 · e389841b9f
commit e389841b9f
parent c3c92bd841
13 changed files with 358 additions and 48 deletions
--- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java
@ -99,6 +99,14 @@ public class LexerTests extends BaseTestCase {
 		token(IToken.tLSTRING, "L\"" + expectedImage + "\"");
 	}

+	private void utf16str(String expectedImage) throws Exception {
+		token(IToken.tUTF16STRING, "u\"" + expectedImage + "\"");
+	}
+	
+	private void utf32str(String expectedImage) throws Exception {
+		token(IToken.tUTF32STRING, "U\"" + expectedImage + "\"");
+	}
+	
 	private void ch(String expectedImage) throws Exception {
 		token(IToken.tCHAR, expectedImage);
 	}
@ -106,6 +114,14 @@ public class LexerTests extends BaseTestCase {
 	private void wch(String expectedImage) throws Exception {
 		token(IToken.tLCHAR, expectedImage);
 	}
+	
+	private void utf16ch(String expectedImage) throws Exception {
+		token(IToken.tUTF16CHAR, expectedImage);
+	}
+	
+	private void utf32ch(String expectedImage) throws Exception {
+		token(IToken.tUTF32CHAR, expectedImage);
+	}

 	private void eof() throws Exception {
 		IToken t= fLexer.nextToken();
@ -388,9 +404,19 @@ public class LexerTests extends BaseTestCase {
 		ch(lit);
 		eof();

-		lit= 'L'+lit;
-		init(lit);
-		wch(lit);
+		String lit2= 'L'+lit;
+		init(lit2);
+		wch(lit2);
+		eof();
+		
+		lit2= 'u'+lit;
+		init(lit2);
+		utf16ch(lit2);
+		eof();
+		
+		lit2= 'U'+lit;
+		init(lit2);
+		utf32ch(lit2);
 		eof();

 		lit= "'ut\n";
@ -400,24 +426,50 @@ public class LexerTests extends BaseTestCase {
 		nl();
 		eof();

-		lit= 'L'+lit;
-		init(lit);
+		lit2= 'L'+lit;
+		init(lit2);
 		problem(IProblem.SCANNER_BAD_CHARACTER, "L'ut");
 		wch("L'ut");
 		nl();
 		eof();
 		
+		lit2= 'u'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_BAD_CHARACTER, "u'ut");
+		utf16ch("u'ut");
+		nl();
+		eof();
+		
+		lit2= 'U'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_BAD_CHARACTER, "U'ut");
+		utf32ch("U'ut");
+		nl();
+		eof();
+		
 		lit= "'ut\\'";
 		init(lit);
 		problem(IProblem.SCANNER_BAD_CHARACTER, lit);
 		ch("'ut\\'");
 		eof();

-		lit= 'L'+lit;
-		init(lit);
-		problem(IProblem.SCANNER_BAD_CHARACTER, lit);
+		lit2= 'L'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_BAD_CHARACTER, lit2);
 		wch("L'ut\\'");
 		eof();
+		
+		lit2= 'u'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_BAD_CHARACTER, lit2);
+		utf16ch("u'ut\\'");
+		eof();
+		
+		lit2= 'U'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_BAD_CHARACTER, lit2);
+		utf32ch("U'ut\\'");
+		eof();
 	}

 	public void testStringLiteral() throws Exception {
@ -429,6 +481,14 @@ public class LexerTests extends BaseTestCase {
 		init("L\"" + lit + '"');
 		wstr(lit);
 		eof();
+		
+		init("u\"" + lit + '"');
+		utf16str(lit);
+		eof();
+		
+		init("U\"" + lit + '"');
+		utf32str(lit);
+		eof();

 		lit= "ut\n";
 		init('"' + lit);
@ -443,17 +503,41 @@ public class LexerTests extends BaseTestCase {
 		nl();
 		eof();
 		
+		init("u\"" + lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, "u\"ut");
+		token(IToken.tUTF16STRING, "u\"ut");
+		nl();
+		eof();
+		
+		init("U\"" + lit);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, "U\"ut");
+		token(IToken.tUTF32STRING, "U\"ut");
+		nl();
+		eof();
+		
 		lit= "\"ut\\\"";
 		init(lit);
 		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
 		token(IToken.tSTRING, "\"ut\\\"");
 		eof();

-		lit= 'L'+lit;
-		init(lit);
-		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit);
+		String lit2= 'L'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2);
 		token(IToken.tLSTRING, "L\"ut\\\"");
 		eof();
+		
+		lit2= 'u'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2);
+		token(IToken.tUTF16STRING, "u\"ut\\\"");
+		eof();
+		
+		lit2= 'U'+lit;
+		init(lit2);
+		problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2);
+		token(IToken.tUTF32STRING, "U\"ut\\\"");
+		eof();
 	}

 	public void testOperatorAndPunctuators() throws Exception {
--- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java
@ -22,6 +22,8 @@ import junit.framework.TestSuite;
 import org.eclipse.cdt.core.dom.ast.IASTPreprocessorIncludeStatement;
 import org.eclipse.cdt.core.dom.ast.IASTProblem;
 import org.eclipse.cdt.core.dom.ast.IMacroBinding;
+import org.eclipse.cdt.core.dom.parser.IScannerExtensionConfiguration;
+import org.eclipse.cdt.core.dom.parser.cpp.GPPScannerExtensionConfiguration;
 import org.eclipse.cdt.core.parser.IGCCToken;
 import org.eclipse.cdt.core.parser.IProblem;
 import org.eclipse.cdt.core.parser.IToken;
@ -233,6 +235,28 @@ public class PortedScannerTests extends PreprocessorTestsBase {
 		validateEOF();

 	}
+	
+	public void testUTFStrings() throws Exception {
+		IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() {
+			@Override public boolean supportUTFLiterals() { return true; }
+		};
+		initializeScanner("ubiquitous u\"utf16\" User U\"utf32\"", ParserLanguage.CPP, config); 
+		validateIdentifier("ubiquitous"); 
+		validateUTF16String("utf16"); 
+		validateIdentifier("User"); 
+		validateUTF32String("utf32"); 
+		validateEOF();
+	}
+	
+	public void testUTFChars() throws Exception {
+		IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() {
+			@Override public boolean supportUTFLiterals() { return true; }
+		};
+		initializeScanner("u'asdf' U'1234'", ParserLanguage.CPP, config);
+		validateUTF16Char("asdf");
+		validateUTF32Char("1234");
+		validateEOF();
+	}

 	public void testNumerics() throws Exception {
 		initializeScanner("3.0 0.9 .5 3. 4E5 2.01E-03 ..."); 
@ -1477,6 +1501,31 @@ public class PortedScannerTests extends PreprocessorTestsBase {
 		validateLString("ONETWO"); 
 		validateEOF();
 	}
+	
+	public void testUTFStringConcatenation() throws Exception {
+		IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() {
+			@Override public boolean supportUTFLiterals() { return true; }
+		};
+		initializeScanner("u\"a\" u\"b\"", ParserLanguage.CPP, config);
+		validateUTF16String("ab");
+		validateEOF();
+		initializeScanner("u\"a\" \"b\"", ParserLanguage.CPP, config);
+		validateUTF16String("ab");
+		validateEOF();
+		initializeScanner("\"a\" u\"b\"", ParserLanguage.CPP, config);
+		validateUTF16String("ab");
+		validateEOF();
+		
+		initializeScanner("U\"a\" U\"b\"", ParserLanguage.CPP, config);
+		validateUTF32String("ab");
+		validateEOF();
+		initializeScanner("U\"a\" \"b\"", ParserLanguage.CPP, config);
+		validateUTF32String("ab");
+		validateEOF();
+		initializeScanner("\"a\" U\"b\"", ParserLanguage.CPP, config);
+		validateUTF32String("ab");
+		validateEOF();
+	}

 	public void testEmptyIncludeDirective() throws Exception {
 		initializeScanner("#include \n#include <foo.h>\n"); 
--- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java
+++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java
@ -61,17 +61,27 @@ public abstract class PreprocessorTestsBase extends BaseTestCase {
 	protected void initializeScanner(String input, ParserLanguage lang) throws IOException {
 		initializeScanner(new CodeReader(input.toCharArray()), lang, ParserMode.COMPLETE_PARSE, new ScannerInfo());
 	}
+	
+	protected void initializeScanner(String input, ParserLanguage lang, IScannerExtensionConfiguration scannerConfig) throws IOException {
+		initializeScanner(new CodeReader(input.toCharArray()), lang, ParserMode.COMPLETE_PARSE, new ScannerInfo(), scannerConfig);
+	}

 	protected void initializeScanner(CodeReader input, ParserLanguage lang, ParserMode mode, IScannerInfo scannerInfo) throws IOException {
-		ICodeReaderFactory readerFactory= FileCodeReaderFactory.getInstance();
-		IScannerExtensionConfiguration scannerConfig;
+		initializeScanner(input, lang, mode, scannerInfo, null);
+	}
 	
-	    if (lang == ParserLanguage.C) {
-	    	scannerConfig= GCCScannerExtensionConfiguration.getInstance();
-	    }
-	    else {
-	    	scannerConfig= GPPScannerExtensionConfiguration.getInstance();
-	    }
+	protected void initializeScanner(CodeReader input, ParserLanguage lang, ParserMode mode, IScannerInfo scannerInfo, IScannerExtensionConfiguration scannerConfig) throws IOException {
+		ICodeReaderFactory readerFactory= FileCodeReaderFactory.getInstance();
+		// Fall back to the default GCC/G++ configuration when the caller supplied none.
+	
+		if(scannerConfig == null) {
+		    if (lang == ParserLanguage.C) {
+		    	scannerConfig= GCCScannerExtensionConfiguration.getInstance();
+		    }
+		    else {
+		    	scannerConfig= GPPScannerExtensionConfiguration.getInstance();
+		    }
+		}
 	    
 		fScanner= new CPreprocessor(input, scannerInfo, lang, NULL_LOG, scannerConfig, readerFactory);
 		fLocationResolver= fScanner.getLocationMap();
@ -120,6 +130,18 @@ public abstract class PreprocessorTestsBase extends BaseTestCase {
 		validateToken(IToken.tSTRING, "\"" + expectedImage + "\"");
 	}

+	protected void validateLString(String expectedImage) throws Exception {
+		validateToken(IToken.tLSTRING, "L\"" + expectedImage + "\"");
+	}
+	
+	protected void validateUTF16String(String expectedImage) throws Exception {
+		validateToken(IToken.tUTF16STRING, "u\"" + expectedImage + "\"");
+	}
+	
+	protected void validateUTF32String(String expectedImage) throws Exception {
+		validateToken(IToken.tUTF32STRING, "U\"" + expectedImage + "\"");
+	}
+	
 	protected void validateChar(String expectedImage) throws Exception {
 		validateToken(IToken.tCHAR, "'" + expectedImage + "'");
 	}
@ -127,11 +149,15 @@ public abstract class PreprocessorTestsBase extends BaseTestCase {
 	protected void validateWideChar(String expectedImage) throws Exception {
 		validateToken(IToken.tLCHAR, "L'" + expectedImage + "'");
 	}
-
-	protected void validateLString(String expectedImage) throws Exception {
-		validateToken(IToken.tLSTRING, "L\"" + expectedImage + "\"");
+	
+	protected void validateUTF16Char(String expectedImage) throws Exception {
+		validateToken(IToken.tUTF16CHAR, "u'" + expectedImage + "'");
 	}
-
+	
+	protected void validateUTF32Char(String expectedImage) throws Exception {
+		validateToken(IToken.tUTF32CHAR, "U'" + expectedImage + "'");
+	}
+	
 	protected void validateFloatingPointLiteral(String expectedImage) throws Exception {
 		validateToken(IToken.tFLOATINGPT, expectedImage);
 	}
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java
@ -63,6 +63,16 @@ public abstract class AbstractScannerExtensionConfiguration implements IScannerE
 		return false;
 	}
 	
+	
+	/**
+	 * {@inheritDoc}
+	 * @since 5.1
+	 */
+	public boolean supportUTFLiterals() {
+		return true;
+	}
+	
+	
 	/**
 	 * {@inheritDoc}
 	 * @since 5.1
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java
@ -100,4 +100,13 @@ public interface IScannerExtensionConfiguration {
 	 *         or <code>null</code> for no additional keywords.
 	 */
 	public CharArrayIntMap getAdditionalPreprocessorKeywords();
+	
+	
+	/**
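+     * Returns whether the scanner recognizes the <code>u</code> and <code>U</code> prefixes of UTF-16 and UTF-32 string and character literals.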
+     *
+	 * @since 5.1
+     * @see "http://publib.boulder.ibm.com/infocenter/comphelp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.aix.doc/language_ref/unicode_standard.html"
+	 */
+	public boolean supportUTFLiterals();
 }
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java
@ -172,10 +172,17 @@ public interface IToken {
 	/** @deprecated use {@link #tXOR} */ @Deprecated int t_xor = 127;
 	/** @deprecated use {@link #tXORASSIGN} */ @Deprecated int t_xor_eq = 128; 
 	int tFLOATINGPT = 129;
+	
 	int tSTRING = 130;
 	int tLSTRING = 131;
+	/** @since 5.1 */ int tUTF16STRING = 5000;
+	/** @since 5.1 */ int tUTF32STRING = 5001;
+	
 	int tCHAR = 132;
 	int tLCHAR = 133;
+	/** @since 5.1 */ int tUTF16CHAR = 5002;
+	/** @since 5.1 */ int tUTF32CHAR = 5003;
+	
 	int t__Bool = 134;
 	int t__Complex = 135;
 	int t__Imaginary = 136;
@ -197,4 +204,6 @@ public interface IToken {
 	
 	int FIRST_RESERVED_IExtensionToken	= 243;
 	int LAST_RESERVED_IExtensionToken	= 299;
+	
+	
 }
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java
@ -680,12 +680,16 @@ public class GNUCSourceParser extends AbstractGNUSourceCodeParser {
            return literalExpression;
        case IToken.tSTRING:
        case IToken.tLSTRING:
+        case IToken.tUTF16STRING:
+        case IToken.tUTF32STRING:
            t = consume();
            literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_string_literal, t.getImage());
            ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset());
            return literalExpression;
        case IToken.tCHAR:
        case IToken.tLCHAR:
+        case IToken.tUTF16CHAR:
+        case IToken.tUTF32CHAR:
            t = consume();
            literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_char_constant, t.getImage());
            ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getLength());
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java
@ -1164,12 +1164,16 @@ public class GNUCPPSourceParser extends AbstractGNUSourceCodeParser {
            return literalExpression;
        case IToken.tSTRING:
        case IToken.tLSTRING:
+        case IToken.tUTF16STRING:
+        case IToken.tUTF32STRING:
            t = consume();
            literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_string_literal, t.getImage()); 
            ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset());
            return literalExpression;
        case IToken.tCHAR:
        case IToken.tLCHAR:
+        case IToken.tUTF16CHAR:
+        case IToken.tUTF32CHAR:
            t = consume();
            literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_char_constant, t.getImage()); 
            ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset());
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java
@ -201,6 +201,7 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
        fLexOptions.fSupportAtSignInIdentifiers= configuration.supportAtSignInIdentifiers();
        fLexOptions.fSupportMinAndMax = configuration.supportMinAndMaxOperators();
        fLexOptions.fSupportSlashPercentComments= configuration.supportSlashPercentComments();
+        fLexOptions.fSupportUTFLiterals = configuration.supportUTFLiterals();
        fLocationMap= new LocationMap(fLexOptions);
        fKeywords= new CharArrayIntMap(40, -1);
        fPPKeywords= new CharArrayIntMap(40, -1);
@ -555,7 +556,10 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
    		
    	case IToken.tSTRING:
    	case IToken.tLSTRING:
-    		boolean isWide= tt1 == IToken.tLSTRING;
+        case IToken.tUTF16STRING:
+        case IToken.tUTF32STRING:
+        	
+    		StringType st = StringType.fromToken(tt1);
    		Token t2;
    		StringBuffer buf= null;
    		int endOffset= 0;
@ -565,7 +569,9 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
    			switch(tt2) {
    			case IToken.tLSTRING:
    			case IToken.tSTRING:
-    				isWide= tt2 == IToken.tLSTRING;
+    		    case IToken.tUTF16STRING:
+    		    case IToken.tUTF32STRING:
+    				st = StringType.max(st, StringType.fromToken(tt2));
    				if (buf == null) {
    					buf= new StringBuffer();
    					appendStringContent(buf, t1);
@ -580,15 +586,17 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
    		}
    		pushbackToken(t2);
    		if (buf != null) {
-    			char[] image= new char[buf.length() + (isWide ? 3 : 2)];
+    			char[] prefix = st.getPrefix();
+    			char[] image= new char[buf.length() + prefix.length + 2];
    			int off= -1;
-    			if (isWide) {
-    				image[++off]= 'L';
-    			}
+    			
+    			for(char c : prefix)
+    				image[++off] = c;
+    			
    			image[++off]= '"';
    			buf.getChars(0, buf.length(), image, ++off);
    			image[image.length-1]= '"';
-    			t1= new TokenWithImage((isWide ? IToken.tLSTRING : IToken.tSTRING), null, t1.getOffset(), endOffset, image);
+    			t1= new TokenWithImage(st.getTokenValue(), null, t1.getOffset(), endOffset, image);
    		}
    	}

@ -598,7 +606,7 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
    	fLastToken= t1;
    	return t1;
    }
-
+    
    
    public void skipInactiveCode() throws OffsetLimitReachedException {
    	final Lexer lexer= fCurrentContext.getLexer();
@ -619,8 +627,14 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable {
 	private void appendStringContent(StringBuffer buf, Token t1) {
    	final char[] image= t1.getCharImage();
    	final int length= image.length;
+    	int start = 1;
+    	for(char c : image) {
+    		if(c == '"')
+    			break;
+    		start++;
+    	}
+    	
    	if (length > 1) {
-    		final int start= image[0]=='"' ? 1 : 2;
    		final int diff= image[length-1] == '"' ? length-start-1 : length-start;
    		if (diff > 0) {
    			buf.append(image, start, diff);
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java
@ -237,6 +237,8 @@ public class ExpressionEvaluator {
            return ~unaryExpression();
        case IToken.tCHAR:
        case IToken.tLCHAR:
+    	case IToken.tUTF16CHAR:
+    	case IToken.tUTF32CHAR:
        case IToken.tINTEGER:
        	long val= getValue(fTokens);
        	consume();
@ -271,6 +273,8 @@ public class ExpressionEvaluator {
        case IToken.tAMPERASSIGN:
        case IToken.tSTRING:
        case IToken.tLSTRING:
+        case IToken.tUTF16STRING:
+        case IToken.tUTF32STRING:
            throw new EvalException(IProblem.SCANNER_EXPRESSION_SYNTAX_ERROR, null); 
        	
        default:
@ -326,6 +330,8 @@ public class ExpressionEvaluator {
    	case IToken.tCHAR:
    		return getChar(t.getCharImage(), 1);
    	case IToken.tLCHAR:
+    	case IToken.tUTF16CHAR:
+    	case IToken.tUTF32CHAR:
    		return getChar(t.getCharImage(), 2);
    	case IToken.tINTEGER:
    		return getNumber(t.getCharImage());
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java
@ -7,6 +7,7 @@
 *
 * Contributors:
 *    Markus Schorn - initial API and implementation
+ *    Mike Kucera (IBM) - UTF string literals
 *******************************************************************************/ 
 package org.eclipse.cdt.internal.core.parser.scanner;

@ -51,6 +52,7 @@ final public class Lexer implements ITokenSequence {
 		public boolean fSupportMinAndMax= true;
 		public boolean fCreateImageLocations= true;
 		public boolean fSupportSlashPercentComments= false;
+		public boolean fSupportUTFLiterals= true;
 		
 		@Override
 		public Object clone() {
@ -254,11 +256,11 @@ final public class Lexer implements ITokenSequence {
 				continue;
 				
 			case '"':
-				stringLiteral(start, false);
+				stringLiteral(start, IToken.tSTRING);
 				continue;

 			case '\'':
-				charLiteral(start, false);
+				charLiteral(start, IToken.tCHAR);
 				continue;

 			case '/':
@ -339,28 +341,42 @@ final public class Lexer implements ITokenSequence {
 				switch(d) {
 				case '"':
 					nextCharPhase3();
-					return stringLiteral(start, true);
+					return stringLiteral(start, IToken.tLSTRING);
 				case '\'':
 					nextCharPhase3();
-					return charLiteral(start, true);
+					return charLiteral(start, IToken.tLCHAR);
 				}
 				return identifier(start, 1);

+			case 'u': 	
+			case 'U':
+				if(fOptions.fSupportUTFLiterals) {
+					if(d == '"') {
+						nextCharPhase3();
+						return stringLiteral(start, c == 'u' ? IToken.tUTF16STRING : IToken.tUTF32STRING);
+					}
+					if(d == '\'') {
+						nextCharPhase3();
+						return charLiteral(start, c == 'u' ? IToken.tUTF16CHAR : IToken.tUTF32CHAR);
+					}
+				}
+				return identifier(start, 1);
+				
 			case '"':
 				if (fInsideIncludeDirective) {
 					return headerName(start, true);
 				}
-				return stringLiteral(start, false);
+				return stringLiteral(start, IToken.tSTRING);

 			case '\'':
-				return charLiteral(start, false);
+				return charLiteral(start, IToken.tCHAR);

 			case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': 
 			case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 
-			case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+			case 's': case 't':           case 'v': case 'w': case 'x': case 'y': case 'z':
 			case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
 			case 'J': case 'K':           case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 
-			case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+			case 'S': case 'T':           case 'V': case 'W': case 'X': case 'Y': case 'Z':
 			case '_':
 				return identifier(start, 1);

@ -726,17 +742,18 @@ final public class Lexer implements ITokenSequence {
 	}

 	@SuppressWarnings("fallthrough")
-	private Token stringLiteral(final int start, final boolean wide) throws OffsetLimitReachedException {
+	private Token stringLiteral(final int start, final int tokenType) throws OffsetLimitReachedException {
 		boolean escaped = false;
 		boolean done = false;
-		int length= wide ? 2 : 1;
+		
+		int length = tokenType == IToken.tSTRING ? 1 : 2;
 		int c= fCharPhase3;
 		
 		loop: while (!done) {
 			switch(c) {
 			case END_OF_INPUT:
 				if (fSupportContentAssist) {
-					throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(wide ? IToken.tLSTRING : IToken.tSTRING, start, length));
+					throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(tokenType, start, length));
 				}
 				// no break;
 			case '\n':
@ -759,21 +776,21 @@ final public class Lexer implements ITokenSequence {
 			length++;
 			c= nextCharPhase3();
 		}
-		return newToken(wide ? IToken.tLSTRING : IToken.tSTRING, start, length);
+		return newToken(tokenType, start, length);
 	}
 	
 	@SuppressWarnings("fallthrough")
-	private Token charLiteral(final int start, boolean wide) throws OffsetLimitReachedException {
+	private Token charLiteral(final int start, final int tokenType) throws OffsetLimitReachedException {
 		boolean escaped = false;
 		boolean done = false;
-		int length= wide ? 2 : 1;
+		int length= tokenType == IToken.tCHAR ? 1 : 2;
 		int c= fCharPhase3;
 		
 		loop: while (!done) {
 			switch(c) {
 			case END_OF_INPUT:
 				if (fSupportContentAssist) {
-					throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length));
+					throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(tokenType, start, length));
 				}
 				// no break;
 			case '\n':
@ -795,7 +812,7 @@ final public class Lexer implements ITokenSequence {
 			length++;
 			c= nextCharPhase3();
 		}
-		return newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length);
+		return newToken(tokenType, start, length);
 	}
 	
 	private Token identifier(int start, int length) {
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java
@ -874,8 +874,12 @@ public class MacroExpander {
 			switch(t.getType()) {
 			case IToken.tSTRING:
 			case IToken.tLSTRING:
+	        case IToken.tUTF16STRING:
+	        case IToken.tUTF32STRING:
 			case IToken.tCHAR:
 			case IToken.tLCHAR:
+	    	case IToken.tUTF16CHAR:
+	    	case IToken.tUTF32CHAR:
 				final char[] image= t.getCharImage();
 				for (final char c : image) {
 						if (c == '"' || c == '\\') {
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/StringType.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/StringType.java
@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright (c) 2009 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *    Mike Kucera (IBM) - Initial API and implementation
+ *******************************************************************************/
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+import org.eclipse.cdt.core.parser.IToken;
+
+/**
+ * Enumerates the kinds of string literals (narrow, wide, UTF-16, UTF-32)
+ * together with their source prefix and {@link IToken} type.
+ */
+@SuppressWarnings("nls")
+public enum StringType {
+
+	// Listed in order of "wideness"; max() relies on this declaration order.
+	NARROW("", IToken.tSTRING),
+	WIDE("L",  IToken.tLSTRING),
+	UTF16("u", IToken.tUTF16STRING),
+	UTF32("U", IToken.tUTF32STRING);
+	
+	
+	private char[] prefix;
+	private int tokenVal;
+	
+	private StringType(String prefix, int tokenVal) {
+		this.prefix = prefix.toCharArray();
+		this.tokenVal = tokenVal;
+	}
+	
+	public char[] getPrefix() {
+		return prefix;
+	}
+	
+	public int getTokenValue() {
+		return tokenVal;
+	}
+	
+	/**
+	 * Returns the StringType value that represents the 'wider'
+	 * of the two given StringTypes.
+	 * @throws NullPointerException if an argument is null
+	 */
+	public static StringType max(StringType st1, StringType st2) {
+		return values()[Math.max(st1.ordinal(), st2.ordinal())];
+	}
+
+	/**
+	 * Returns the StringType value for the given string literal type.
+	 * 
+	 * @see IToken#tSTRING
+	 * @see IToken#tLSTRING
+	 * @see IToken#tUTF16STRING
+	 * @see IToken#tUTF32STRING
+	 * 
+	 * @throws IllegalArgumentException if the tokenVal does not represent a string literal
+	 */
+	public static StringType fromToken(int tokenVal) {
+		switch(tokenVal) {
+		case IToken.tSTRING:      return NARROW;
+    	case IToken.tLSTRING:     return WIDE;
+        case IToken.tUTF16STRING: return UTF16;
+        case IToken.tUTF32STRING: return UTF32;
+        default:
+        	throw new IllegalArgumentException(tokenVal + " is not a string token");
+		}
+	}
+}