diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java index cec6d72b88b..135c9173697 100644 --- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/LexerTests.java @@ -99,6 +99,14 @@ public class LexerTests extends BaseTestCase { token(IToken.tLSTRING, "L\"" + expectedImage + "\""); } + private void utf16str(String expectedImage) throws Exception { + token(IToken.tUTF16STRING, "u\"" + expectedImage + "\""); + } + + private void utf32str(String expectedImage) throws Exception { + token(IToken.tUTF32STRING, "U\"" + expectedImage + "\""); + } + private void ch(String expectedImage) throws Exception { token(IToken.tCHAR, expectedImage); } @@ -106,6 +114,14 @@ public class LexerTests extends BaseTestCase { private void wch(String expectedImage) throws Exception { token(IToken.tLCHAR, expectedImage); } + + private void utf16ch(String expectedImage) throws Exception { + token(IToken.tUTF16CHAR, expectedImage); + } + + private void utf32ch(String expectedImage) throws Exception { + token(IToken.tUTF32CHAR, expectedImage); + } private void eof() throws Exception { IToken t= fLexer.nextToken(); @@ -388,9 +404,19 @@ public class LexerTests extends BaseTestCase { ch(lit); eof(); - lit= 'L'+lit; - init(lit); - wch(lit); + String lit2= 'L'+lit; + init(lit2); + wch(lit2); + eof(); + + lit2= 'u'+lit; + init(lit2); + utf16ch(lit2); + eof(); + + lit2= 'U'+lit; + init(lit2); + utf32ch(lit2); eof(); lit= "'ut\n"; @@ -400,24 +426,50 @@ public class LexerTests extends BaseTestCase { nl(); eof(); - lit= 'L'+lit; - init(lit); + lit2= 'L'+lit; + init(lit2); problem(IProblem.SCANNER_BAD_CHARACTER, "L'ut"); wch("L'ut"); nl(); eof(); + lit2= 'u'+lit; + init(lit2); + problem(IProblem.SCANNER_BAD_CHARACTER, 
"u'ut"); + utf16ch("u'ut"); + nl(); + eof(); + + lit2= 'U'+lit; + init(lit2); + problem(IProblem.SCANNER_BAD_CHARACTER, "U'ut"); + utf32ch("U'ut"); + nl(); + eof(); + lit= "'ut\\'"; init(lit); problem(IProblem.SCANNER_BAD_CHARACTER, lit); ch("'ut\\'"); eof(); - lit= 'L'+lit; - init(lit); - problem(IProblem.SCANNER_BAD_CHARACTER, lit); + lit2= 'L'+lit; + init(lit2); + problem(IProblem.SCANNER_BAD_CHARACTER, lit2); wch("L'ut\\'"); eof(); + + lit2= 'u'+lit; + init(lit2); + problem(IProblem.SCANNER_BAD_CHARACTER, lit2); + utf16ch("u'ut\\'"); + eof(); + + lit2= 'U'+lit; + init(lit2); + problem(IProblem.SCANNER_BAD_CHARACTER, lit2); + utf32ch("U'ut\\'"); + eof(); } public void testStringLiteral() throws Exception { @@ -429,6 +481,14 @@ public class LexerTests extends BaseTestCase { init("L\"" + lit + '"'); wstr(lit); eof(); + + init("u\"" + lit + '"'); + utf16str(lit); + eof(); + + init("U\"" + lit + '"'); + utf32str(lit); + eof(); lit= "ut\n"; init('"' + lit); @@ -443,17 +503,41 @@ public class LexerTests extends BaseTestCase { nl(); eof(); + init("u\"" + lit); + problem(IProblem.SCANNER_UNBOUNDED_STRING, "u\"ut"); + token(IToken.tUTF16STRING, "u\"ut"); + nl(); + eof(); + + init("U\"" + lit); + problem(IProblem.SCANNER_UNBOUNDED_STRING, "U\"ut"); + token(IToken.tUTF32STRING, "U\"ut"); + nl(); + eof(); + lit= "\"ut\\\""; init(lit); problem(IProblem.SCANNER_UNBOUNDED_STRING, lit); token(IToken.tSTRING, "\"ut\\\""); eof(); - lit= 'L'+lit; - init(lit); - problem(IProblem.SCANNER_UNBOUNDED_STRING, lit); + String lit2= 'L'+lit; + init(lit2); + problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2); token(IToken.tLSTRING, "L\"ut\\\""); eof(); + + lit2= 'u'+lit; + init(lit2); + problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2); + token(IToken.tUTF16STRING, "u\"ut\\\""); + eof(); + + lit2= 'U'+lit; + init(lit2); + problem(IProblem.SCANNER_UNBOUNDED_STRING, lit2); + token(IToken.tUTF32STRING, "U\"ut\\\""); + eof(); } public void testOperatorAndPunctuators() throws Exception { diff --git 
a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java index 65e900592e2..e1a3d3b40d5 100644 --- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PortedScannerTests.java @@ -22,6 +22,8 @@ import junit.framework.TestSuite; import org.eclipse.cdt.core.dom.ast.IASTPreprocessorIncludeStatement; import org.eclipse.cdt.core.dom.ast.IASTProblem; import org.eclipse.cdt.core.dom.ast.IMacroBinding; +import org.eclipse.cdt.core.dom.parser.IScannerExtensionConfiguration; +import org.eclipse.cdt.core.dom.parser.cpp.GPPScannerExtensionConfiguration; import org.eclipse.cdt.core.parser.IGCCToken; import org.eclipse.cdt.core.parser.IProblem; import org.eclipse.cdt.core.parser.IToken; @@ -233,6 +235,28 @@ public class PortedScannerTests extends PreprocessorTestsBase { validateEOF(); } + + public void testUTFStrings() throws Exception { + IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() { + @Override public boolean supportUTFLiterals() { return true; } + }; + initializeScanner("ubiquitous u\"utf16\" User U\"utf32\"", ParserLanguage.CPP, config); + validateIdentifier("ubiquitous"); + validateUTF16String("utf16"); + validateIdentifier("User"); + validateUTF32String("utf32"); + validateEOF(); + } + + public void testUTFChars() throws Exception { + IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() { + @Override public boolean supportUTFLiterals() { return true; } + }; + initializeScanner("u'asdf' U'1234'", ParserLanguage.CPP, config); + validateUTF16Char("asdf"); + validateUTF32Char("1234"); + validateEOF(); + } public void testNumerics() throws Exception { initializeScanner("3.0 0.9 .5 3. 
4E5 2.01E-03 ..."); @@ -1477,6 +1501,31 @@ public class PortedScannerTests extends PreprocessorTestsBase { validateLString("ONETWO"); validateEOF(); } + + public void testUTFStringConcatenation() throws Exception { + IScannerExtensionConfiguration config = new GPPScannerExtensionConfiguration() { + @Override public boolean supportUTFLiterals() { return true; } + }; + initializeScanner("u\"a\" u\"b\"", ParserLanguage.CPP, config); + validateUTF16String("ab"); + validateEOF(); + initializeScanner("u\"a\" \"b\"", ParserLanguage.CPP, config); + validateUTF16String("ab"); + validateEOF(); + initializeScanner("\"a\" u\"b\"", ParserLanguage.CPP, config); + validateUTF16String("ab"); + validateEOF(); + + initializeScanner("U\"a\" U\"b\"", ParserLanguage.CPP, config); + validateUTF32String("ab"); + validateEOF(); + initializeScanner("U\"a\" \"b\"", ParserLanguage.CPP, config); + validateUTF32String("ab"); + validateEOF(); + initializeScanner("\"a\" U\"b\"", ParserLanguage.CPP, config); + validateUTF32String("ab"); + validateEOF(); + } public void testEmptyIncludeDirective() throws Exception { initializeScanner("#include \n#include \n"); diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java index b7ea1359146..455ec810a0b 100644 --- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/PreprocessorTestsBase.java @@ -61,17 +61,27 @@ public abstract class PreprocessorTestsBase extends BaseTestCase { protected void initializeScanner(String input, ParserLanguage lang) throws IOException { initializeScanner(new CodeReader(input.toCharArray()), lang, ParserMode.COMPLETE_PARSE, new ScannerInfo()); } + + protected void initializeScanner(String input, ParserLanguage 
lang, IScannerExtensionConfiguration scannerConfig) throws IOException { + initializeScanner(new CodeReader(input.toCharArray()), lang, ParserMode.COMPLETE_PARSE, new ScannerInfo(), scannerConfig); + } protected void initializeScanner(CodeReader input, ParserLanguage lang, ParserMode mode, IScannerInfo scannerInfo) throws IOException { - ICodeReaderFactory readerFactory= FileCodeReaderFactory.getInstance(); - IScannerExtensionConfiguration scannerConfig; + initializeScanner(input, lang, mode, scannerInfo, null); + } - if (lang == ParserLanguage.C) { - scannerConfig= GCCScannerExtensionConfiguration.getInstance(); - } - else { - scannerConfig= GPPScannerExtensionConfiguration.getInstance(); - } + protected void initializeScanner(CodeReader input, ParserLanguage lang, ParserMode mode, IScannerInfo scannerInfo, IScannerExtensionConfiguration scannerConfig) throws IOException { + ICodeReaderFactory readerFactory= FileCodeReaderFactory.getInstance(); + //IScannerExtensionConfiguration scannerConfig; + + if(scannerConfig == null) { + if (lang == ParserLanguage.C) { + scannerConfig= GCCScannerExtensionConfiguration.getInstance(); + } + else { + scannerConfig= GPPScannerExtensionConfiguration.getInstance(); + } + } fScanner= new CPreprocessor(input, scannerInfo, lang, NULL_LOG, scannerConfig, readerFactory); fLocationResolver= fScanner.getLocationMap(); @@ -120,6 +130,18 @@ public abstract class PreprocessorTestsBase extends BaseTestCase { validateToken(IToken.tSTRING, "\"" + expectedImage + "\""); } + protected void validateLString(String expectedImage) throws Exception { + validateToken(IToken.tLSTRING, "L\"" + expectedImage + "\""); + } + + protected void validateUTF16String(String expectedImage) throws Exception { + validateToken(IToken.tUTF16STRING, "u\"" + expectedImage + "\""); + } + + protected void validateUTF32String(String expectedImage) throws Exception { + validateToken(IToken.tUTF32STRING, "U\"" + expectedImage + "\""); + } + protected void 
validateChar(String expectedImage) throws Exception { validateToken(IToken.tCHAR, "'" + expectedImage + "'"); } @@ -127,11 +149,15 @@ public abstract class PreprocessorTestsBase extends BaseTestCase { protected void validateWideChar(String expectedImage) throws Exception { validateToken(IToken.tLCHAR, "L'" + expectedImage + "'"); } - - protected void validateLString(String expectedImage) throws Exception { - validateToken(IToken.tLSTRING, "L\"" + expectedImage + "\""); + + protected void validateUTF16Char(String expectedImage) throws Exception { + validateToken(IToken.tUTF16CHAR, "u'" + expectedImage + "'"); } - + + protected void validateUTF32Char(String expectedImage) throws Exception { + validateToken(IToken.tUTF32CHAR, "U'" + expectedImage + "'"); + } + protected void validateFloatingPointLiteral(String expectedImage) throws Exception { validateToken(IToken.tFLOATINGPT, expectedImage); } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java index 489d3e22412..b69d08bd2d8 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/AbstractScannerExtensionConfiguration.java @@ -63,6 +63,16 @@ public abstract class AbstractScannerExtensionConfiguration implements IScannerE return false; } + + /** + * {@inheritDoc} + * @since 5.1 + */ + public boolean supportUTFLiterals() { + return true; + } + + /** * {@inheritDoc} * @since 5.1 diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java index b1ea765ba77..c4a2aa58afb 100644 --- 
a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/dom/parser/IScannerExtensionConfiguration.java @@ -100,4 +100,13 @@ public interface IScannerExtensionConfiguration { * or null for no additional keywords. */ public CharArrayIntMap getAdditionalPreprocessorKeywords(); + + + /** + * Support for UTF string literals. + * + * @since 5.1 + * @see "http://publib.boulder.ibm.com/infocenter/comphelp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.aix.doc/language_ref/unicode_standard.html" + */ + public boolean supportUTFLiterals(); } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java index f5daddacb30..6bdd02b95b6 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/IToken.java @@ -172,10 +172,17 @@ public interface IToken { /** @deprecated use {@link #tXOR} */ @Deprecated int t_xor = 127; /** @deprecated use {@link #tXORASSIGN} */ @Deprecated int t_xor_eq = 128; int tFLOATINGPT = 129; + int tSTRING = 130; int tLSTRING = 131; + /** @since 5.1 */ int tUTF16STRING = 5000; + /** @since 5.1 */ int tUTF32STRING = 5001; + int tCHAR = 132; int tLCHAR = 133; + /** @since 5.1 */ int tUTF16CHAR = 5002; + /** @since 5.1 */ int tUTF32CHAR = 5003; + int t__Bool = 134; int t__Complex = 135; int t__Imaginary = 136; @@ -197,4 +204,6 @@ public interface IToken { int FIRST_RESERVED_IExtensionToken = 243; int LAST_RESERVED_IExtensionToken = 299; + + } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java index a2d5a53cba4..06d2bc7927a 100644 --- 
a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/c/GNUCSourceParser.java @@ -680,12 +680,16 @@ public class GNUCSourceParser extends AbstractGNUSourceCodeParser { return literalExpression; case IToken.tSTRING: case IToken.tLSTRING: + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: t = consume(); literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_string_literal, t.getImage()); ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset()); return literalExpression; case IToken.tCHAR: case IToken.tLCHAR: + case IToken.tUTF16CHAR: + case IToken.tUTF32CHAR: t = consume(); literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_char_constant, t.getImage()); ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getLength()); diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java index dd8b382a2d8..332f58ae54a 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/dom/parser/cpp/GNUCPPSourceParser.java @@ -1164,12 +1164,16 @@ public class GNUCPPSourceParser extends AbstractGNUSourceCodeParser { return literalExpression; case IToken.tSTRING: case IToken.tLSTRING: + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: t = consume(); literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_string_literal, t.getImage()); ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset()); return literalExpression; case IToken.tCHAR: case IToken.tLCHAR: + case IToken.tUTF16CHAR: + case IToken.tUTF32CHAR: t = 
consume(); literalExpression = nodeFactory.newLiteralExpression(IASTLiteralExpression.lk_char_constant, t.getImage()); ((ASTNode) literalExpression).setOffsetAndLength(t.getOffset(), t.getEndOffset() - t.getOffset()); diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java index 793d865b316..2f229daf857 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/CPreprocessor.java @@ -201,6 +201,7 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable { fLexOptions.fSupportAtSignInIdentifiers= configuration.supportAtSignInIdentifiers(); fLexOptions.fSupportMinAndMax = configuration.supportMinAndMaxOperators(); fLexOptions.fSupportSlashPercentComments= configuration.supportSlashPercentComments(); + fLexOptions.fSupportUTFLiterals = configuration.supportUTFLiterals(); fLocationMap= new LocationMap(fLexOptions); fKeywords= new CharArrayIntMap(40, -1); fPPKeywords= new CharArrayIntMap(40, -1); @@ -555,7 +556,10 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable { case IToken.tSTRING: case IToken.tLSTRING: - boolean isWide= tt1 == IToken.tLSTRING; + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: + + StringType st = StringType.fromToken(tt1); Token t2; StringBuffer buf= null; int endOffset= 0; @@ -565,7 +569,9 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable { switch(tt2) { case IToken.tLSTRING: case IToken.tSTRING: - isWide= tt2 == IToken.tLSTRING; + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: + st = StringType.max(st, StringType.fromToken(tt2)); if (buf == null) { buf= new StringBuffer(); appendStringContent(buf, t1); @@ -580,15 +586,17 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable 
{ } pushbackToken(t2); if (buf != null) { - char[] image= new char[buf.length() + (isWide ? 3 : 2)]; + char[] prefix = st.getPrefix(); + char[] image= new char[buf.length() + prefix.length + 2]; int off= -1; - if (isWide) { - image[++off]= 'L'; - } + + for(char c : prefix) + image[++off] = c; + image[++off]= '"'; buf.getChars(0, buf.length(), image, ++off); image[image.length-1]= '"'; - t1= new TokenWithImage((isWide ? IToken.tLSTRING : IToken.tSTRING), null, t1.getOffset(), endOffset, image); + t1= new TokenWithImage(st.getTokenValue(), null, t1.getOffset(), endOffset, image); } } @@ -598,7 +606,7 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable { fLastToken= t1; return t1; } - + public void skipInactiveCode() throws OffsetLimitReachedException { final Lexer lexer= fCurrentContext.getLexer(); @@ -619,8 +627,14 @@ public class CPreprocessor implements ILexerLog, IScanner, IAdaptable { private void appendStringContent(StringBuffer buf, Token t1) { final char[] image= t1.getCharImage(); final int length= image.length; + int start = 1; + for(char c : image) { + if(c == '"') + break; + start++; + } + if (length > 1) { - final int start= image[0]=='"' ? 1 : 2; final int diff= image[length-1] == '"' ? 
length-start-1 : length-start; if (diff > 0) { buf.append(image, start, diff); diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java index 546e0b69461..322e2d8f03f 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/ExpressionEvaluator.java @@ -237,6 +237,8 @@ public class ExpressionEvaluator { return ~unaryExpression(); case IToken.tCHAR: case IToken.tLCHAR: + case IToken.tUTF16CHAR: + case IToken.tUTF32CHAR: case IToken.tINTEGER: long val= getValue(fTokens); consume(); @@ -271,6 +273,8 @@ public class ExpressionEvaluator { case IToken.tAMPERASSIGN: case IToken.tSTRING: case IToken.tLSTRING: + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: throw new EvalException(IProblem.SCANNER_EXPRESSION_SYNTAX_ERROR, null); default: @@ -326,6 +330,8 @@ public class ExpressionEvaluator { case IToken.tCHAR: return getChar(t.getCharImage(), 1); case IToken.tLCHAR: + case IToken.tUTF16CHAR: + case IToken.tUTF32CHAR: return getChar(t.getCharImage(), 2); case IToken.tINTEGER: return getNumber(t.getCharImage()); diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java index 05ce9fc5e6d..e2eea7730c6 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/Lexer.java @@ -7,6 +7,7 @@ * * Contributors: * Markus Schorn - initial API and implementation + * Mike Kucera (IBM) - UTF string literals *******************************************************************************/ package 
org.eclipse.cdt.internal.core.parser.scanner; @@ -51,6 +52,7 @@ final public class Lexer implements ITokenSequence { public boolean fSupportMinAndMax= true; public boolean fCreateImageLocations= true; public boolean fSupportSlashPercentComments= false; + public boolean fSupportUTFLiterals= true; @Override public Object clone() { @@ -254,11 +256,11 @@ final public class Lexer implements ITokenSequence { continue; case '"': - stringLiteral(start, false); + stringLiteral(start, IToken.tSTRING); continue; case '\'': - charLiteral(start, false); + charLiteral(start, IToken.tCHAR); continue; case '/': @@ -339,28 +341,42 @@ final public class Lexer implements ITokenSequence { switch(d) { case '"': nextCharPhase3(); - return stringLiteral(start, true); + return stringLiteral(start, IToken.tLSTRING); case '\'': nextCharPhase3(); - return charLiteral(start, true); + return charLiteral(start, IToken.tLCHAR); } return identifier(start, 1); + case 'u': + case 'U': + if(fOptions.fSupportUTFLiterals) { + if(d == '"') { + nextCharPhase3(); + return stringLiteral(start, c == 'u' ? IToken.tUTF16STRING : IToken.tUTF32STRING); + } + if(d == '\'') { + nextCharPhase3(); + return charLiteral(start, c == 'u' ? 
IToken.tUTF16CHAR : IToken.tUTF32CHAR); + } + } + return identifier(start, 1); + case '"': if (fInsideIncludeDirective) { return headerName(start, true); } - return stringLiteral(start, false); + return stringLiteral(start, IToken.tSTRING); case '\'': - return charLiteral(start, false); + return charLiteral(start, IToken.tCHAR); case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + case 's': case 't': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'S': case 'T': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': return identifier(start, 1); @@ -726,17 +742,18 @@ final public class Lexer implements ITokenSequence { } @SuppressWarnings("fallthrough") - private Token stringLiteral(final int start, final boolean wide) throws OffsetLimitReachedException { + private Token stringLiteral(final int start, final int tokenType) throws OffsetLimitReachedException { boolean escaped = false; boolean done = false; - int length= wide ? 2 : 1; + + int length = tokenType == IToken.tSTRING ? 1 : 2; int c= fCharPhase3; loop: while (!done) { switch(c) { case END_OF_INPUT: if (fSupportContentAssist) { - throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(wide ? IToken.tLSTRING : IToken.tSTRING, start, length)); + throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(tokenType, start, length)); } // no break; case '\n': @@ -759,21 +776,21 @@ final public class Lexer implements ITokenSequence { length++; c= nextCharPhase3(); } - return newToken(wide ? 
IToken.tLSTRING : IToken.tSTRING, start, length); + return newToken(tokenType, start, length); } @SuppressWarnings("fallthrough") - private Token charLiteral(final int start, boolean wide) throws OffsetLimitReachedException { + private Token charLiteral(final int start, final int tokenType) throws OffsetLimitReachedException { boolean escaped = false; boolean done = false; - int length= wide ? 2 : 1; + int length= tokenType == IToken.tCHAR ? 1 : 2; int c= fCharPhase3; loop: while (!done) { switch(c) { case END_OF_INPUT: if (fSupportContentAssist) { - throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length)); + throw new OffsetLimitReachedException(ORIGIN_LEXER, newToken(tokenType, start, length)); } // no break; case '\n': @@ -795,7 +812,7 @@ final public class Lexer implements ITokenSequence { length++; c= nextCharPhase3(); } - return newToken(wide ? IToken.tLCHAR : IToken.tCHAR, start, length); + return newToken(tokenType, start, length); } private Token identifier(int start, int length) { diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java index 171f473825a..9de676b138d 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/MacroExpander.java @@ -874,8 +874,12 @@ public class MacroExpander { switch(t.getType()) { case IToken.tSTRING: case IToken.tLSTRING: + case IToken.tUTF16STRING: + case IToken.tUTF32STRING: case IToken.tCHAR: case IToken.tLCHAR: + case IToken.tUTF16CHAR: + case IToken.tUTF32CHAR: final char[] image= t.getCharImage(); for (final char c : image) { if (c == '"' || c == '\\') { diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/StringType.java 
b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/StringType.java
new file mode 100644
index 00000000000..43982a1bab9
--- /dev/null
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/StringType.java
@@ -0,0 +1,88 @@
+/*******************************************************************************
+ * Copyright (c) 2009 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     Mike Kucera (IBM) - Initial API and implementation
+ *******************************************************************************/
+package org.eclipse.cdt.internal.core.parser.scanner;
+
+import org.eclipse.cdt.core.parser.IToken;
+
+/**
+ * Classifies string literal tokens by their encoding prefix and supplies the
+ * prefix characters and {@link IToken} type that belong to each kind.
+ * <p>
+ * The constants are declared from narrowest to widest; {@link #max} relies on
+ * that declaration order.
+ */
+@SuppressWarnings("nls")
+public enum StringType {
+
+    // Declaration order is significant: narrowest first, see max().
+    NARROW("", IToken.tSTRING),
+    WIDE("L", IToken.tLSTRING),
+    UTF16("u", IToken.tUTF16STRING),
+    UTF32("U", IToken.tUTF32STRING);
+
+    private final char[] prefix;
+    private final int tokenVal;
+
+    private StringType(String prefix, int tokenVal) {
+        this.prefix = prefix.toCharArray();
+        this.tokenVal = tokenVal;
+    }
+
+    /**
+     * Returns the characters that precede the opening quote of a literal of
+     * this type (empty for {@link #NARROW}).
+     *
+     * @return a fresh copy of the prefix, so callers cannot modify the array
+     *         shared by this constant
+     */
+    public char[] getPrefix() {
+        return prefix.clone();
+    }
+
+    /**
+     * Returns the {@link IToken} type constant associated with this string type.
+     */
+    public int getTokenValue() {
+        return tokenVal;
+    }
+
+    /**
+     * Returns the StringType value that represents the 'wider'
+     * of the two given StringTypes, i.e. the one declared later in this enum.
+     *
+     * @throws NullPointerException if an argument is null
+     */
+    public static StringType max(StringType st1, StringType st2) {
+        // Enum.compareTo orders constants by declaration position.
+        return st1.compareTo(st2) >= 0 ? st1 : st2;
+    }
+
+    /**
+     * Returns the StringType value for the given string literal type.
+     *
+     * @see IToken#tSTRING
+     * @see IToken#tLSTRING
+     * @see IToken#tUTF16STRING
+     * @see IToken#tUTF32STRING
+     *
+     * @throws IllegalArgumentException if the tokenVal does not represent a string literal
+     */
+    public static StringType fromToken(int tokenVal) {
+        switch (tokenVal) {
+        case IToken.tSTRING: return NARROW;
+        case IToken.tLSTRING: return WIDE;
+        case IToken.tUTF16STRING: return UTF16;
+        case IToken.tUTF32STRING: return UTF32;
+        default:
+            throw new IllegalArgumentException(tokenVal + " is not a string token");
+        }
+    }
+}