Fix parsing of international files: read each file using its own charset and accept Unicode identifier characters in the scanner

Bugs 70852, 70927
2025-08-29 19:23:38 +02:00 · 2004-08-13 19:52:32 +00:00 · 2004-08-13 19:52:32 +00:00 · c2f1517d54
commit c2f1517d54
parent 4d63aafdaa
7 changed files with 54 additions and 35 deletions
--- a/core/org.eclipse.cdt.core/browser/org/eclipse/cdt/internal/core/browser/cache/TypeParser.java
+++ b/core/org.eclipse.cdt.core/browser/org/eclipse/cdt/internal/core/browser/cache/TypeParser.java
@ -452,7 +452,7 @@ public class TypeParser implements ISourceElementRequestor {
 			try {
 				contents = file.getContents();
 				if (contents != null)
-					reader = new CodeReader(resource.getLocation().toOSString(), contents);
+					reader = new CodeReader(resource.getLocation().toOSString(), file.getCharset(), contents);
 			} catch (CoreException ex) {
 				ex.printStackTrace();
 			} catch (IOException e) {
--- a/core/org.eclipse.cdt.core/index/org/eclipse/cdt/internal/core/search/indexing/SourceIndexer.java
+++ b/core/org.eclipse.cdt.core/index/org/eclipse/cdt/internal/core/search/indexing/SourceIndexer.java
@ -103,7 +103,7 @@ public class SourceIndexer extends AbstractIndexer {
 		InputStream contents = null;
 		try {
 			contents = resourceFile.getContents();
-			CodeReader reader = new CodeReader(resourceFile.getLocation().toOSString(), contents);
+			CodeReader reader = new CodeReader(resourceFile.getLocation().toOSString(), resourceFile.getCharset(), contents);
 			parser = ParserFactory.createParser( 
 							ParserFactory.createScanner(reader, scanInfo, ParserMode.COMPLETE_PARSE, language, requestor, ParserUtil.getScannerLogService(), null ), 
 							requestor, ParserMode.COMPLETE_PARSE, language, ParserUtil.getParserLogService() );
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/CodeReader.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/core/parser/CodeReader.java
@ -25,8 +25,8 @@ import org.eclipse.cdt.internal.core.parser.scanner2.CharArrayUtils;
 * @author jcamelon
 */
 public class CodeReader {
-
+    private static final String SYSTEM_DEFAULT_ENCODING = System.getProperty( "file.encoding" ); //$NON-NLS-1$
-	private static final String UTF_8 = "UTF-8"; //$NON-NLS-1$
+	//private static final String UTF_8 = "UTF-8"; //$NON-NLS-1$
 	private static final String NF = "<text>"; //$NON-NLS-1$
 	private static final char [] NOFILE = NF.toCharArray(); //$NON-NLS-1$
@ -51,22 +51,31 @@ public class CodeReader {
 		FileInputStream stream = new FileInputStream(filename);
 		try {
-			buffer = load(stream);
+			buffer = load(SYSTEM_DEFAULT_ENCODING, stream);
 		} finally {
 			stream.close();
 		}
 	}
-	
+	public CodeReader(String filename, String charSet ) throws IOException
-	// If you have a handle on a stream to the file, e.g. IFile.getContents()
+	{
 	public CodeReader(String filename, InputStream stream) throws IOException {
 		this.filename = filename.toCharArray();
 		FileInputStream stream = new FileInputStream(filename);
 		try {
 			buffer = load(charSet, stream);
 		} finally {
 			stream.close();
 		}
 	}
 	public CodeReader( String fileName, String charSet, InputStream stream ) throws IOException {
 	    filename = fileName.toCharArray();
 		FileInputStream fstream = 
 			(stream instanceof FileInputStream)
 				? (FileInputStream)stream
-				: new FileInputStream(filename);
+				: new FileInputStream(fileName);
 		try {
-			buffer = load(fstream);
+			buffer = load(charSet, fstream);
 		} finally {
 			// If we create the FileInputStream we need close to it when done,
 			// if not we figure the above layer will do it.
@ -76,21 +85,22 @@ public class CodeReader {
 		}
 	}
-	private char[] load(FileInputStream stream) throws IOException {
+	private char[] load( String charSet, FileInputStream stream ) throws IOException {
-		FileChannel channel = stream.getChannel();
+	    String encoding = Charset.isSupported( charSet ) ? charSet : SYSTEM_DEFAULT_ENCODING; 
        FileChannel channel = stream.getChannel();
 		ByteBuffer byteBuffer = ByteBuffer.allocateDirect((int)channel.size());
 		channel.read(byteBuffer);
 		byteBuffer.rewind();
-		// TODO use the real encoding
+		
-		CharBuffer charBuffer = Charset.forName(UTF_8).decode(byteBuffer);
+		CharBuffer charBuffer = Charset.forName(encoding).decode(byteBuffer);
 		if (charBuffer.hasArray())
 			return charBuffer.array();
 		// Got to copy it out
 		char[] buff = new char[charBuffer.length()];
 		charBuffer.get(buff);
 		return buff;
 	}
 	protected char[] xload(FileInputStream stream) throws IOException {
@ -98,7 +108,7 @@ public class CodeReader {
 		MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
 		// TODO use the real encoding
-		CharBuffer charBuffer = Charset.forName(UTF_8).decode(map);
+		CharBuffer charBuffer = Charset.forName(SYSTEM_DEFAULT_ENCODING).decode(map);
 		if (charBuffer.hasArray())
 			return charBuffer.array();
--- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/Scanner2.java
+++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner2/Scanner2.java
@ -732,6 +732,12 @@ public class Scanner2 implements IScanner, IScannerData {
 					return newToken(IToken.tCOMMA );
 				default:
 				    if( Character.isLetter( buffer[pos] ) ){
 				        t = scanIdentifier();
 						if (t instanceof MacroExpansionToken)
 							continue;
 						return t;
 				    }
 					// skip over anything we don't handle
 			}
 		}
@ -773,7 +779,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++bufferPos[bufferStackPos] < limit) {
 			char c = buffer[bufferPos[bufferStackPos]];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) {
+				|| c == '_' || (c >= '0' && c <= '9') || Character.isUnicodeIdentifierPart(c) ) {
 				++len;
 				continue;
 			}
@ -1346,7 +1352,7 @@ public class Scanner2 implements IScanner, IScannerData {
 			while (++bufferPos[bufferStackPos] < limit) {
 				c = buffer[bufferPos[bufferStackPos]];
 				if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-						|| c == '_' || (c >= '0' && c <= '9')) {
+						|| c == '_' || (c >= '0' && c <= '9') || Character.isUnicodeIdentifierPart(c)) {
 					++len;
 					continue;
 				}
@ -1481,7 +1487,7 @@ public class Scanner2 implements IScanner, IScannerData {
 			return;
 		char c = buffer[idstart];
-		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_')) {
+		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Character.isUnicodeIdentifierPart(c))) {
 		    handleProblem( IProblem.PREPROCESSOR_INVALID_MACRO_DEFN, idstart, null );
 			skipToNewLine();
 			return;
@ -1491,7 +1497,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++bufferPos[bufferStackPos] < limit) {
 			c = buffer[bufferPos[bufferStackPos]];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) {
+					|| c == '_' || (c >= '0' && c <= '9') || Character.isUnicodeIdentifierPart(c)) {
 				++idlen;
 				continue;
 			}  
@ -1528,7 +1534,7 @@ public class Scanner2 implements IScanner, IScannerData {
 					bufferPos[bufferStackPos] += 2;
 					arglist[++currarg] = "...".toCharArray(); //$NON-NLS-1$
 					continue;
-				} else if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')) {
+				} else if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Character.isUnicodeIdentifierPart(c))) {
 				    handleProblem( IProblem.PREPROCESSOR_INVALID_MACRO_DEFN, idstart, name );
 					// yuck
 					skipToNewLine();
@ -1655,7 +1661,7 @@ public class Scanner2 implements IScanner, IScannerData {
 			return;
 		char c = buffer[idstart];
-		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_')) {
+		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Character.isUnicodeIdentifierPart(c))) {
 			skipToNewLine();
 			return;
 		}
@ -1664,7 +1670,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++bufferPos[bufferStackPos] < limit) {
 			c = buffer[bufferPos[bufferStackPos]];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) {
+					|| c == '_' || (c >= '0' && c <= '9' || Character.isUnicodeIdentifierPart(c))) {
 				++idlen;
 				continue;
 			} 
@ -1700,7 +1706,7 @@ public class Scanner2 implements IScanner, IScannerData {
 			return;
 		char c = buffer[idstart];
-		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_')) {
+		if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Character.isUnicodeIdentifierPart(c))) {
 			skipToNewLine();
 			return;
 		}
@ -1709,7 +1715,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++bufferPos[bufferStackPos] < limit) {
 			c = buffer[bufferPos[bufferStackPos]];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) {
+					|| c == '_' || (c >= '0' && c <= '9' || Character.isUnicodeIdentifierPart(c))) {
 				++idlen;
 				continue;
 			} 
@ -2048,7 +2054,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++bufferPos[bufferStackPos] < limit) {
 			char c = buffer[bufferPos[bufferStackPos]];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) {
+					|| c == '_' || (c >= '0' && c <= '9') || Character.isUnicodeIdentifierPart(c)) {
 				continue;
 			} 
 			break;
@ -2274,14 +2280,14 @@ public class Scanner2 implements IScanner, IScannerData {
 		while (++pos < limit) {
 			char c = expansion[pos];
-			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
+			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Character.isUnicodeIdentifierPart(c)) {
 				wsstart = -1;
 				int idstart = pos;
 				while (++pos < limit) {
 					c = expansion[pos];
 					if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-							|| (c >= '0' && c <= '9') || c == '_')) {
+							|| (c >= '0' && c <= '9') || c == '_' || Character.isUnicodeIdentifierPart(c))) {
 						break;
 					}
 				}
@ -2482,11 +2488,11 @@ public class Scanner2 implements IScanner, IScannerData {
 					// grab the identifier
 					c = expansion[pos];
 					int idstart = pos;
-					if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'X') || c == '_') {
+					if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'X') || c == '_' || Character.isUnicodeIdentifierPart(c)) {
 						while (++pos < limit) {
 						    c = expansion[pos];
 							if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'X')
-									|| (c >= '0' && c <= '9') || c == '_') )
+									|| (c >= '0' && c <= '9') || c == '_' || Character.isUnicodeIdentifierPart(c)) )
 								break;
 						}
 					} // else TODO something
@ -3092,7 +3098,7 @@ public class Scanner2 implements IScanner, IScannerData {
 		{
 			char c = prefix[i];
 			if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-					|| c == '_' || (c >= '0' && c <= '9')) 
+					|| c == '_' || (c >= '0' && c <= '9') || Character.isUnicodeIdentifierPart(c) ) 
 				continue;
 			handleInvalidCompletion();
 		}
--- a/core/org.eclipse.cdt.core/search/org/eclipse/cdt/internal/core/search/matching/MatchLocator.java
+++ b/core/org.eclipse.cdt.core/search/org/eclipse/cdt/internal/core/search/matching/MatchLocator.java
@ -507,7 +507,7 @@ public class MatchLocator implements IMatchLocator{
 							if (currentResource.isAccessible() && currentResource instanceof IFile) {
 								IFile file = (IFile) currentResource;
 								contents = file.getContents();
-								reader = new CodeReader(currentResource.getLocation().toOSString(), contents);
+								reader = new CodeReader(currentResource.getLocation().toOSString(), file.getCharset(), contents);
 								realPath = currentResource.getLocation();
 								project = file.getProject();
 							} else {
--- a/core/org.eclipse.cdt.core/src/org/eclipse/cdt/core/parser/ParserUtil.java
+++ b/core/org.eclipse.cdt.core/src/org/eclipse/cdt/core/parser/ParserUtil.java
@ -70,7 +70,7 @@ public class ParserUtil
 				try
 				{
 					in = ((IFile)resultingResource).getContents();
-					return new CodeReader(finalPath, in);
+					return new CodeReader(finalPath, ((IFile)resultingResource).getCharset(), in);
 				} finally {
 					if (in != null)
 					{
--- a/core/org.eclipse.cdt.ui/src/org/eclipse/cdt/internal/ui/search/actions/SelectionParseAction.java
+++ b/core/org.eclipse.cdt.ui/src/org/eclipse/cdt/internal/ui/search/actions/SelectionParseAction.java
@ -32,6 +32,7 @@ import org.eclipse.cdt.internal.ui.search.CSearchMessages;
 import org.eclipse.cdt.ui.CUIPlugin;
 import org.eclipse.core.resources.IFile;
 import org.eclipse.core.resources.IProject;
 import org.eclipse.core.runtime.CoreException;
 import org.eclipse.jface.action.Action;
 import org.eclipse.jface.action.IStatusLineManager;
 import org.eclipse.jface.text.BadLocationException;
@ -101,12 +102,14 @@ public class SelectionParseAction extends Action {
 		CodeReader reader = null;
 		try {
 			if( workingCopy == null )
-				reader = new CodeReader(resourceFile.getLocation().toOSString());
+				reader = new CodeReader(resourceFile.getLocation().toOSString(), resourceFile.getCharset() );
 			else 
 				reader = new CodeReader(resourceFile.getLocation().toOSString(), workingCopy.getContents());
 		} catch (IOException e) {
 			e.printStackTrace();
-		}
+		} catch ( CoreException e ) {
            e.printStackTrace();
        }
 		try
 		{