mirror of
https://github.com/eclipse-cdt/cdt
synced 2025-07-25 01:45:33 +02:00
Bug 303750: FileCharArray needs to skip UTF-8 byte order mark.
This commit is contained in:
parent
3a69434dbb
commit
ab8c900a86
1 changed files with 21 additions and 3 deletions
|
@ -26,7 +26,8 @@ import java.nio.charset.CodingErrorAction;
|
|||
* soft references.
|
||||
*/
|
||||
public class FileCharArray extends LazyCharArray {
|
||||
|
||||
private static final String UTF8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$
|
||||
|
||||
public static AbstractCharArray create(String fileName, String charSet, InputStream in) throws IOException {
|
||||
// no support for non-local files
|
||||
if (!(in instanceof FileInputStream)) {
|
||||
|
@ -49,11 +50,22 @@ public class FileCharArray extends LazyCharArray {
|
|||
ByteBuffer byteBuffer = ByteBuffer.allocate(lsize);
|
||||
channel.read(byteBuffer);
|
||||
byteBuffer.flip();
|
||||
|
||||
skipUTF8ByteOrderMark(byteBuffer, charSet);
|
||||
|
||||
CharBuffer charBuffer = Charset.forName(charSet).decode(byteBuffer);
|
||||
char[] buf= extractChars(charBuffer);
|
||||
return new CharArray(buf);
|
||||
}
|
||||
|
||||
private static void skipUTF8ByteOrderMark(ByteBuffer buf, String charset) {
|
||||
if (charset.equals(UTF8_CHARSET_NAME) && buf.remaining() >= 3) {
|
||||
int pos = buf.position();
|
||||
if (buf.get(pos) == (byte) 0xEF && buf.get(++pos) == (byte) 0xBB &&
|
||||
buf.get(++pos) == (byte) 0xBF) {
|
||||
buf.position(++pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static char[] extractChars(CharBuffer charBuffer) {
|
||||
if (charBuffer.hasArray() && charBuffer.arrayOffset() == 0) {
|
||||
|
@ -103,7 +115,7 @@ public class FileCharArray extends LazyCharArray {
|
|||
final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
|
||||
int needBytes = (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
|
||||
int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
|
||||
final ByteBuffer in = ByteBuffer.allocate(needBytes);
|
||||
final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
|
||||
|
||||
|
@ -118,6 +130,9 @@ public class FileCharArray extends LazyCharArray {
|
|||
|
||||
endOfInput= count < in.capacity();
|
||||
in.flip();
|
||||
if (fileOffset == 0) {
|
||||
skipUTF8ByteOrderMark(in, fCharSet);
|
||||
}
|
||||
decoder.decode(in, dest, endOfInput);
|
||||
fileOffset+= in.position();
|
||||
}
|
||||
|
@ -160,6 +175,9 @@ public class FileCharArray extends LazyCharArray {
|
|||
in.clear();
|
||||
channel.read(in);
|
||||
in.flip();
|
||||
if (fileOffset == 0) {
|
||||
skipUTF8ByteOrderMark(in, fCharSet);
|
||||
}
|
||||
decoder.decode(in, dest, true);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue