eclipse-cdt/cdt (mirror of https://github.com/eclipse-cdt/cdt)

Bug 320157: Endless loop decoding large file.

Markus Schorn 2010-08-25 11:36:52 +00:00
parent 0f63f42919
commit 9f594c8aee
4 changed files with 284 additions and 128 deletions
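
In substance, the commit reworks the scanner's lazy, chunk-wise file decoding: a chunk may now hold fewer than CHUNK_SIZE chars, and the decode loop terminates on the decoder's CoderResult instead of retrying until the chunk buffer is full. A plausible trigger for the endless loop, consistent with the new code (this sketch is not CDT code; the scenario of a supplementary-plane character arriving when one char of chunk space is left is an assumption):

    import java.nio.ByteBuffer;
    import java.nio.CharBuffer;
    import java.nio.charset.CharsetDecoder;
    import java.nio.charset.CoderResult;
    import java.nio.charset.StandardCharsets;

    public class EndlessLoopSketch {
        public static void main(String[] args) {
            // U+1F600 is four UTF-8 bytes that decode to TWO chars (a surrogate pair).
            ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte) 0xF0, (byte) 0x9F, (byte) 0x98, (byte) 0x80 });
            CharBuffer dest = CharBuffer.allocate(1); // one char of room left in the chunk
            CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();

            CoderResult result = decoder.decode(in, dest, false);
            // Prints "OVERFLOW consumed=0": nothing was written and nothing was read.
            System.out.println(result + " consumed=" + in.position());
        }
    }

With only a "buffer has room and not end-of-input" guard, a caller retries this state forever; checking the CoderResult, as the new nextChunk() below does, ends the chunk early instead.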

FileCharArrayTests.java (new file, org.eclipse.cdt.core.parser.tests.scanner)

@@ -0,0 +1,117 @@
/*******************************************************************************
 * Copyright (c) 2010 Wind River Systems, Inc. and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Markus Schorn - Initial API and implementation
 *******************************************************************************/
package org.eclipse.cdt.core.parser.tests.scanner;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import junit.framework.TestSuite;

import org.eclipse.cdt.core.testplugin.util.BaseTestCase;
import org.eclipse.cdt.internal.core.parser.scanner.AbstractCharArray;
import org.eclipse.cdt.internal.core.parser.scanner.FileCharArray;
import org.eclipse.cdt.internal.core.parser.scanner.LazyCharArray;

public class FileCharArrayTests extends BaseTestCase {
	public static TestSuite suite() {
		return suite(FileCharArrayTests.class);
	}

	private File fFile;

	@Override
	protected void tearDown() throws Exception {
		if (fFile != null) {
			fFile.delete();
		}
	}

	public void testAlignedMinus() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE*3-1);
	}

	public void testAlignedEven() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE*3);
	}

	public void testAlignedPlus() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE*3+1);
	}

	public void testUnAlignedMinus() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE*3-1);
	}

	public void testUnAlignedEven() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE*3);
	}

	public void testUnAlignedPlus() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE*3+1);
	}

	private void testFile(boolean aligned, int charSize) throws IOException {
		createFile(aligned, charSize);

		AbstractCharArray charArray;
		final FileInputStream inputStream = new FileInputStream(fFile);
		try {
			charArray = FileCharArray.create(fFile.getPath(), "utf-8", inputStream);
		} finally {
			inputStream.close();
		}
		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
		assertEquals(charSize, charArray.getLength());

		((LazyCharArray) charArray).testClearData();
		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
		assertEquals(charSize, charArray.getLength());
	}

	public void checkContent(AbstractCharArray charArray, int from, int to) {
		for (int i = from; i < to; i++) {
			assertEquals(i % 127, charArray.get(i));
			if (i+3<=to) {
				char[] dest= new char[3];
				charArray.arraycopy(i, dest, 0, 3);
				for (int j = 0; j < dest.length; j++) {
					assertEquals((i+j) % 127, dest[j]);
				}
			}
		}
	}

	private void createFile(boolean aligned, int charSize) throws IOException {
		fFile= File.createTempFile("data", ".txt");
		OutputStream out= new BufferedOutputStream(new FileOutputStream(fFile));
		try {
			if (!aligned) {
				out.write(0xc2);
				out.write(0xa2);
			} else {
				out.write(0);
			}
			for (int i = 1; i < charSize; i++) {
				out.write(i % 127);
			}
		} finally {
			out.close();
		}
	}
}
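
A note on the fixtures: an "aligned" file starts with a single 0 byte, while an "unaligned" file starts with 0xC2 0xA2, the two-byte UTF-8 encoding of U+00A2. In the unaligned case byte offsets run one ahead of char offsets, exercising the separate source-offset and char-offset bookkeeping the fix introduces (fSourceOffset vs. fCharOffset). A standalone check of that premise (illustrative only, not part of the commit):

    import java.nio.charset.StandardCharsets;

    public class Utf8WidthSketch {
        public static void main(String[] args) {
            // U+00A2 (cent sign) is one Java char but two UTF-8 bytes, so every
            // subsequent char of the test file sits at byteOffset = charOffset + 1.
            String s = "\u00A2abc";
            System.out.println(s.length());                                // 4 chars
            System.out.println(s.getBytes(StandardCharsets.UTF_8).length); // 5 bytes
        }
    }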

ScannerTestSuite.java

@@ -26,6 +26,7 @@ public class ScannerTestSuite extends TestSuite {
 		suite.addTest(ExpansionExplorerTests.suite());
 		suite.addTest(InactiveCodeTests.suite());
 		suite.addTest(StreamHasherTests.suite());
+		suite.addTest(FileCharArrayTests.suite());
 		return suite;
 	}
 }

FileCharArray.java (org.eclipse.cdt.internal.core.parser.scanner)

@@ -19,8 +19,11 @@ import java.nio.CharBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 
+import org.eclipse.cdt.core.CCorePlugin;
+
 /**
  * Implementation of char array for a file referencing content via
  * soft references.
@@ -81,6 +84,10 @@ public class FileCharArray extends LazyCharArray {
 	private String fFileName;
 	private String fCharSet;
 	private FileChannel fChannel;
+	private long fNextFileOffset= 0;
+	private int fNextCharOffset= 0;
+	private boolean fReachedEOF= false;
+
 	private FileCharArray(String fileName, String charSet) {
 		fFileName= fileName;
@@ -88,7 +95,7 @@ public class FileCharArray extends LazyCharArray {
 	}
 
 	@Override
-	protected Chunk createChunk(int chunkOffset) {
+	protected Chunk createChunk(int chunkNumber) {
 		FileInputStream fis;
 		try {
 			fis = new FileInputStream(fFileName);
@@ -98,7 +105,7 @@ public class FileCharArray extends LazyCharArray {
 		}
 		fChannel= fis.getChannel();
 		try {
-			return super.createChunk(chunkOffset);
+			return super.createChunk(chunkNumber);
 		} finally {
 			fChannel= null;
 			try {
@@ -109,40 +116,60 @@ public class FileCharArray extends LazyCharArray {
 	}
 
 	@Override
-	protected char[] readChunkData(long fileOffset, long[] fileEndOffsetHolder) throws IOException {
-		assert fChannel != null;
-		final Charset charset = Charset.forName(fCharSet);
-		final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
-				.onUnmappableCharacter(CodingErrorAction.REPLACE);
+	protected Chunk nextChunk() {
+		if (fReachedEOF)
+			return null;
+		try {
+			assert fChannel != null;
+			final Charset charset = Charset.forName(fCharSet);
+			final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
+					.onUnmappableCharacter(CodingErrorAction.REPLACE);
 
-		int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
-		final ByteBuffer in = ByteBuffer.allocate(needBytes);
-		final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
+			int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
+			final ByteBuffer in = ByteBuffer.allocate(needBytes);
+			final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
 
-		boolean endOfInput= false;
-		while (dest.position() < CHUNK_SIZE && !endOfInput) {
-			fChannel.position(fileOffset);
-			in.clear();
-			int count= fChannel.read(in);
-			if (count == -1) {
-				break;
+			boolean eof;
+			CoderResult result;
+			long fileOffset= fNextFileOffset;
+			do {
+				in.clear();
+				fChannel.position(fileOffset);
+				fChannel.read(in);
+				eof= in.remaining() > 0;
+				in.flip();
+				if (fileOffset == 0) {
+					skipUTF8ByteOrderMark(in, fCharSet);
+				}
+				result = decoder.decode(in, dest, eof);
+				fileOffset+= in.position();
+			} while (result == CoderResult.UNDERFLOW && !eof);
+
+			dest.flip();
+			if (dest.remaining() == 0) {
+				fReachedEOF= true;
+				return null;
 			}
-			endOfInput= count < in.capacity();
-			in.flip();
-			if (fileOffset == 0) {
-				skipUTF8ByteOrderMark(in, fCharSet);
+			if (eof && result == CoderResult.UNDERFLOW) {
+				fReachedEOF= true;
 			}
-			decoder.decode(in, dest, endOfInput);
-			fileOffset+= in.position();
+			final char[] chars = extractChars(dest);
+			Chunk chunk = newChunk(fNextFileOffset, fileOffset, fNextCharOffset, chars);
+			fNextFileOffset= fileOffset;
+			fNextCharOffset+= chars.length;
+			return chunk;
+		} catch (Exception e) {
+			// The file cannot be read
+			CCorePlugin.log(e);
+			fReachedEOF= true;
+			return null;
 		}
-		fileEndOffsetHolder[0]= fileOffset;
-		dest.flip();
-		return extractChars(dest);
 	}
 
 	@Override
-	protected void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest) {
+	protected void rereadChunkData(Chunk chunk, char[] dest) {
 		FileInputStream fis;
 		try {
 			fis = new FileInputStream(fFileName);
@@ -152,7 +179,7 @@ public class FileCharArray extends LazyCharArray {
 		}
 		try {
 			FileChannel channel = fis.getChannel();
-			decode(channel, fileOffset, fileEndOffset, CharBuffer.wrap(dest));
+			decode(channel, chunk.fSourceOffset, chunk.fSourceEndOffset, CharBuffer.wrap(dest));
 		} catch (IOException e) {
 			// File cannot be read
 		} finally {
@@ -168,11 +195,10 @@
 		final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
 				.onUnmappableCharacter(CodingErrorAction.REPLACE);
 
-		int needBytes = (int) (fileEndOffset - fileOffset);
-		final ByteBuffer in = ByteBuffer.allocate(needBytes);
-		channel.position(fileOffset);
+		final ByteBuffer in = ByteBuffer.allocate((int) (fileEndOffset - fileOffset));
 		in.clear();
+		channel.position(fileOffset);
 		channel.read(in);
 		in.flip();
 		if (fileOffset == 0) {
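
The do/while in nextChunk() above is the core of the fix: reading continues only while the decoder reports UNDERFLOW and input remains, so an OVERFLOW (chunk buffer full, possibly one char early when a surrogate pair will not fit) ends the chunk instead of being retried. The same pattern in isolation, as a minimal sketch (names are illustrative; it uses compact() within one call where the CDT code instead repositions the FileChannel to a remembered file offset):

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.CharBuffer;
    import java.nio.channels.ReadableByteChannel;
    import java.nio.charset.CharsetDecoder;
    import java.nio.charset.CoderResult;

    public final class ChunkDecoderSketch {
        /**
         * Fills dest with decoded chars; returns false once end of input is seen.
         * Terminates even when dest cannot be filled completely, because OVERFLOW
         * (and any error result) ends the loop instead of being retried.
         */
        public static boolean decodeChunk(ReadableByteChannel channel, CharsetDecoder decoder,
                CharBuffer dest) throws IOException {
            final ByteBuffer in = ByteBuffer.allocate(4096);
            CoderResult result;
            boolean eof = false;
            do {
                if (channel.read(in) < 0)
                    eof = true;
                in.flip();
                result = decoder.decode(in, dest, eof);
                in.compact(); // keep a trailing partial multi-byte sequence for the next pass
            } while (result == CoderResult.UNDERFLOW && !eof);
            return !eof;
        }
    }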

LazyCharArray.java (org.eclipse.cdt.internal.core.parser.scanner)

@@ -18,32 +18,37 @@ import java.util.List;
 /**
  * Implementation of char array for a file referencing content via
  * soft references.
+ * Because of bug 320157 we need to deal with chunks of different length.
  */
 public abstract class LazyCharArray extends AbstractCharArray {
 	private final static int CHUNK_BITS= 16; // 2^16 == 64K
-	protected final static int CHUNK_SIZE= 1 << CHUNK_BITS;
+	public final static int CHUNK_SIZE= 1 << CHUNK_BITS;
 
 	protected static class Chunk {
-		final int fDataLength;
-		final long fFileOffset;
-		final long fFileEndOffset;
-		private SoftReference<char[]> fData;
+		final int fCharOffset;
+		final int fCharEndOffset;
+		final long fSourceOffset;
+		final long fSourceEndOffset;
+		private SoftReference<char[]> fCharsReference;
 
-		private Chunk(long fileOffset, long fileEndOffset, char[] data) {
-			fDataLength= data.length;
-			fFileOffset= fileOffset;
-			fFileEndOffset= fileEndOffset;
-			fData= new SoftReference<char[]>(data);
+		private Chunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
+			fCharOffset= charOffset;
+			fCharEndOffset= charOffset+ chars.length;
+			fSourceOffset= sourceOffset;
+			fSourceEndOffset= sourceEndOffset;
+			fCharsReference= new SoftReference<char[]>(chars);
 		}
 	}
 
 	private int fLength= -1;
 	private List<Chunk> fChunks= new ArrayList<Chunk>();
-	private StreamHasher hasher;
-	private long hash64;
+	private StreamHasher fHasher;
+	private long fHash64;
+	// Make a reference to the currently used char[], such that it is not collected.
+	private char[] fCurrentChars;
 
 	protected LazyCharArray() {
-		hasher = new StreamHasher();
+		fHasher = new StreamHasher();
 	}
 
 	@Override
@@ -53,7 +58,7 @@ public abstract class LazyCharArray extends AbstractCharArray {
 
 	@Override
 	public final int getLength() {
-		readUpTo(Integer.MAX_VALUE);
+		readAllChunks();
 		return fLength;
 	}
@@ -62,131 +67,138 @@ public abstract class LazyCharArray extends AbstractCharArray {
 		if (offset < 0)
 			return false;
 
-		readUpTo(offset);
-		if (fLength >= 0)
-			return offset < fLength;
-
-		assert offset < fChunks.size() << CHUNK_BITS;
-		return true;
+		return getChunkForOffset(offset) != null;
 	}
 
 	@Override
 	public long getContentsHash() {
-		if (hasher != null) {
-			readUpTo(Integer.MAX_VALUE);
-			hash64 = hasher.computeHash();
-			hasher = null;
+		if (fHasher != null) {
+			readAllChunks();
+			fHash64 = fHasher.computeHash();
+			fHasher = null;
 		}
-		return hash64;
-	}
-
-	private void readUpTo(int offset) {
-		if (fLength >= 0)
-			return;
-
-		final int chunkOffset= offset >> CHUNK_BITS;
-		getChunkData(chunkOffset);
+		return fHash64;
 	}
 
 	@Override
 	public final char get(int offset) {
-		int chunkOffset= offset >> CHUNK_BITS;
-		char[] data= getChunkData(chunkOffset);
-		return data[offset & (CHUNK_SIZE - 1)];
+		Chunk chunk= getChunkForOffset(offset);
+		if (chunk != null) {
+			return getChunkData(chunk)[offset - chunk.fCharOffset];
+		}
+		return 0;
 	}
 
 	@Override
 	public final void arraycopy(int offset, char[] destination, int destinationPos, int length) {
-		int chunkOffset= offset >> CHUNK_BITS;
-		int loffset= offset & (CHUNK_SIZE - 1);
-		char[] data= getChunkData(chunkOffset);
-		final int canCopy = data.length - loffset;
-		if (length <= canCopy) {
-			System.arraycopy(data, loffset, destination, destinationPos, length);
-			return;
+		final Chunk chunk= getChunkForOffset(offset);
+		final int offsetInChunk= offset-chunk.fCharOffset;
+		final char[] data= getChunkData(chunk);
+		final int maxLenInChunk = data.length - offsetInChunk;
+		if (length <= maxLenInChunk) {
+			System.arraycopy(data, offsetInChunk, destination, destinationPos, length);
+		} else {
+			System.arraycopy(data, offsetInChunk, destination, destinationPos, maxLenInChunk);
+			arraycopy(offset+maxLenInChunk, destination, destinationPos+maxLenInChunk, length-maxLenInChunk);
 		}
-		System.arraycopy(data, loffset, destination, destinationPos, canCopy);
-		arraycopy(offset+canCopy, destination, destinationPos+canCopy, length-canCopy);
 	}
 
-	private char[] getChunkData(int chunkOffset) {
-		Chunk chunk= getChunk(chunkOffset);
-		if (chunk != null) {
-			char[] data= chunk.fData.get();
-			if (data != null)
-				return data;
-
-			return loadChunkData(chunk);
+	private void readAllChunks() {
+		if (fLength < 0) {
+			getChunkForOffset(Integer.MAX_VALUE);
 		}
-		return null;
 	}
 
-	private Chunk getChunk(int chunkOffset) {
+	private Chunk getChunkForOffset(int offset) {
+		int minChunkNumber= offset >> CHUNK_BITS;
+		for(;;) {
+			Chunk chunk= getChunkByNumber(minChunkNumber);
+			if (chunk == null)
+				return null;
+			if (offset < chunk.fCharEndOffset) {
+				return chunk;
+			}
+			minChunkNumber++;
+		}
+	}
+
+	private Chunk getChunkByNumber(int chunkNumber) {
 		final int chunkCount = fChunks.size();
-		if (chunkOffset < chunkCount)
-			return fChunks.get(chunkOffset);
+		if (chunkNumber < chunkCount)
+			return fChunks.get(chunkNumber);
 
 		if (fLength >=0)
 			return null;
 
-		return createChunk(chunkOffset);
+		return createChunk(chunkNumber);
 	}
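
With variable-length chunks, offset >> CHUNK_BITS no longer identifies a chunk exactly. Since no chunk exceeds CHUNK_SIZE chars, it is still a lower bound, which is why getChunkForOffset() scans forward from there. A worked example with hypothetical chunk lengths (illustrative only, not CDT code):

    public class ChunkLookupSketch {
        public static void main(String[] args) {
            // Suppose each chunk came out one char short of 64K.
            int[] charEndOffsets = { 65535, 131070, 196605 };
            int offset = 131071;
            int chunkNumber = offset >> 16; // == 1: exact for equal-sized chunks,
                                            // only a lower bound for variable-sized ones
            while (charEndOffsets[chunkNumber] <= offset) {
                chunkNumber++;              // scan forward, as getChunkForOffset() does
            }
            System.out.println(chunkNumber); // prints 2
        }
    }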
 	/**
 	 * Called when a chunk is requested for the first time. There is no
 	 * need to override this method.
 	 */
-	protected Chunk createChunk(int chunkOffset) {
-		final int chunkCount = fChunks.size();
-		long fileOffset= chunkCount == 0 ? 0 : fChunks.get(chunkCount - 1).fFileEndOffset;
-		try {
-			for (int i = chunkCount; i <= chunkOffset; i++) {
-				long[] fileEndOffset= {0};
-				char[] data= readChunkData(fileOffset, fileEndOffset);
-				final int charCount= data.length;
-				if (charCount == 0) {
-					fLength= fChunks.size() * CHUNK_SIZE;
-					break;
-				}
-				if (hasher != null) {
-					hasher.addChunk(data);
-				}
-				// New chunk
-				Chunk chunk= new Chunk(fileOffset, fileEndOffset[0], data);
-				fChunks.add(chunk);
-				if (charCount < CHUNK_SIZE) {
-					fLength= (fChunks.size() - 1) * CHUNK_SIZE + charCount;
-					break;
-				}
-				fileOffset= fileEndOffset[0];
+	protected Chunk createChunk(int chunkNumber) {
+		for (int i = fChunks.size(); i <= chunkNumber; i++) {
+			Chunk chunk= nextChunk();
+			if (chunk == null) {
+				final int chunkCount= fChunks.size();
+				fLength= chunkCount == 0 ? 0 : fChunks.get(chunkCount-1).fCharEndOffset;
+				break;
 			}
-		} catch (Exception e) {
-			// File cannot be read
-			return null;
-		}
-		if (chunkOffset < fChunks.size())
-			return fChunks.get(chunkOffset);
+			if (fHasher != null) {
+				final char[] chunkData = getChunkData(chunk);
+				fHasher.addChunk(chunkData);
+			}
+			fChunks.add(chunk);
+		}
+		if (chunkNumber < fChunks.size())
+			return fChunks.get(chunkNumber);
 
 		return null;
 	}
 
-	private char[] loadChunkData(Chunk chunk) {
-		char[] result= new char[chunk.fDataLength];
-		rereadChunkData(chunk.fFileOffset, chunk.fFileEndOffset, result);
-		chunk.fData= new SoftReference<char[]>(result);
-		return result;
+	/**
+	 * Creates a new chunk.
+	 */
+	protected Chunk newChunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
+		fCurrentChars= chars;
+		return new Chunk(sourceOffset, sourceEndOffset, charOffset, chars);
 	}
 
 	/**
-	 * Read the chunk data at the given source offset and provide the end-offset in
-	 * the source.
+	 * Read the next chunk from the input.
 	 */
-	protected abstract char[] readChunkData(long sourceOffset, long[] sourceEndOffsetHolder) throws Exception;
+	protected abstract Chunk nextChunk();
+
+	private char[] getChunkData(Chunk chunk) {
+		char[] data= chunk.fCharsReference.get();
+		if (data == null) {
+			data= new char[chunk.fCharEndOffset - chunk.fCharOffset];
+			rereadChunkData(chunk, data);
+			chunk.fCharsReference= new SoftReference<char[]>(data);
+		}
+		return fCurrentChars= data;
+	}
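
getChunkData() implements the soft-reference cache: decoded chars may be garbage-collected under memory pressure and are then re-decoded from the recorded source range, while fCurrentChars pins the chunk currently in use so it cannot vanish between calls. The pattern in isolation (a sketch; expensiveReload() stands in for rereadChunkData()):

    import java.lang.ref.SoftReference;

    public class SoftCacheSketch {
        private SoftReference<char[]> cache = new SoftReference<char[]>(null);
        private char[] current; // strong pin: the data in active use survives GC

        /** Recomputes the data if the soft reference was cleared. */
        char[] data() {
            char[] data = cache.get();
            if (data == null) {
                data = expensiveReload(); // e.g. re-decode the source byte range
                cache = new SoftReference<char[]>(data);
            }
            return current = data;
        }

        private char[] expensiveReload() {
            return new char[] { 'a', 'b', 'c' }; // placeholder for the real reload
        }
    }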
 	/**
-	 * Read the chunk data at the given source range. In case the source range no longer (fully) exists,
+	 * Reread the data for the chunk. In case the source range no longer (fully) exists,
 	 * read as much as possible.
 	 */
-	protected abstract void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest);
+	protected abstract void rereadChunkData(Chunk chunk, char[] data);
+
+	/**
+	 * For testing purposes: Simulates that all the data gets collected.
+	 */
+	public void testClearData() {
+		for (Chunk chunk : fChunks) {
+			chunk.fCharsReference= new SoftReference<char[]>(null);
+		}
+		if (fCurrentChars != null)
+			fCurrentChars= null;
+	}
 }