1
0
Fork 0
mirror of https://github.com/eclipse-cdt/cdt synced 2025-04-29 19:45:01 +02:00

Bug 320157: Endless loop decoding large file.

This commit is contained in:
Markus Schorn 2010-08-25 11:36:52 +00:00
parent 0f63f42919
commit 9f594c8aee
4 changed files with 284 additions and 128 deletions

View file

@ -0,0 +1,117 @@
/*******************************************************************************
* Copyright (c) 2010 Wind River Systems, Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Markus Schorn - Initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.core.parser.tests.scanner;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import junit.framework.TestSuite;
import org.eclipse.cdt.core.testplugin.util.BaseTestCase;
import org.eclipse.cdt.internal.core.parser.scanner.AbstractCharArray;
import org.eclipse.cdt.internal.core.parser.scanner.FileCharArray;
import org.eclipse.cdt.internal.core.parser.scanner.LazyCharArray;
/**
 * Tests for {@link FileCharArray} (bug 320157): verifies that a file larger
 * than one chunk decodes correctly around the chunk boundary, both when every
 * character is a single-byte UTF-8 sequence ("aligned") and when the first
 * character is a two-byte sequence so that all chunk boundaries fall at odd
 * byte offsets ("unaligned"). Also verifies that content survives a simulated
 * collection of the soft-referenced chunk data.
 */
public class FileCharArrayTests extends BaseTestCase {
	public static TestSuite suite() {
		return suite(FileCharArrayTests.class);
	}

	// Temporary data file created by createFile(); removed in tearDown().
	private File fFile;

	@Override
	protected void tearDown() throws Exception {
		if (fFile != null) {
			fFile.delete();
			fFile= null; // don't retry the delete if tearDown is invoked again
		}
		super.tearDown(); // fix: base-class cleanup was previously skipped
	}

	public void testAlignedMinus() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE * 3 - 1);
	}

	public void testAlignedEven() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE * 3);
	}

	public void testAlignedPlus() throws IOException {
		testFile(true, LazyCharArray.CHUNK_SIZE * 3 + 1);
	}

	public void testUnAlignedMinus() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE * 3 - 1);
	}

	public void testUnAlignedEven() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE * 3);
	}

	public void testUnAlignedPlus() throws IOException {
		testFile(false, LazyCharArray.CHUNK_SIZE * 3 + 1);
	}

	/**
	 * Creates a data file of {@code charSize} characters, decodes it through
	 * {@link FileCharArray#create}, and checks the content twice: once freshly
	 * decoded, and once more after the chunk data has been cleared via
	 * {@link LazyCharArray#testClearData()} and must be re-read from disk.
	 */
	private void testFile(boolean aligned, int charSize) throws IOException {
		createFile(aligned, charSize);
		AbstractCharArray charArray;
		final FileInputStream inputStream = new FileInputStream(fFile);
		try {
			charArray = FileCharArray.create(fFile.getPath(), "utf-8", inputStream);
		} finally {
			inputStream.close();
		}
		// Start checking at the second chunk; the interesting cases are the
		// chunk boundaries, and the first character differs in the unaligned case.
		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
		assertEquals(charSize, charArray.getLength());
		// Simulate garbage collection of the soft-referenced chunks, then re-check.
		((LazyCharArray) charArray).testClearData();
		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
		assertEquals(charSize, charArray.getLength());
	}

	/**
	 * Asserts that characters in {@code [from, to)} follow the generated
	 * pattern {@code charAt(i) == i % 127}, exercising both
	 * {@link AbstractCharArray#get(int)} and arraycopy (including copies that
	 * may straddle a chunk boundary).
	 */
	public void checkContent(AbstractCharArray charArray, int from, int to) {
		for (int i = from; i < to; i++) {
			assertEquals(i % 127, charArray.get(i));
			if (i + 3 <= to) {
				char[] dest = new char[3];
				charArray.arraycopy(i, dest, 0, 3);
				for (int j = 0; j < dest.length; j++) {
					assertEquals((i + j) % 127, dest[j]);
				}
			}
		}
	}

	/**
	 * Writes {@code charSize} characters as UTF-8 following the pattern
	 * {@code charAt(i) == i % 127}. When {@code aligned} is false, the first
	 * character is U+00A2 (encoded as the two bytes 0xc2 0xa2), which shifts
	 * every subsequent character to an odd byte offset so that chunk
	 * boundaries do not coincide with character boundaries.
	 */
	private void createFile(boolean aligned, int charSize) throws IOException {
		fFile = File.createTempFile("data", ".txt");
		OutputStream out = new BufferedOutputStream(new FileOutputStream(fFile));
		try {
			if (!aligned) {
				out.write(0xc2);
				out.write(0xa2);
			} else {
				out.write(0);
			}
			for (int i = 1; i < charSize; i++) {
				out.write(i % 127);
			}
		} finally {
			out.close();
		}
	}
}

View file

@ -26,6 +26,7 @@ public class ScannerTestSuite extends TestSuite {
suite.addTest(ExpansionExplorerTests.suite()); suite.addTest(ExpansionExplorerTests.suite());
suite.addTest(InactiveCodeTests.suite()); suite.addTest(InactiveCodeTests.suite());
suite.addTest(StreamHasherTests.suite()); suite.addTest(StreamHasherTests.suite());
suite.addTest(FileCharArrayTests.suite());
return suite; return suite;
} }
} }

View file

@ -19,8 +19,11 @@ import java.nio.CharBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import org.eclipse.cdt.core.CCorePlugin;
/** /**
* Implementation of char array for a file referencing content via * Implementation of char array for a file referencing content via
* soft references. * soft references.
@ -81,6 +84,10 @@ public class FileCharArray extends LazyCharArray {
private String fFileName; private String fFileName;
private String fCharSet; private String fCharSet;
private FileChannel fChannel; private FileChannel fChannel;
private long fNextFileOffset= 0;
private int fNextCharOffset= 0;
private boolean fReachedEOF= false;
private FileCharArray(String fileName, String charSet) { private FileCharArray(String fileName, String charSet) {
fFileName= fileName; fFileName= fileName;
@ -88,7 +95,7 @@ public class FileCharArray extends LazyCharArray {
} }
@Override @Override
protected Chunk createChunk(int chunkOffset) { protected Chunk createChunk(int chunkNumber) {
FileInputStream fis; FileInputStream fis;
try { try {
fis = new FileInputStream(fFileName); fis = new FileInputStream(fFileName);
@ -98,7 +105,7 @@ public class FileCharArray extends LazyCharArray {
} }
fChannel= fis.getChannel(); fChannel= fis.getChannel();
try { try {
return super.createChunk(chunkOffset); return super.createChunk(chunkNumber);
} finally { } finally {
fChannel= null; fChannel= null;
try { try {
@ -109,7 +116,11 @@ public class FileCharArray extends LazyCharArray {
} }
@Override @Override
protected char[] readChunkData(long fileOffset, long[] fileEndOffsetHolder) throws IOException { protected Chunk nextChunk() {
if (fReachedEOF)
return null;
try {
assert fChannel != null; assert fChannel != null;
final Charset charset = Charset.forName(fCharSet); final Charset charset = Charset.forName(fCharSet);
final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE) final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
@ -119,30 +130,46 @@ public class FileCharArray extends LazyCharArray {
final ByteBuffer in = ByteBuffer.allocate(needBytes); final ByteBuffer in = ByteBuffer.allocate(needBytes);
final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE); final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
boolean endOfInput= false; boolean eof;
while (dest.position() < CHUNK_SIZE && !endOfInput) { CoderResult result;
fChannel.position(fileOffset); long fileOffset= fNextFileOffset;
do {
in.clear(); in.clear();
int count= fChannel.read(in); fChannel.position(fileOffset);
if (count == -1) { fChannel.read(in);
break; eof= in.remaining() > 0;
}
endOfInput= count < in.capacity();
in.flip(); in.flip();
if (fileOffset == 0) { if (fileOffset == 0) {
skipUTF8ByteOrderMark(in, fCharSet); skipUTF8ByteOrderMark(in, fCharSet);
} }
decoder.decode(in, dest, endOfInput); result = decoder.decode(in, dest, eof);
fileOffset+= in.position(); fileOffset+= in.position();
} } while (result == CoderResult.UNDERFLOW && !eof);
fileEndOffsetHolder[0]= fileOffset;
dest.flip(); dest.flip();
return extractChars(dest); if (dest.remaining() == 0) {
fReachedEOF= true;
return null;
}
if (eof && result == CoderResult.UNDERFLOW) {
fReachedEOF= true;
}
final char[] chars = extractChars(dest);
Chunk chunk = newChunk(fNextFileOffset, fileOffset, fNextCharOffset, chars);
fNextFileOffset= fileOffset;
fNextCharOffset+= chars.length;
return chunk;
} catch (Exception e) {
// The file cannot be read
CCorePlugin.log(e);
fReachedEOF= true;
return null;
}
} }
@Override @Override
protected void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest) { protected void rereadChunkData(Chunk chunk, char[] dest) {
FileInputStream fis; FileInputStream fis;
try { try {
fis = new FileInputStream(fFileName); fis = new FileInputStream(fFileName);
@ -152,7 +179,7 @@ public class FileCharArray extends LazyCharArray {
} }
try { try {
FileChannel channel = fis.getChannel(); FileChannel channel = fis.getChannel();
decode(channel, fileOffset, fileEndOffset, CharBuffer.wrap(dest)); decode(channel, chunk.fSourceOffset, chunk.fSourceEndOffset, CharBuffer.wrap(dest));
} catch (IOException e) { } catch (IOException e) {
// File cannot be read // File cannot be read
} finally { } finally {
@ -168,11 +195,10 @@ public class FileCharArray extends LazyCharArray {
final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE) final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE); .onUnmappableCharacter(CodingErrorAction.REPLACE);
int needBytes = (int) (fileEndOffset - fileOffset); final ByteBuffer in = ByteBuffer.allocate((int) (fileEndOffset - fileOffset));
final ByteBuffer in = ByteBuffer.allocate(needBytes);
channel.position(fileOffset);
in.clear(); in.clear();
channel.position(fileOffset);
channel.read(in); channel.read(in);
in.flip(); in.flip();
if (fileOffset == 0) { if (fileOffset == 0) {

View file

@ -18,32 +18,37 @@ import java.util.List;
/** /**
* Implementation of char array for a file referencing content via * Implementation of char array for a file referencing content via
* soft references. * soft references.
* Because of bug 320157 we need to deal with chunks of different length.
*/ */
public abstract class LazyCharArray extends AbstractCharArray { public abstract class LazyCharArray extends AbstractCharArray {
private final static int CHUNK_BITS= 16; // 2^16 == 64K private final static int CHUNK_BITS= 16; // 2^16 == 64K
protected final static int CHUNK_SIZE= 1 << CHUNK_BITS; public final static int CHUNK_SIZE= 1 << CHUNK_BITS;
protected static class Chunk { protected static class Chunk {
final int fDataLength; final int fCharOffset;
final long fFileOffset; final int fCharEndOffset;
final long fFileEndOffset; final long fSourceOffset;
private SoftReference<char[]> fData; final long fSourceEndOffset;
private SoftReference<char[]> fCharsReference;
private Chunk(long fileOffset, long fileEndOffset, char[] data) { private Chunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
fDataLength= data.length; fCharOffset= charOffset;
fFileOffset= fileOffset; fCharEndOffset= charOffset+ chars.length;
fFileEndOffset= fileEndOffset; fSourceOffset= sourceOffset;
fData= new SoftReference<char[]>(data); fSourceEndOffset= sourceEndOffset;
fCharsReference= new SoftReference<char[]>(chars);
} }
} }
private int fLength= -1; private int fLength= -1;
private List<Chunk> fChunks= new ArrayList<Chunk>(); private List<Chunk> fChunks= new ArrayList<Chunk>();
private StreamHasher hasher; private StreamHasher fHasher;
private long hash64; private long fHash64;
// Make a reference to the currently used char[], such that it is not collected.
private char[] fCurrentChars;
protected LazyCharArray() { protected LazyCharArray() {
hasher = new StreamHasher(); fHasher = new StreamHasher();
} }
@Override @Override
@ -53,7 +58,7 @@ public abstract class LazyCharArray extends AbstractCharArray {
@Override @Override
public final int getLength() { public final int getLength() {
readUpTo(Integer.MAX_VALUE); readAllChunks();
return fLength; return fLength;
} }
@ -62,131 +67,138 @@ public abstract class LazyCharArray extends AbstractCharArray {
if (offset < 0) if (offset < 0)
return false; return false;
readUpTo(offset);
if (fLength >= 0) if (fLength >= 0)
return offset < fLength; return offset < fLength;
assert offset < fChunks.size() << CHUNK_BITS; return getChunkForOffset(offset) != null;
return true;
} }
@Override @Override
public long getContentsHash() { public long getContentsHash() {
if (hasher != null) { if (fHasher != null) {
readUpTo(Integer.MAX_VALUE); readAllChunks();
hash64 = hasher.computeHash(); fHash64 = fHasher.computeHash();
hasher = null; fHasher = null;
} }
return hash64; return fHash64;
}
private void readUpTo(int offset) {
if (fLength >= 0)
return;
final int chunkOffset= offset >> CHUNK_BITS;
getChunkData(chunkOffset);
} }
@Override @Override
public final char get(int offset) { public final char get(int offset) {
int chunkOffset= offset >> CHUNK_BITS; Chunk chunk= getChunkForOffset(offset);
char[] data= getChunkData(chunkOffset); if (chunk != null) {
return data[offset & (CHUNK_SIZE - 1)]; return getChunkData(chunk)[offset - chunk.fCharOffset];
}
return 0;
} }
@Override @Override
public final void arraycopy(int offset, char[] destination, int destinationPos, int length) { public final void arraycopy(int offset, char[] destination, int destinationPos, int length) {
int chunkOffset= offset >> CHUNK_BITS; final Chunk chunk= getChunkForOffset(offset);
int loffset= offset & (CHUNK_SIZE - 1); final int offsetInChunk= offset-chunk.fCharOffset;
char[] data= getChunkData(chunkOffset); final char[] data= getChunkData(chunk);
final int canCopy = data.length - loffset; final int maxLenInChunk = data.length - offsetInChunk;
if (length <= canCopy) { if (length <= maxLenInChunk) {
System.arraycopy(data, loffset, destination, destinationPos, length); System.arraycopy(data, offsetInChunk, destination, destinationPos, length);
return; } else {
System.arraycopy(data, offsetInChunk, destination, destinationPos, maxLenInChunk);
arraycopy(offset+maxLenInChunk, destination, destinationPos+maxLenInChunk, length-maxLenInChunk);
} }
System.arraycopy(data, loffset, destination, destinationPos, canCopy);
arraycopy(offset+canCopy, destination, destinationPos+canCopy, length-canCopy);
} }
private char[] getChunkData(int chunkOffset) { private void readAllChunks() {
Chunk chunk= getChunk(chunkOffset); if (fLength < 0) {
if (chunk != null) { getChunkForOffset(Integer.MAX_VALUE);
char[] data= chunk.fData.get();
if (data != null)
return data;
return loadChunkData(chunk);
} }
}
private Chunk getChunkForOffset(int offset) {
int minChunkNumber= offset >> CHUNK_BITS;
for(;;) {
Chunk chunk= getChunkByNumber(minChunkNumber);
if (chunk == null)
return null; return null;
if (offset < chunk.fCharEndOffset) {
return chunk;
}
minChunkNumber++;
}
} }
private Chunk getChunk(int chunkOffset) { private Chunk getChunkByNumber(int chunkNumber) {
final int chunkCount = fChunks.size(); final int chunkCount = fChunks.size();
if (chunkOffset < chunkCount) if (chunkNumber < chunkCount)
return fChunks.get(chunkOffset); return fChunks.get(chunkNumber);
if (fLength >=0) if (fLength >=0)
return null; return null;
return createChunk(chunkOffset); return createChunk(chunkNumber);
} }
/** /**
* Called when a chunk is requested for the first time. There is no * Called when a chunk is requested for the first time. There is no
* need to override this method. * need to override this method.
*/ */
protected Chunk createChunk(int chunkOffset) { protected Chunk createChunk(int chunkNumber) {
for (int i = fChunks.size(); i <= chunkNumber; i++) {
Chunk chunk= nextChunk();
if (chunk == null) {
final int chunkCount= fChunks.size(); final int chunkCount= fChunks.size();
long fileOffset= chunkCount == 0 ? 0 : fChunks.get(chunkCount - 1).fFileEndOffset; fLength= chunkCount == 0 ? 0 : fChunks.get(chunkCount-1).fCharEndOffset;
try {
for (int i = chunkCount; i <= chunkOffset; i++) {
long[] fileEndOffset= {0};
char[] data= readChunkData(fileOffset, fileEndOffset);
final int charCount= data.length;
if (charCount == 0) {
fLength= fChunks.size() * CHUNK_SIZE;
break; break;
} }
if (hasher != null) { if (fHasher != null) {
hasher.addChunk(data); final char[] chunkData = getChunkData(chunk);
fHasher.addChunk(chunkData);
} }
// New chunk
Chunk chunk= new Chunk(fileOffset, fileEndOffset[0], data);
fChunks.add(chunk); fChunks.add(chunk);
if (charCount < CHUNK_SIZE) {
fLength= (fChunks.size() - 1) * CHUNK_SIZE + charCount;
break;
}
fileOffset= fileEndOffset[0];
}
} catch (Exception e) {
// File cannot be read
return null;
} }
if (chunkOffset < fChunks.size()) if (chunkNumber < fChunks.size())
return fChunks.get(chunkOffset); return fChunks.get(chunkNumber);
return null; return null;
} }
private char[] loadChunkData(Chunk chunk) {
char[] result= new char[chunk.fDataLength];
rereadChunkData(chunk.fFileOffset, chunk.fFileEndOffset, result);
chunk.fData= new SoftReference<char[]>(result);
return result;
}
/** /**
* Read the chunk data at the given source offset and provide the end-offset in * Creates a new chunk.
* the source.
*/ */
protected abstract char[] readChunkData(long sourceOffset, long[] sourceEndOffsetHolder) throws Exception; protected Chunk newChunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
fCurrentChars= chars;
return new Chunk(sourceOffset, sourceEndOffset, charOffset, chars);
}
/** /**
* Read the chunk data at the given source range. In case the source range no longer (fully) exists, * Read the next chunk from the input.
*/
protected abstract Chunk nextChunk();
private char[] getChunkData(Chunk chunk) {
char[] data= chunk.fCharsReference.get();
if (data == null) {
data= new char[chunk.fCharEndOffset - chunk.fCharOffset];
rereadChunkData(chunk, data);
chunk.fCharsReference= new SoftReference<char[]>(data);
}
return fCurrentChars= data;
}
/**
* Reread the data for the chunk. In case the source range no longer (fully) exists,
* read as much as possible. * read as much as possible.
*/ */
protected abstract void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest); protected abstract void rereadChunkData(Chunk chunk, char[] data);
/**
* For testing purposes: Simulates that all the data gets collected.
*/
public void testClearData() {
for (Chunk chunk : fChunks) {
chunk.fCharsReference= new SoftReference<char[]>(null);
}
if (fCurrentChars != null)
fCurrentChars= null;
}
} }