Mirror of https://github.com/eclipse-cdt/cdt, synced 2025-04-29 19:45:01 +02:00
Bug 320157: Endless loop decoding large file.
commit 9f594c8aee
parent 0f63f42919
4 changed files with 284 additions and 128 deletions
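Context, inferred from the diff below (editorial note, not part of the commit): LazyCharArray used to assume that every chunk except the last holds exactly CHUNK_SIZE chars, and FileCharArray.readChunkData() kept decoding until its CharBuffer was full. A character that decodes to more than one Java char (for example a supplementary character encoded as a surrogate pair) could then stall the loop: with only one char of space left, the decoder reports OVERFLOW without consuming any input, the file offset never advances, and the loop never terminates. The fix lets a chunk end early, so chunks may have different lengths, and records explicit char and source ranges per chunk. A minimal, self-contained sketch of the decoder behaviour (names are illustrative, not from the commit):

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;

public class OverflowDemo {
	public static void main(String[] args) {
		// U+1D11E needs 4 UTF-8 bytes and decodes to TWO Java chars (a surrogate pair).
		ByteBuffer in = ByteBuffer.wrap("\uD834\uDD1E".getBytes(StandardCharsets.UTF_8));
		CharBuffer out = CharBuffer.allocate(1); // only one char of space left in the chunk
		CoderResult result = StandardCharsets.UTF_8.newDecoder().decode(in, out, false);
		// Prints "OVERFLOW consumed=0": no input consumed, no output produced, so a
		// loop that only tests "output buffer not yet full" can spin forever.
		System.out.println(result + " consumed=" + in.position());
	}
}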
FileCharArrayTests.java (new file)
@ -0,0 +1,117 @@
+/*******************************************************************************
+ * Copyright (c) 2010 Wind River Systems, Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     Markus Schorn - Initial API and implementation
+ *******************************************************************************/
+package org.eclipse.cdt.core.parser.tests.scanner;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import junit.framework.TestSuite;
+
+import org.eclipse.cdt.core.testplugin.util.BaseTestCase;
+import org.eclipse.cdt.internal.core.parser.scanner.AbstractCharArray;
+import org.eclipse.cdt.internal.core.parser.scanner.FileCharArray;
+import org.eclipse.cdt.internal.core.parser.scanner.LazyCharArray;
+
+public class FileCharArrayTests extends BaseTestCase {
+
+	public static TestSuite suite() {
+		return suite(FileCharArrayTests.class);
+	}
+
+	private File fFile;
+
+	@Override
+	protected void tearDown() throws Exception {
+		if (fFile != null) {
+			fFile.delete();
+		}
+	}
+
+	public void testAlignedMinus() throws IOException {
+		testFile(true, LazyCharArray.CHUNK_SIZE*3-1);
+	}
+
+	public void testAlignedEven() throws IOException {
+		testFile(true, LazyCharArray.CHUNK_SIZE*3);
+	}
+
+	public void testAlignedPlus() throws IOException {
+		testFile(true, LazyCharArray.CHUNK_SIZE*3+1);
+	}
+
+	public void testUnAlignedMinus() throws IOException {
+		testFile(false, LazyCharArray.CHUNK_SIZE*3-1);
+	}
+
+	public void testUnAlignedEven() throws IOException {
+		testFile(false, LazyCharArray.CHUNK_SIZE*3);
+	}
+
+	public void testUnAlignedPlus() throws IOException {
+		testFile(false, LazyCharArray.CHUNK_SIZE*3+1);
+	}
+
+	private void testFile(boolean aligned, int charSize) throws IOException {
+		createFile(aligned, charSize);
+
+		AbstractCharArray charArray;
+		final FileInputStream inputStream = new FileInputStream(fFile);
+		try {
+			charArray = FileCharArray.create(fFile.getPath(), "utf-8", inputStream);
+		} finally {
+			inputStream.close();
+		}
+
+		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
+		assertEquals(charSize, charArray.getLength());
+
+		((LazyCharArray) charArray).testClearData();
+
+		checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize);
+		assertEquals(charSize, charArray.getLength());
+
+	}
+
+	public void checkContent(AbstractCharArray charArray, int from, int to) {
+		for (int i = from; i < to; i++) {
+			assertEquals(i % 127, charArray.get(i));
+			if (i+3<=to) {
+				char[] dest= new char[3];
+				charArray.arraycopy(i, dest, 0, 3);
+				for (int j = 0; j < dest.length; j++) {
+					assertEquals((i+j) % 127, dest[j]);
+				}
+			}
+		}
+	}
+
+	private void createFile(boolean aligned, int charSize) throws IOException {
+		fFile= File.createTempFile("data", ".txt");
+		OutputStream out= new BufferedOutputStream(new FileOutputStream(fFile));
+		try {
+			if (!aligned) {
+				out.write(0xc2);
+				out.write(0xa2);
+			} else {
+				out.write(0);
+			}
+			for (int i = 1; i < charSize; i++) {
+				out.write(i % 127);
+			}
+		} finally {
+			out.close();
+		}
+	}
+}
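Why the tests distinguish aligned from unaligned (editorial note, not part of the commit): the unaligned variant starts the file with the two bytes 0xC2 0xA2, which decode to the single char U+00A2, so every later char offset differs from its byte offset and chunk boundaries fall at byte positions that do not line up with CHUNK_SIZE. A tiny sketch of that divergence:

import java.nio.charset.StandardCharsets;

public class AlignmentDemo {
	public static void main(String[] args) {
		// 0xC2 0xA2 is the UTF-8 encoding of U+00A2 (cent sign): 2 bytes, 1 char.
		byte[] bytes = { (byte) 0xc2, (byte) 0xa2, 'a', 'b' };
		String s = new String(bytes, StandardCharsets.UTF_8);
		System.out.println(s.length() + " chars from " + bytes.length + " bytes"); // 3 chars from 4 bytes
	}
}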
ScannerTestSuite.java
@ -26,6 +26,7 @@ public class ScannerTestSuite extends TestSuite {
 		suite.addTest(ExpansionExplorerTests.suite());
 		suite.addTest(InactiveCodeTests.suite());
 		suite.addTest(StreamHasherTests.suite());
+		suite.addTest(FileCharArrayTests.suite());
 		return suite;
 	}
 }
FileCharArray.java
@ -19,8 +19,11 @@ import java.nio.CharBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
+
+import org.eclipse.cdt.core.CCorePlugin;
 
 /**
  * Implementation of char array for a file referencing content via
  * soft references.

@ -81,6 +84,10 @@ public class FileCharArray extends LazyCharArray {
 	private String fFileName;
 	private String fCharSet;
 	private FileChannel fChannel;
+	private long fNextFileOffset= 0;
+	private int fNextCharOffset= 0;
+	private boolean fReachedEOF= false;
+
 
 	private FileCharArray(String fileName, String charSet) {
 		fFileName= fileName;

@ -88,7 +95,7 @@ public class FileCharArray extends LazyCharArray {
 	}
 
 	@Override
-	protected Chunk createChunk(int chunkOffset) {
+	protected Chunk createChunk(int chunkNumber) {
 		FileInputStream fis;
 		try {
 			fis = new FileInputStream(fFileName);

@ -98,7 +105,7 @@ public class FileCharArray extends LazyCharArray {
 		}
 		fChannel= fis.getChannel();
 		try {
-			return super.createChunk(chunkOffset);
+			return super.createChunk(chunkNumber);
 		} finally {
 			fChannel= null;
 			try {

@ -109,40 +116,60 @@ public class FileCharArray extends LazyCharArray {
 	}
 
 	@Override
-	protected char[] readChunkData(long fileOffset, long[] fileEndOffsetHolder) throws IOException {
-		assert fChannel != null;
-		final Charset charset = Charset.forName(fCharSet);
-		final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
-				.onUnmappableCharacter(CodingErrorAction.REPLACE);
-
-		int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
-		final ByteBuffer in = ByteBuffer.allocate(needBytes);
-		final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
-
-		boolean endOfInput= false;
-		while (dest.position() < CHUNK_SIZE && !endOfInput) {
-			fChannel.position(fileOffset);
-			in.clear();
-			int count= fChannel.read(in);
-			if (count == -1) {
-				break;
-			}
-			endOfInput= count < in.capacity();
-			in.flip();
-			if (fileOffset == 0) {
-				skipUTF8ByteOrderMark(in, fCharSet);
-			}
-			decoder.decode(in, dest, endOfInput);
-			fileOffset+= in.position();
-		}
-		fileEndOffsetHolder[0]= fileOffset;
-		dest.flip();
-		return extractChars(dest);
+	protected Chunk nextChunk() {
+		if (fReachedEOF)
+			return null;
+
+		try {
+			assert fChannel != null;
+			final Charset charset = Charset.forName(fCharSet);
+			final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
+					.onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+			int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors.
+			final ByteBuffer in = ByteBuffer.allocate(needBytes);
+			final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE);
+
+			boolean eof;
+			CoderResult result;
+			long fileOffset= fNextFileOffset;
+			do {
+				in.clear();
+				fChannel.position(fileOffset);
+				fChannel.read(in);
+				eof= in.remaining() > 0;
+				in.flip();
+				if (fileOffset == 0) {
+					skipUTF8ByteOrderMark(in, fCharSet);
+				}
+				result = decoder.decode(in, dest, eof);
+				fileOffset+= in.position();
+			} while (result == CoderResult.UNDERFLOW && !eof);
+
+			dest.flip();
+			if (dest.remaining() == 0) {
+				fReachedEOF= true;
+				return null;
+			}
+			if (eof && result == CoderResult.UNDERFLOW) {
+				fReachedEOF= true;
+			}
+			final char[] chars = extractChars(dest);
+			Chunk chunk = newChunk(fNextFileOffset, fileOffset, fNextCharOffset, chars);
+			fNextFileOffset= fileOffset;
+			fNextCharOffset+= chars.length;
+
+			return chunk;
+		} catch (Exception e) {
+			// The file cannot be read
+			CCorePlugin.log(e);
+			fReachedEOF= true;
+			return null;
+		}
 	}
 
 	@Override
-	protected void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest) {
+	protected void rereadChunkData(Chunk chunk, char[] dest) {
 		FileInputStream fis;
 		try {
 			fis = new FileInputStream(fFileName);
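The do/while in nextChunk() above is the heart of the fix. A condensed, standalone sketch of the same pattern (hypothetical helper with assumed names, not part of the commit): keep feeding the decoder while it reports UNDERFLOW and input remains, advance the file position only by the bytes actually consumed, and accept a chunk shorter than requested when the decoder stops early:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;

final class DecodeSketch {
	// Returns the chars decoded starting at 'start'; may be fewer than maxChars.
	static char[] decodeChunk(FileChannel channel, CharsetDecoder decoder, long start, int maxChars)
			throws IOException {
		ByteBuffer in = ByteBuffer.allocate(maxChars); // one read's worth; the commit sizes this via averageCharsPerByte()
		CharBuffer out = CharBuffer.allocate(maxChars);
		long offset = start;
		boolean eof;
		CoderResult result;
		do {
			in.clear();
			channel.position(offset);
			channel.read(in);
			eof = in.hasRemaining(); // a short read means end of file
			in.flip();
			result = decoder.decode(in, out, eof);
			offset += in.position(); // advance by consumed bytes only; unconsumed bytes are re-read
		} while (result == CoderResult.UNDERFLOW && !eof); // OVERFLOW ends the chunk early
		out.flip();
		char[] chars = new char[out.remaining()];
		out.get(chars);
		return chars;
	}
}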
@ -152,7 +179,7 @@ public class FileCharArray extends LazyCharArray {
 		}
 		try {
 			FileChannel channel = fis.getChannel();
-			decode(channel, fileOffset, fileEndOffset, CharBuffer.wrap(dest));
+			decode(channel, chunk.fSourceOffset, chunk.fSourceEndOffset, CharBuffer.wrap(dest));
 		} catch (IOException e) {
 			// File cannot be read
 		} finally {

@ -168,11 +195,10 @@ public class FileCharArray extends LazyCharArray {
 		final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE)
 				.onUnmappableCharacter(CodingErrorAction.REPLACE);
 
-		int needBytes = (int) (fileEndOffset - fileOffset);
-		final ByteBuffer in = ByteBuffer.allocate(needBytes);
+		final ByteBuffer in = ByteBuffer.allocate((int) (fileEndOffset - fileOffset));
 
-		channel.position(fileOffset);
 		in.clear();
+		channel.position(fileOffset);
 		channel.read(in);
 		in.flip();
 		if (fileOffset == 0) {
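rereadChunkData() exists because chunk text is held only through a SoftReference and can be garbage-collected under memory pressure; the chunk's recorded byte range then lets exactly the lost span be decoded again, in a single read of known size. A minimal sketch of that cache shape (illustrative names, not from the commit):

import java.lang.ref.SoftReference;

final class ChunkCacheSketch {
	private SoftReference<char[]> cached = new SoftReference<char[]>(null);

	char[] text(int length) {
		char[] data = cached.get();
		if (data == null) { // collected, or never loaded
			data = new char[length];
			// ... re-decode the chunk's recorded source range into 'data' ...
			cached = new SoftReference<char[]>(data);
		}
		return data;
	}
}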
LazyCharArray.java
@ -18,32 +18,37 @@ import java.util.List;
 /**
  * Implementation of char array for a file referencing content via
  * soft references.
+ * Because of bug 320157 we need to deal with chunks of different length.
  */
 public abstract class LazyCharArray extends AbstractCharArray {
 	private final static int CHUNK_BITS= 16; // 2^16 == 64K
-	protected final static int CHUNK_SIZE= 1 << CHUNK_BITS;
+	public final static int CHUNK_SIZE= 1 << CHUNK_BITS;
 
 	protected static class Chunk {
-		final int fDataLength;
-		final long fFileOffset;
-		final long fFileEndOffset;
-		private SoftReference<char[]> fData;
+		final int fCharOffset;
+		final int fCharEndOffset;
+		final long fSourceOffset;
+		final long fSourceEndOffset;
+		private SoftReference<char[]> fCharsReference;
 
-		private Chunk(long fileOffset, long fileEndOffset, char[] data) {
-			fDataLength= data.length;
-			fFileOffset= fileOffset;
-			fFileEndOffset= fileEndOffset;
-			fData= new SoftReference<char[]>(data);
+		private Chunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
+			fCharOffset= charOffset;
+			fCharEndOffset= charOffset+ chars.length;
+			fSourceOffset= sourceOffset;
+			fSourceEndOffset= sourceEndOffset;
+			fCharsReference= new SoftReference<char[]>(chars);
 		}
 	}
 
 	private int fLength= -1;
 	private List<Chunk> fChunks= new ArrayList<Chunk>();
-	private StreamHasher hasher;
-	private long hash64;
+	private StreamHasher fHasher;
+	private long fHash64;
+	// Make a reference to the currently used char[], such that it is not collected.
+	private char[] fCurrentChars;
 
 	protected LazyCharArray() {
-		hasher = new StreamHasher();
+		fHasher = new StreamHasher();
 	}
 
 	@Override

@ -53,7 +58,7 @@ public abstract class LazyCharArray extends AbstractCharArray {
 
 	@Override
 	public final int getLength() {
-		readUpTo(Integer.MAX_VALUE);
+		readAllChunks();
 		return fLength;
 	}
 

@ -62,131 +67,138 @@ public abstract class LazyCharArray extends AbstractCharArray {
 		if (offset < 0)
 			return false;
 
-		readUpTo(offset);
 		if (fLength >= 0)
 			return offset < fLength;
 
-		assert offset < fChunks.size() << CHUNK_BITS;
-		return true;
+		return getChunkForOffset(offset) != null;
 	}
 
 	@Override
 	public long getContentsHash() {
-		if (hasher != null) {
-			readUpTo(Integer.MAX_VALUE);
-			hash64 = hasher.computeHash();
-			hasher = null;
+		if (fHasher != null) {
+			readAllChunks();
+			fHash64 = fHasher.computeHash();
+			fHasher = null;
 		}
-		return hash64;
-	}
-
-	private void readUpTo(int offset) {
-		if (fLength >= 0)
-			return;
-
-		final int chunkOffset= offset >> CHUNK_BITS;
-		getChunkData(chunkOffset);
+		return fHash64;
 	}
 
 	@Override
 	public final char get(int offset) {
-		int chunkOffset= offset >> CHUNK_BITS;
-		char[] data= getChunkData(chunkOffset);
-		return data[offset & (CHUNK_SIZE - 1)];
+		Chunk chunk= getChunkForOffset(offset);
+		if (chunk != null) {
+			return getChunkData(chunk)[offset - chunk.fCharOffset];
+		}
+		return 0;
 	}
 
 	@Override
 	public final void arraycopy(int offset, char[] destination, int destinationPos, int length) {
-		int chunkOffset= offset >> CHUNK_BITS;
-		int loffset= offset & (CHUNK_SIZE - 1);
-		char[] data= getChunkData(chunkOffset);
-		final int canCopy = data.length - loffset;
-		if (length <= canCopy) {
-			System.arraycopy(data, loffset, destination, destinationPos, length);
-			return;
+		final Chunk chunk= getChunkForOffset(offset);
+		final int offsetInChunk= offset-chunk.fCharOffset;
+		final char[] data= getChunkData(chunk);
+		final int maxLenInChunk = data.length - offsetInChunk;
+		if (length <= maxLenInChunk) {
+			System.arraycopy(data, offsetInChunk, destination, destinationPos, length);
+		} else {
+			System.arraycopy(data, offsetInChunk, destination, destinationPos, maxLenInChunk);
+			arraycopy(offset+maxLenInChunk, destination, destinationPos+maxLenInChunk, length-maxLenInChunk);
 		}
-		System.arraycopy(data, loffset, destination, destinationPos, canCopy);
-		arraycopy(offset+canCopy, destination, destinationPos+canCopy, length-canCopy);
 	}
 
-	private char[] getChunkData(int chunkOffset) {
-		Chunk chunk= getChunk(chunkOffset);
-		if (chunk != null) {
-			char[] data= chunk.fData.get();
-			if (data != null)
-				return data;
-
-			return loadChunkData(chunk);
+	private void readAllChunks() {
+		if (fLength < 0) {
+			getChunkForOffset(Integer.MAX_VALUE);
 		}
-		return null;
 	}
 
-	private Chunk getChunk(int chunkOffset) {
+	private Chunk getChunkForOffset(int offset) {
+		int minChunkNumber= offset >> CHUNK_BITS;
+		for(;;) {
+			Chunk chunk= getChunkByNumber(minChunkNumber);
+			if (chunk == null)
+				return null;
+
+			if (offset < chunk.fCharEndOffset) {
+				return chunk;
+			}
+			minChunkNumber++;
+		}
+	}
+
+	private Chunk getChunkByNumber(int chunkNumber) {
 		final int chunkCount = fChunks.size();
-		if (chunkOffset < chunkCount)
-			return fChunks.get(chunkOffset);
+		if (chunkNumber < chunkCount)
+			return fChunks.get(chunkNumber);
 
 		if (fLength >=0)
 			return null;
 
-		return createChunk(chunkOffset);
+		return createChunk(chunkNumber);
 	}
 
 	/**
 	 * Called when a chunk is requested for the first time. There is no
 	 * need to override this method.
 	 */
-	protected Chunk createChunk(int chunkOffset) {
-		final int chunkCount = fChunks.size();
-		long fileOffset= chunkCount == 0 ? 0 : fChunks.get(chunkCount - 1).fFileEndOffset;
-		try {
-			for (int i = chunkCount; i <= chunkOffset; i++) {
-				long[] fileEndOffset= {0};
-				char[] data= readChunkData(fileOffset, fileEndOffset);
-				final int charCount= data.length;
-				if (charCount == 0) {
-					fLength= fChunks.size() * CHUNK_SIZE;
-					break;
-				}
-				if (hasher != null) {
-					hasher.addChunk(data);
-				}
-				// New chunk
-				Chunk chunk= new Chunk(fileOffset, fileEndOffset[0], data);
-				fChunks.add(chunk);
-				if (charCount < CHUNK_SIZE) {
-					fLength= (fChunks.size() - 1) * CHUNK_SIZE + charCount;
-					break;
-				}
-				fileOffset= fileEndOffset[0];
+	protected Chunk createChunk(int chunkNumber) {
+		for (int i = fChunks.size(); i <= chunkNumber; i++) {
+			Chunk chunk= nextChunk();
+			if (chunk == null) {
+				final int chunkCount= fChunks.size();
+				fLength= chunkCount == 0 ? 0 : fChunks.get(chunkCount-1).fCharEndOffset;
+				break;
 			}
-		} catch (Exception e) {
-			// File cannot be read
-			return null;
+			if (fHasher != null) {
+				final char[] chunkData = getChunkData(chunk);
+				fHasher.addChunk(chunkData);
+			}
+			fChunks.add(chunk);
 		}
 
-		if (chunkOffset < fChunks.size())
-			return fChunks.get(chunkOffset);
+		if (chunkNumber < fChunks.size())
+			return fChunks.get(chunkNumber);
 
 		return null;
 	}
 
-	private char[] loadChunkData(Chunk chunk) {
-		char[] result= new char[chunk.fDataLength];
-		rereadChunkData(chunk.fFileOffset, chunk.fFileEndOffset, result);
-		chunk.fData= new SoftReference<char[]>(result);
-		return result;
+	/**
+	 * Creates a new chunk.
+	 */
+	protected Chunk newChunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) {
+		fCurrentChars= chars;
+		return new Chunk(sourceOffset, sourceEndOffset, charOffset, chars);
 	}
 
 	/**
-	 * Read the chunk data at the given source offset and provide the end-offset in
-	 * the source.
+	 * Read the next chunk from the input.
 	 */
-	protected abstract char[] readChunkData(long sourceOffset, long[] sourceEndOffsetHolder) throws Exception;
+	protected abstract Chunk nextChunk();
+
+	private char[] getChunkData(Chunk chunk) {
+		char[] data= chunk.fCharsReference.get();
+		if (data == null) {
+			data= new char[chunk.fCharEndOffset - chunk.fCharOffset];
+			rereadChunkData(chunk, data);
+			chunk.fCharsReference= new SoftReference<char[]>(data);
+		}
+		return fCurrentChars= data;
+	}
 
 	/**
-	 * Read the chunk data at the given source range. In case the source range no longer (fully) exists,
+	 * Reread the data for the chunk. In case the source range no longer (fully) exists,
 	 * read as much as possible.
 	 */
-	protected abstract void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest);
+	protected abstract void rereadChunkData(Chunk chunk, char[] data);
+
+	/**
+	 * For testing purposes: Simulates that all the data gets collected.
+	 */
+	public void testClearData() {
+		for (Chunk chunk : fChunks) {
+			chunk.fCharsReference= new SoftReference<char[]>(null);
+		}
+		if (fCurrentChars != null)
+			fCurrentChars= null;
+	}
 }
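One detail worth spelling out (editorial note): once chunks may be shorter than CHUNK_SIZE, offset >> CHUNK_BITS no longer addresses a chunk directly, but it is still a valid lower bound, because chunk k can begin no later than char offset k * CHUNK_SIZE. getChunkForOffset() therefore scans forward from that bound, as in this hypothetical model:

final class ChunkLookupSketch {
	// Model of the lookup: charEndOffsets[k] is the end char offset (exclusive) of chunk k.
	static int findChunk(int[] charEndOffsets, int offset, int chunkBits) {
		int k = offset >> chunkBits; // lower bound: chunk k starts at or before k << chunkBits
		while (k < charEndOffsets.length && offset >= charEndOffsets[k]) {
			k++; // skip chunks that ended before 'offset' (they were shorter than CHUNK_SIZE)
		}
		return k < charEndOffsets.length ? k : -1; // -1: offset lies beyond the content
	}
}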