From 9f594c8aee6cb75d1599c837bc984ac58d58acaf Mon Sep 17 00:00:00 2001 From: Markus Schorn Date: Wed, 25 Aug 2010 11:36:52 +0000 Subject: [PATCH] Bug 320157: Endless loop decoding large file. --- .../tests/scanner/FileCharArrayTests.java | 117 ++++++++++ .../tests/scanner/ScannerTestSuite.java | 1 + .../core/parser/scanner/FileCharArray.java | 92 +++++--- .../core/parser/scanner/LazyCharArray.java | 202 ++++++++++-------- 4 files changed, 284 insertions(+), 128 deletions(-) create mode 100644 core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/FileCharArrayTests.java diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/FileCharArrayTests.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/FileCharArrayTests.java new file mode 100644 index 00000000000..3ad739acd49 --- /dev/null +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/FileCharArrayTests.java @@ -0,0 +1,117 @@ +/******************************************************************************* + * Copyright (c) 2010 Wind River Systems, Inc. and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Markus Schorn - Initial API and implementation + *******************************************************************************/ +package org.eclipse.cdt.core.parser.tests.scanner; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import junit.framework.TestSuite; + +import org.eclipse.cdt.core.testplugin.util.BaseTestCase; +import org.eclipse.cdt.internal.core.parser.scanner.AbstractCharArray; +import org.eclipse.cdt.internal.core.parser.scanner.FileCharArray; +import org.eclipse.cdt.internal.core.parser.scanner.LazyCharArray; + +public class FileCharArrayTests extends BaseTestCase { + + public static TestSuite suite() { + return suite(FileCharArrayTests.class); + } + + private File fFile; + + @Override + protected void tearDown() throws Exception { + if (fFile != null) { + fFile.delete(); + } + } + + public void testAlignedMinus() throws IOException { + testFile(true, LazyCharArray.CHUNK_SIZE*3-1); + } + + public void testAlignedEven() throws IOException { + testFile(true, LazyCharArray.CHUNK_SIZE*3); + } + + public void testAlignedPlus() throws IOException { + testFile(true, LazyCharArray.CHUNK_SIZE*3+1); + } + + public void testUnAlignedMinus() throws IOException { + testFile(false, LazyCharArray.CHUNK_SIZE*3-1); + } + + public void testUnAlignedEven() throws IOException { + testFile(false, LazyCharArray.CHUNK_SIZE*3); + } + + public void testUnAlignedPlus() throws IOException { + testFile(false, LazyCharArray.CHUNK_SIZE*3+1); + } + + private void testFile(boolean aligned, int charSize) throws IOException { + createFile(aligned, charSize); + + AbstractCharArray charArray; + final FileInputStream inputStream = new FileInputStream(fFile); + try { + charArray = FileCharArray.create(fFile.getPath(), "utf-8", inputStream); + } finally { + inputStream.close(); + } + + checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize); + assertEquals(charSize, charArray.getLength()); + + ((LazyCharArray) charArray).testClearData(); + + checkContent(charArray, LazyCharArray.CHUNK_SIZE, charSize); + assertEquals(charSize, charArray.getLength()); + + } + + public void checkContent(AbstractCharArray charArray, int from, int to) { + for (int i = from; i < to; i++) { + assertEquals(i % 127, charArray.get(i)); + if (i+3<=to) { + char[] dest= new char[3]; + charArray.arraycopy(i, dest, 0, 3); + for (int j = 0; j < dest.length; j++) { + assertEquals((i+j) % 127, dest[j]); + } + } + } + } + + private void createFile(boolean aligned, int charSize) throws IOException { + fFile= File.createTempFile("data", ".txt"); + OutputStream out= new BufferedOutputStream(new FileOutputStream(fFile)); + try { + if (!aligned) { + out.write(0xc2); + out.write(0xa2); + } else { + out.write(0); + } + for (int i = 1; i < charSize; i++) { + out.write(i % 127); + } + } finally { + out.close(); + } + } +} diff --git a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/ScannerTestSuite.java b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/ScannerTestSuite.java index 2a94f2441ca..cc5e0dcf2ef 100644 --- a/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/ScannerTestSuite.java +++ b/core/org.eclipse.cdt.core.tests/parser/org/eclipse/cdt/core/parser/tests/scanner/ScannerTestSuite.java @@ -26,6 +26,7 @@ public class ScannerTestSuite extends TestSuite { suite.addTest(ExpansionExplorerTests.suite()); suite.addTest(InactiveCodeTests.suite()); suite.addTest(StreamHasherTests.suite()); + suite.addTest(FileCharArrayTests.suite()); return suite; } } diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/FileCharArray.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/FileCharArray.java index e21bf09eae0..850c3afde20 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/FileCharArray.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/FileCharArray.java @@ -19,8 +19,11 @@ import java.nio.CharBuffer; import java.nio.channels.FileChannel; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; +import org.eclipse.cdt.core.CCorePlugin; + /** * Implementation of char array for a file referencing content via * soft references. @@ -81,6 +84,10 @@ public class FileCharArray extends LazyCharArray { private String fFileName; private String fCharSet; private FileChannel fChannel; + private long fNextFileOffset= 0; + private int fNextCharOffset= 0; + private boolean fReachedEOF= false; + private FileCharArray(String fileName, String charSet) { fFileName= fileName; @@ -88,7 +95,7 @@ public class FileCharArray extends LazyCharArray { } @Override - protected Chunk createChunk(int chunkOffset) { + protected Chunk createChunk(int chunkNumber) { FileInputStream fis; try { fis = new FileInputStream(fFileName); @@ -98,7 +105,7 @@ public class FileCharArray extends LazyCharArray { } fChannel= fis.getChannel(); try { - return super.createChunk(chunkOffset); + return super.createChunk(chunkNumber); } finally { fChannel= null; try { @@ -109,40 +116,60 @@ public class FileCharArray extends LazyCharArray { } @Override - protected char[] readChunkData(long fileOffset, long[] fileEndOffsetHolder) throws IOException { - assert fChannel != null; - final Charset charset = Charset.forName(fCharSet); - final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); + protected Chunk nextChunk() { + if (fReachedEOF) + return null; + + try { + assert fChannel != null; + final Charset charset = Charset.forName(fCharSet); + final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); - int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors. - final ByteBuffer in = ByteBuffer.allocate(needBytes); - final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE); + int needBytes = 3 + (int) (CHUNK_SIZE * (double) decoder.averageCharsPerByte()); // avoid rounding errors. + final ByteBuffer in = ByteBuffer.allocate(needBytes); + final CharBuffer dest= CharBuffer.allocate(CHUNK_SIZE); - boolean endOfInput= false; - while (dest.position() < CHUNK_SIZE && !endOfInput) { - fChannel.position(fileOffset); - in.clear(); - int count= fChannel.read(in); - if (count == -1) { - break; + boolean eof; + CoderResult result; + long fileOffset= fNextFileOffset; + do { + in.clear(); + fChannel.position(fileOffset); + fChannel.read(in); + eof= in.remaining() > 0; + in.flip(); + if (fileOffset == 0) { + skipUTF8ByteOrderMark(in, fCharSet); + } + result = decoder.decode(in, dest, eof); + fileOffset+= in.position(); + } while (result == CoderResult.UNDERFLOW && !eof); + + dest.flip(); + if (dest.remaining() == 0) { + fReachedEOF= true; + return null; } - - endOfInput= count < in.capacity(); - in.flip(); - if (fileOffset == 0) { - skipUTF8ByteOrderMark(in, fCharSet); + if (eof && result == CoderResult.UNDERFLOW) { + fReachedEOF= true; } - decoder.decode(in, dest, endOfInput); - fileOffset+= in.position(); + final char[] chars = extractChars(dest); + Chunk chunk = newChunk(fNextFileOffset, fileOffset, fNextCharOffset, chars); + fNextFileOffset= fileOffset; + fNextCharOffset+= chars.length; + + return chunk; + } catch (Exception e) { + // The file cannot be read + CCorePlugin.log(e); + fReachedEOF= true; + return null; } - fileEndOffsetHolder[0]= fileOffset; - dest.flip(); - return extractChars(dest); } @Override - protected void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest) { + protected void rereadChunkData(Chunk chunk, char[] dest) { FileInputStream fis; try { fis = new FileInputStream(fFileName); @@ -152,7 +179,7 @@ public class FileCharArray extends LazyCharArray { } try { FileChannel channel = fis.getChannel(); - decode(channel, fileOffset, fileEndOffset, CharBuffer.wrap(dest)); + decode(channel, chunk.fSourceOffset, chunk.fSourceEndOffset, CharBuffer.wrap(dest)); } catch (IOException e) { // File cannot be read } finally { @@ -168,11 +195,10 @@ public class FileCharArray extends LazyCharArray { final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); - int needBytes = (int) (fileEndOffset - fileOffset); - final ByteBuffer in = ByteBuffer.allocate(needBytes); - - channel.position(fileOffset); + final ByteBuffer in = ByteBuffer.allocate((int) (fileEndOffset - fileOffset)); + in.clear(); + channel.position(fileOffset); channel.read(in); in.flip(); if (fileOffset == 0) { diff --git a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/LazyCharArray.java b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/LazyCharArray.java index d5f6e3324aa..d02a64f90e6 100644 --- a/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/LazyCharArray.java +++ b/core/org.eclipse.cdt.core/parser/org/eclipse/cdt/internal/core/parser/scanner/LazyCharArray.java @@ -18,32 +18,37 @@ import java.util.List; /** * Implementation of char array for a file referencing content via * soft references. + * Because of bug 320157 we need to deal with chunks of different length. */ public abstract class LazyCharArray extends AbstractCharArray { private final static int CHUNK_BITS= 16; // 2^16 == 64K - protected final static int CHUNK_SIZE= 1 << CHUNK_BITS; + public final static int CHUNK_SIZE= 1 << CHUNK_BITS; protected static class Chunk { - final int fDataLength; - final long fFileOffset; - final long fFileEndOffset; - private SoftReference fData; + final int fCharOffset; + final int fCharEndOffset; + final long fSourceOffset; + final long fSourceEndOffset; + private SoftReference fCharsReference; - private Chunk(long fileOffset, long fileEndOffset, char[] data) { - fDataLength= data.length; - fFileOffset= fileOffset; - fFileEndOffset= fileEndOffset; - fData= new SoftReference(data); + private Chunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) { + fCharOffset= charOffset; + fCharEndOffset= charOffset+ chars.length; + fSourceOffset= sourceOffset; + fSourceEndOffset= sourceEndOffset; + fCharsReference= new SoftReference(chars); } } private int fLength= -1; private List fChunks= new ArrayList(); - private StreamHasher hasher; - private long hash64; + private StreamHasher fHasher; + private long fHash64; + // Make a reference to the currently used char[], such that it is not collected. + private char[] fCurrentChars; protected LazyCharArray() { - hasher = new StreamHasher(); + fHasher = new StreamHasher(); } @Override @@ -53,7 +58,7 @@ public abstract class LazyCharArray extends AbstractCharArray { @Override public final int getLength() { - readUpTo(Integer.MAX_VALUE); + readAllChunks(); return fLength; } @@ -62,131 +67,138 @@ public abstract class LazyCharArray extends AbstractCharArray { if (offset < 0) return false; - readUpTo(offset); if (fLength >= 0) return offset < fLength; - assert offset < fChunks.size() << CHUNK_BITS; - return true; + return getChunkForOffset(offset) != null; } @Override public long getContentsHash() { - if (hasher != null) { - readUpTo(Integer.MAX_VALUE); - hash64 = hasher.computeHash(); - hasher = null; + if (fHasher != null) { + readAllChunks(); + fHash64 = fHasher.computeHash(); + fHasher = null; } - return hash64; - } - - private void readUpTo(int offset) { - if (fLength >= 0) - return; - - final int chunkOffset= offset >> CHUNK_BITS; - getChunkData(chunkOffset); + return fHash64; } @Override public final char get(int offset) { - int chunkOffset= offset >> CHUNK_BITS; - char[] data= getChunkData(chunkOffset); - return data[offset & (CHUNK_SIZE - 1)]; + Chunk chunk= getChunkForOffset(offset); + if (chunk != null) { + return getChunkData(chunk)[offset - chunk.fCharOffset]; + } + return 0; } @Override public final void arraycopy(int offset, char[] destination, int destinationPos, int length) { - int chunkOffset= offset >> CHUNK_BITS; - int loffset= offset & (CHUNK_SIZE - 1); - char[] data= getChunkData(chunkOffset); - final int canCopy = data.length - loffset; - if (length <= canCopy) { - System.arraycopy(data, loffset, destination, destinationPos, length); - return; + final Chunk chunk= getChunkForOffset(offset); + final int offsetInChunk= offset-chunk.fCharOffset; + final char[] data= getChunkData(chunk); + final int maxLenInChunk = data.length - offsetInChunk; + if (length <= maxLenInChunk) { + System.arraycopy(data, offsetInChunk, destination, destinationPos, length); + } else { + System.arraycopy(data, offsetInChunk, destination, destinationPos, maxLenInChunk); + arraycopy(offset+maxLenInChunk, destination, destinationPos+maxLenInChunk, length-maxLenInChunk); } - System.arraycopy(data, loffset, destination, destinationPos, canCopy); - arraycopy(offset+canCopy, destination, destinationPos+canCopy, length-canCopy); } - private char[] getChunkData(int chunkOffset) { - Chunk chunk= getChunk(chunkOffset); - if (chunk != null) { - char[] data= chunk.fData.get(); - if (data != null) - return data; - - return loadChunkData(chunk); + private void readAllChunks() { + if (fLength < 0) { + getChunkForOffset(Integer.MAX_VALUE); } - return null; } - private Chunk getChunk(int chunkOffset) { + private Chunk getChunkForOffset(int offset) { + int minChunkNumber= offset >> CHUNK_BITS; + for(;;) { + Chunk chunk= getChunkByNumber(minChunkNumber); + if (chunk == null) + return null; + + if (offset < chunk.fCharEndOffset) { + return chunk; + } + minChunkNumber++; + } + } + + private Chunk getChunkByNumber(int chunkNumber) { final int chunkCount = fChunks.size(); - if (chunkOffset < chunkCount) - return fChunks.get(chunkOffset); + if (chunkNumber < chunkCount) + return fChunks.get(chunkNumber); if (fLength >=0) return null; - return createChunk(chunkOffset); + return createChunk(chunkNumber); } /** * Called when a chunk is requested for the first time. There is no * need to override this method. */ - protected Chunk createChunk(int chunkOffset) { - final int chunkCount = fChunks.size(); - long fileOffset= chunkCount == 0 ? 0 : fChunks.get(chunkCount - 1).fFileEndOffset; - try { - for (int i = chunkCount; i <= chunkOffset; i++) { - long[] fileEndOffset= {0}; - char[] data= readChunkData(fileOffset, fileEndOffset); - final int charCount= data.length; - if (charCount == 0) { - fLength= fChunks.size() * CHUNK_SIZE; - break; - } - if (hasher != null) { - hasher.addChunk(data); - } - // New chunk - Chunk chunk= new Chunk(fileOffset, fileEndOffset[0], data); - fChunks.add(chunk); - if (charCount < CHUNK_SIZE) { - fLength= (fChunks.size() - 1) * CHUNK_SIZE + charCount; - break; - } - fileOffset= fileEndOffset[0]; + protected Chunk createChunk(int chunkNumber) { + for (int i = fChunks.size(); i <= chunkNumber; i++) { + Chunk chunk= nextChunk(); + if (chunk == null) { + final int chunkCount= fChunks.size(); + fLength= chunkCount == 0 ? 0 : fChunks.get(chunkCount-1).fCharEndOffset; + break; } - } catch (Exception e) { - // File cannot be read - return null; - } - - if (chunkOffset < fChunks.size()) - return fChunks.get(chunkOffset); + if (fHasher != null) { + final char[] chunkData = getChunkData(chunk); + fHasher.addChunk(chunkData); + } + fChunks.add(chunk); + } + + if (chunkNumber < fChunks.size()) + return fChunks.get(chunkNumber); return null; } - private char[] loadChunkData(Chunk chunk) { - char[] result= new char[chunk.fDataLength]; - rereadChunkData(chunk.fFileOffset, chunk.fFileEndOffset, result); - chunk.fData= new SoftReference(result); - return result; + /** + * Creates a new chunk. + */ + protected Chunk newChunk(long sourceOffset, long sourceEndOffset, int charOffset, char[] chars) { + fCurrentChars= chars; + return new Chunk(sourceOffset, sourceEndOffset, charOffset, chars); } /** - * Read the chunk data at the given source offset and provide the end-offset in - * the source. + * Read the next chunk from the input. */ - protected abstract char[] readChunkData(long sourceOffset, long[] sourceEndOffsetHolder) throws Exception; + protected abstract Chunk nextChunk(); + + private char[] getChunkData(Chunk chunk) { + char[] data= chunk.fCharsReference.get(); + if (data == null) { + data= new char[chunk.fCharEndOffset - chunk.fCharOffset]; + rereadChunkData(chunk, data); + chunk.fCharsReference= new SoftReference(data); + } + return fCurrentChars= data; + } /** - * Read the chunk data at the given source range. In case the source range no longer (fully) exists, + * Reread the data for the chunk. In case the source range no longer (fully) exists, * read as much as possible. */ - protected abstract void rereadChunkData(long fileOffset, long fileEndOffset, char[] dest); + protected abstract void rereadChunkData(Chunk chunk, char[] data); + + /** + * For testing purposes: Simulates that all the data gets collected. + */ + public void testClearData() { + for (Chunk chunk : fChunks) { + chunk.fCharsReference= new SoftReference(null); + } + if (fCurrentChars != null) + fCurrentChars= null; + } }