1
0
Fork 0
mirror of https://github.com/eclipse-cdt/cdt synced 2025-07-24 01:15:29 +02:00

More efficient way of reading large files, bug 263210.

This commit is contained in:
Markus Schorn 2009-02-02 13:39:51 +00:00
parent e2908ac0e2
commit 251635be7d
2 changed files with 217 additions and 156 deletions

View file

@ -1,14 +1,15 @@
/*******************************************************************************
* Copyright (c) 2005, 2008 IBM Corporation and others.
* Copyright (c) 2005, 2009 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Rational Software - Initial API and implementation
* Cheong, Jeong-Sik - fix for 162381
* Valeri Atamaniouk - fix for 170398
* John Camelon (IBM Rational Software) - Initial API and implementation
* Cheong, Jeong-Sik - fix for 162381
* Valeri Atamaniouk - fix for 170398
* Markus Schorn (Wind River Systems)
*******************************************************************************/
package org.eclipse.cdt.core.parser;
@ -17,32 +18,33 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import org.eclipse.cdt.core.parser.util.CharArrayUtils;
/**
* @author jcamelon
* Reads the content of a file into a char[] buffer.
*
* @noextend This class is not intended to be subclassed by clients.
*/
public class CodeReader {
public static final String SYSTEM_DEFAULT_ENCODING = System.getProperty( "file.encoding" ); //$NON-NLS-1$
public static final String SYSTEM_DEFAULT_ENCODING = System.getProperty("file.encoding"); //$NON-NLS-1$
private static final int MB = 1024*1024;
private static final String NF = "<text>"; //$NON-NLS-1$
private static final char [] NOFILE = NF.toCharArray();
/**
* Maximum number of bytes from the source file to load. If the file length
* exceeds this value, the content is truncated
*
* @see #load(String, FileInputStream)
*/
private static final int MAX_FILE_SIZE = Integer.MAX_VALUE;
private static final char[] NOFILE = NF.toCharArray();
private static final int MAX_FILE_SIZE;
static {
MAX_FILE_SIZE = (int) Math.min(Integer.MAX_VALUE, (Runtime.getRuntime().maxMemory()) / 4);
}
public final char[] buffer;
public final char[] filename;
// If you already have preloaded the buffer, e.g. working copy
// If you already have the buffer, e.g. working copy
public CodeReader(String filename, char[] buffer) {
this.filename = filename.toCharArray();
this.buffer = buffer;
@ -52,12 +54,11 @@ public class CodeReader {
/**
 * Creates a reader for a buffer that is not associated with a named file.
 * Delegates to {@link #CodeReader(String, char[])} using the placeholder
 * name {@code NF} ("&lt;text&gt;"), which is the name {@link #isFile()}
 * compares against.
 *
 * @param buffer the content, passed on unchanged to the delegate constructor
 */
public CodeReader(char[] buffer) {
this(NF, buffer);
}
// If you are loading up a file normally
public CodeReader(String filename) throws IOException
{
public CodeReader(String filename) throws IOException {
this.filename = filename.toCharArray();
FileInputStream stream = new FileInputStream(filename);
try {
buffer = load(SYSTEM_DEFAULT_ENCODING, stream);
@ -65,10 +66,10 @@ public class CodeReader {
stream.close();
}
}
public CodeReader(String filename, String charSet ) throws IOException
{
public CodeReader(String filename, String charSet) throws IOException {
this.filename = filename.toCharArray();
FileInputStream stream = new FileInputStream(filename);
try {
buffer = load(charSet, stream);
@ -76,18 +77,15 @@ public class CodeReader {
stream.close();
}
}
public CodeReader( String filename, InputStream stream ) throws IOException
{
this( filename, SYSTEM_DEFAULT_ENCODING, stream );
public CodeReader(String filename, InputStream stream) throws IOException {
this(filename, SYSTEM_DEFAULT_ENCODING, stream);
}
public CodeReader( String fileName, String charSet, InputStream stream ) throws IOException {
filename = fileName.toCharArray();
FileInputStream fstream =
(stream instanceof FileInputStream)
? (FileInputStream)stream
public CodeReader(String fileName, String charSet, InputStream stream) throws IOException {
filename = fileName.toCharArray();
FileInputStream fstream = (stream instanceof FileInputStream) ? (FileInputStream) stream
: new FileInputStream(fileName);
try {
buffer = load(charSet, fstream);
@ -99,77 +97,100 @@ public class CodeReader {
}
}
}
/**
* Load the stream content as a character array. The method loads the stream
* content using given character set name. In case if the character set is
* not supported, the default one is used.
* <p>
* If the file is really large, it is silently truncated.
* </p>
*
* @param charSet
* Character set name to use for decoding.
* @param stream
* Input stream
* @return Loaded character content
* @throws IOException
* Loads the stream content as a character array, decoding it with the given character set name.
* If that character set is not supported, the system default encoding is used instead.
*/
private char[] load(String charSet, FileInputStream stream)
throws IOException {
String encoding = Charset.isSupported(charSet) ? charSet
: SYSTEM_DEFAULT_ENCODING;
private char[] load(String charSet, FileInputStream stream) throws IOException {
String encoding = Charset.isSupported(charSet) ? charSet : SYSTEM_DEFAULT_ENCODING;
FileChannel channel = stream.getChannel();
final long lsize = channel.size();
final int isize = (int) lsize;
if (lsize > MAX_FILE_SIZE) {
throw new IOException(
"File '" + getPath() + "' is larger than " + MAX_FILE_SIZE / 1024 / 1024 + "mb"); //$NON-NLS-1$//$NON-NLS-2$ //$NON-NLS-3$
}
// In most cases JDK uses Java-written character set decoders. Heap
// buffer will work way faster here
// Also if the file is larger then 2^31 we truncate it (hope there is
// enough heap space)
ByteBuffer byteBuffer = ByteBuffer.allocate((int) Math.min(channel
.size(), MAX_FILE_SIZE));
channel.read(byteBuffer);
byteBuffer.flip();
CharBuffer charBuffer = Charset.forName(encoding).decode(byteBuffer);
CharBuffer charBuffer;
if (isize < MB) {
charBuffer= decodeSmallFile(channel, isize, encoding);
} else {
charBuffer= decodeLargeFile(channel, isize, encoding);
}
if (charBuffer.hasArray() && charBuffer.arrayOffset() == 0) {
char[] buff= charBuffer.array();
if (buff.length == charBuffer.remaining())
char[] buff = charBuffer.array();
if (buff.length == charBuffer.remaining())
return buff;
}
char[] buff = new char[charBuffer.remaining()];
charBuffer.get(buff);
return buff;
}
protected char[] xload(FileInputStream stream) throws IOException {
FileChannel channel = stream.getChannel();
MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
// TODO use the real encoding
CharBuffer charBuffer = Charset.forName(SYSTEM_DEFAULT_ENCODING).decode(map);
if (charBuffer.hasArray())
return charBuffer.array();
// Got to copy it out
char[] buff = new char[charBuffer.length()];
charBuffer.get(buff);
return buff;
/**
 * Reads up to <code>isize</code> bytes from the channel into a single heap buffer and
 * decodes them in one step. <code>load</code> uses this path for files smaller than
 * <code>MB</code> bytes.
 *
 * @param channel  the channel to read from, starting at its current position
 * @param isize    number of bytes to read
 * @param encoding name of the character set used for decoding
 * @return the decoded characters
 * @throws IOException if reading from the channel fails
 */
private CharBuffer decodeSmallFile(FileChannel channel, final int isize, String encoding) throws IOException {
    final ByteBuffer byteBuffer = ByteBuffer.allocate(isize);
    // A single read() is allowed to return before the buffer is full. Keep reading until
    // all isize bytes have arrived or the channel reports end-of-stream, otherwise the
    // content could be truncated silently.
    while (byteBuffer.hasRemaining()) {
        if (channel.read(byteBuffer) < 0) {
            break;
        }
    }
    byteBuffer.flip();
    return Charset.forName(encoding).decode(byteBuffer);
}
/**
 * Decodes the content of the channel chunk by chunk, so that the raw bytes of a large
 * file never have to be held in memory all at once. Each chunk is at most <code>MB</code>
 * bytes. Malformed input and unmappable characters are replaced rather than reported.
 * <p>
 * The output buffer is first sized from the decoder's average chars-per-byte ratio and
 * is enlarged whenever the decoder reports an overflow.
 * </p>
 *
 * @param channel  the channel to read from, starting at its current position
 * @param isize    expected number of bytes to decode
 * @param encoding name of the character set used for decoding
 * @return the decoded characters, ready for reading
 * @throws IOException if reading fails or the decoded content cannot be held in a buffer
 */
private CharBuffer decodeLargeFile(FileChannel channel, final int isize, String encoding) throws IOException {
    final ByteBuffer in = ByteBuffer.allocate(Math.min(isize, MB));
    final CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // Initial estimate only; enlarged on demand.
    CharBuffer out = CharBuffer.allocate((int) (isize * (double) decoder.averageCharsPerByte())); // avoid rounding errors.

    int fetched = 0; // number of bytes obtained from the channel so far
    boolean endOfInput = false;
    while (true) {
        if (!endOfInput) {
            // read() returns 0 if 'in' is still full of bytes waiting for a larger output
            // buffer, and -1 if the channel ends before isize bytes were delivered.
            final int count = channel.read(in);
            if (count > 0) {
                fetched += count;
            }
            endOfInput = count < 0 || fetched >= isize;
        }
        in.flip();

        final CoderResult cr = decoder.decode(in, out, endOfInput);
        if (cr.isOverflow()) {
            final int pendingBytes = Math.max(isize - fetched, 0) + in.remaining();
            out = enlarge(out, pendingBytes * (double) decoder.maxCharsPerByte()); // avoid rounding errors.
        } else if (!cr.isUnderflow()) {
            cr.throwException();
        } else if (endOfInput) {
            // Underflow on the final chunk: every input byte has been consumed.
            break;
        }
        // Keep the bytes the decoder has not consumed yet (e.g. an incomplete multi-byte
        // sequence at the chunk boundary) at the start of the buffer for the next round.
        in.compact();
    }

    // Completes the decoding operation; stateful decoders may still emit characters here.
    while (decoder.flush(out).isOverflow()) {
        out = enlarge(out, decoder.maxCharsPerByte());
    }
    out.flip();
    return out;
}

/**
 * Returns a new buffer that contains the characters written to <code>out</code> so far
 * and has room for at least <code>extraChars</code> additional characters.
 */
private static CharBuffer enlarge(CharBuffer out, double extraChars) throws IOException {
    // Always add some room, so that the caller is guaranteed to make progress.
    final long wanted = out.capacity() + Math.max(16L, (long) Math.ceil(extraChars));
    if (wanted > Integer.MAX_VALUE) {
        throw new IOException("Decoded content is too large for a single buffer"); //$NON-NLS-1$
    }
    final CharBuffer larger = CharBuffer.allocate((int) wanted);
    out.flip();
    larger.put(out);
    return larger;
}
public boolean isFile() {
return !CharArrayUtils.equals( filename, NOFILE );
return !CharArrayUtils.equals(filename, NOFILE);
}
@Override
@Override
public String toString() {
return getPath();
}
public String getPath()
{
return new String( filename );
}
public String getPath() {
return new String(filename);
}
}

View file

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2008 IBM Corporation and others.
* Copyright (c) 2000, 2009 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@ -84,6 +84,9 @@ import org.osgi.framework.BundleContext;
/**
* CCorePlugin is the life-cycle owner of the core plug-in, and starting point for access to many core APIs.
*
* @noextend This class is not intended to be subclassed by clients.
* @noinstantiate This class is not intended to be instantiated by clients.
*/
public class CCorePlugin extends Plugin {
@ -206,28 +209,13 @@ public class CCorePlugin extends Plugin {
}
/**
* Answers the shared working copies currently registered for this buffer factory.
* Working copies can be shared by several clients using the same buffer factory,see
* <code>IWorkingCopy.getSharedWorkingCopy</code>.
*
* @param factory the given buffer factory
* @return the list of shared working copies for a given buffer factory
* @see IWorkingCopy
* Returns the shared working copies currently registered for the default buffer factory.
* @since 5.1
*/
public static IWorkingCopy[] getSharedWorkingCopies(IBufferFactory factory){
// if factory is null, default factory must be used
if (factory == null) factory = BufferManager.getDefaultBufferManager().getDefaultBufferFactory();
Map<IBufferFactory, Map<ITranslationUnit, WorkingCopy>> sharedWorkingCopies = CModelManager.getDefault().sharedWorkingCopies;
Map<ITranslationUnit, WorkingCopy> perFactoryWorkingCopies = sharedWorkingCopies.get(factory);
if (perFactoryWorkingCopies == null) return CModelManager.NoWorkingCopy;
Collection<WorkingCopy> copies = perFactoryWorkingCopies.values();
IWorkingCopy[] result = new IWorkingCopy[copies.size()];
copies.toArray(result);
return result;
public static IWorkingCopy[] getSharedWorkingCopies() {
return getSharedWorkingCopies(null);
}
public static String getResourceString(String key) {
try {
return fgResourceBundle.getString(key);
@ -262,41 +250,6 @@ public class CCorePlugin extends Plugin {
return fgCPlugin;
}
public static void log(String e) {
log(createStatus(e));
}
public static void log(Throwable e) {
log("Error", e); //$NON-NLS-1$
}
public static void log(String message, Throwable e) {
Throwable nestedException;
if (e instanceof CModelException
&& (nestedException = ((CModelException)e).getException()) != null) {
e = nestedException;
}
log(createStatus(message, e));
}
public static IStatus createStatus(String msg) {
return createStatus(msg, null);
}
public static IStatus createStatus(String msg, Throwable e) {
return new Status(IStatus.ERROR, PLUGIN_ID, IStatus.ERROR, msg, e);
}
public static void log(IStatus status) {
getDefault().getLog().log(status);
}
// ------ CPlugin
public CCorePlugin() {
super();
fgCPlugin = this;
}
/**
* @see Plugin#shutdown
@ -646,14 +599,6 @@ public class CCorePlugin extends Plugin {
return fCoreModel;
}
/**
* @deprecated use getIndexManager().
*/
@Deprecated
public static IPDOMManager getPDOMManager() {
return getDefault().pdomManager;
}
/**
 * Returns the index manager, which is the <code>pdomManager</code> of the plug-in
 * instance obtained from <code>getDefault()</code>.
 */
public static IIndexManager getIndexManager() {
    final CCorePlugin plugin = getDefault();
    return plugin.pdomManager;
}
@ -1200,5 +1145,100 @@ public class CCorePlugin extends Plugin {
return UserVarSupplier.getInstance();
}
// NON-API
/**
 * Creates the plug-in instance and records it in <code>fgCPlugin</code>.
 *
 * @noreference This constructor is not intended to be referenced by clients.
 */
public CCorePlugin() {
    // The superclass constructor is invoked implicitly.
    fgCPlugin = this;
}
/**
 * Returns the shared working copies that are currently registered for a buffer factory.
 * Several clients using the same buffer factory can share working copies, see
 * <code>IWorkingCopy.getSharedWorkingCopy</code>.
 *
 * @param factory the buffer factory to look up; <code>null</code> selects the default
 *                buffer factory of the default buffer manager
 * @return the working copies registered for the factory, or
 *         <code>CModelManager.NoWorkingCopy</code> if nothing is registered for it
 * @see IWorkingCopy
 * @noreference This method is not intended to be referenced by clients.
 */
public static IWorkingCopy[] getSharedWorkingCopies(IBufferFactory factory) {
    IBufferFactory lookupKey = factory;
    if (lookupKey == null) {
        // No factory given: fall back to the default one.
        lookupKey = BufferManager.getDefaultBufferManager().getDefaultBufferFactory();
    }

    final Map<IBufferFactory, Map<ITranslationUnit, WorkingCopy>> allShared =
            CModelManager.getDefault().sharedWorkingCopies;
    final Map<ITranslationUnit, WorkingCopy> registered = allShared.get(lookupKey);
    if (registered == null) {
        return CModelManager.NoWorkingCopy;
    }

    final Collection<WorkingCopy> copies = registered.values();
    final IWorkingCopy[] result = new IWorkingCopy[copies.size()];
    copies.toArray(result);
    return result;
}
/**
 * Logs the given message, wrapped into a status created by
 * {@link #createStatus(String)}.
 *
 * @param e the message to log
 * @noreference This method is not intended to be referenced by clients.
 */
public static void log(String e) {
    final IStatus status = createStatus(e);
    log(status);
}
/**
 * Logs the given exception. The log message is "Error", followed by the message of the
 * exception if it has one.
 *
 * @param e the exception to log
 * @noreference This method is not intended to be referenced by clients.
 */
public static void log(Throwable e) {
    final String detail = e.getMessage();
    final String text = (detail == null) ? "Error" : "Error: " + detail; //$NON-NLS-1$ //$NON-NLS-2$
    log(text, e);
}
/**
 * Logs the given message together with an exception. If the exception is a
 * <code>CModelException</code> that wraps another exception, the wrapped exception is
 * logged in its place.
 *
 * @param message the message to log
 * @param e       the exception to attach to the log entry
 * @noreference This method is not intended to be referenced by clients.
 */
public static void log(String message, Throwable e) {
    Throwable cause = e;
    if (e instanceof CModelException) {
        final Throwable nested = ((CModelException) e).getException();
        if (nested != null) {
            cause = nested;
        }
    }
    log(createStatus(message, cause));
}
/**
 * Creates a status for the given message without an associated exception.
 * Same as {@link #createStatus(String, Throwable)} with a <code>null</code> exception.
 *
 * @param msg the status message
 * @return the new status
 * @noreference This method is not intended to be referenced by clients.
 */
public static IStatus createStatus(String msg) {
    final Throwable noException = null;
    return createStatus(msg, noException);
}
/**
 * Creates a status for this plug-in (<code>PLUGIN_ID</code>) that uses
 * <code>IStatus.ERROR</code> both as severity and as status code.
 *
 * @param msg the status message
 * @param e   the exception to attach, may be <code>null</code> as done by
 *            {@link #createStatus(String)}
 * @return the new status
 * @noreference This method is not intended to be referenced by clients.
 */
public static IStatus createStatus(String msg, Throwable e) {
    final int severity = IStatus.ERROR;
    final int code = IStatus.ERROR;
    final IStatus status = new Status(severity, PLUGIN_ID, code, msg, e);
    return status;
}
/**
 * Writes the given status to the log of the plug-in instance obtained from
 * <code>getDefault()</code>.
 *
 * @param status the status to log
 * @noreference This method is not intended to be referenced by clients.
 */
public static void log(IStatus status) {
    final CCorePlugin plugin = getDefault();
    plugin.getLog().log(status);
}
/**
 * Returns the <code>pdomManager</code> of the plug-in instance obtained from
 * <code>getDefault()</code>.
 *
 * @deprecated use {@link #getIndexManager()}.
 * @noreference This method is not intended to be referenced by clients.
 */
@Deprecated
public static IPDOMManager getPDOMManager() {
    final CCorePlugin plugin = getDefault();
    return plugin.pdomManager;
}
}