1
0
Fork 0
mirror of https://github.com/eclipse-cdt/cdt synced 2025-04-29 19:45:01 +02:00

Optimizations for the lexer

This commit is contained in:
Markus Schorn 2007-10-31 11:07:51 +00:00
parent c281420174
commit 22033c0504
3 changed files with 105 additions and 91 deletions

View file

@ -142,7 +142,7 @@ public class LexerTests extends BaseTestCase {
token(IToken.tBITOR); token(IToken.tBITOR);
token(IToken.tLBRACE); token(IToken.tLBRACE);
token(IToken.tRBRACE); token(IToken.tRBRACE);
token(IToken.tCOMPL); token(IToken.tBITCOMPLEMENT);
eof(); eof();
init("a??/\nb"); init("a??/\nb");

View file

@ -43,7 +43,6 @@ final public class Lexer {
public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4; public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4;
private static final int END_OF_INPUT = -1; private static final int END_OF_INPUT = -1;
private static final int LINE_SPLICE_SEQUENCE = -2;
private static final int ORIGIN_LEXER = OffsetLimitReachedException.ORIGIN_LEXER; private static final int ORIGIN_LEXER = OffsetLimitReachedException.ORIGIN_LEXER;
public final static class LexerOptions implements Cloneable { public final static class LexerOptions implements Cloneable {
@ -210,6 +209,7 @@ final public class Lexer {
* @throws OffsetLimitReachedException when completion is requested in a literal or an header-name. * @throws OffsetLimitReachedException when completion is requested in a literal or an header-name.
*/ */
public Token nextDirective() throws OffsetLimitReachedException { public Token nextDirective() throws OffsetLimitReachedException {
fInsideIncludeDirective= false;
final Token t= fToken; final Token t= fToken;
boolean haveNL= t==null || t.getType() == tNEWLINE; boolean haveNL= t==null || t.getType() == tNEWLINE;
while(true) { while(true) {
@ -217,14 +217,38 @@ final public class Lexer {
haveNL= false; haveNL= false;
final int start= fOffset; final int start= fOffset;
final int c= fCharPhase3; final int c= fCharPhase3;
final int d= nextCharPhase3();
// optimization avoids calling nextCharPhase3
int d;
final int pos= fEndOffset;
if (pos+1 >= fLimit) {
d= nextCharPhase3();
}
else {
d= fInput[pos];
switch(d) {
case '\\':
d= nextCharPhase3();
break;
case '?':
if (fInput[pos+1] == '?') {
d= nextCharPhase3();
break;
}
// no break;
default:
fOffset= pos;
fCharPhase3= d;
fEndOffset= pos+1;
}
}
switch(c) { switch(c) {
case END_OF_INPUT: case END_OF_INPUT:
fToken= newToken(Lexer.tEND_OF_INPUT, start); fToken= newToken(Lexer.tEND_OF_INPUT, start);
return fToken; return fToken;
case '\n': case '\n':
haveNL= true; haveNL= true;
fInsideIncludeDirective= false;
continue; continue;
case ' ': case ' ':
case '\t': case '\t':
@ -261,6 +285,7 @@ final public class Lexer {
continue; continue;
} }
if (hadNL) { if (hadNL) {
fFirstTokenAfterNewline= true;
fToken= newToken(IToken.tPOUND, start); fToken= newToken(IToken.tPOUND, start);
return fToken; return fToken;
} }
@ -637,25 +662,21 @@ final public class Lexer {
} }
private void blockComment(final int start) { private void blockComment(final int start) {
int c= nextCharPhase3(); // we can ignore line-splices, trigraphs and windows newlines when searching for the '*'
while(true) { int pos= fEndOffset;
switch (c) { while(pos < fLimit) {
case END_OF_INPUT: if (fInput[pos++] == '*') {
fLog.handleComment(true, start, fOffset); fEndOffset= pos;
return; if (nextCharPhase3() == '/') {
case '*':
c= nextCharPhase3();
if (c == '/') {
nextCharPhase3(); nextCharPhase3();
fLog.handleComment(true, start, fOffset); fLog.handleComment(true, start, fOffset);
return; return;
} }
break;
default:
c= nextCharPhase3();
break;
} }
} }
fCharPhase3= END_OF_INPUT;
fOffset= fEndOffset= pos;
fLog.handleComment(true, start, pos);
} }
private void lineComment(final int start) { private void lineComment(final int start) {
@ -907,75 +928,72 @@ final public class Lexer {
/** /**
* Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing. * Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
* Changes fOffset, fEndOffset and fCharPhase3. * Changes fOffset, fEndOffset and fCharPhase3, stateless otherwise.
*/ */
private int nextCharPhase3() { private int nextCharPhase3() {
int offset; int pos= fEndOffset;
int c;
do { do {
offset= fEndOffset; if (pos+1 >= fLimit) {
c= fetchCharPhase3(offset); // changes fEndOffset if (pos >= fLimit) {
} fOffset= fLimit;
while(c == LINE_SPLICE_SEQUENCE); fEndOffset= fLimit;
fCharPhase3= END_OF_INPUT;
fOffset= offset; return END_OF_INPUT;
fCharPhase3= c; }
return c; fOffset= pos;
} fEndOffset= pos+1;
fCharPhase3= fInput[pos];
/** return fCharPhase3;
* Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing. }
* Changes <code>fEndOffset</code>, but is stateless otherwise.
*/ final char c= fInput[pos];
private int fetchCharPhase3(int pos) { fOffset= pos;
if (pos >= fLimit) { fEndOffset= ++pos;
fEndOffset= fLimit; fCharPhase3= c;
return END_OF_INPUT; switch(c) {
}
final char c= fInput[pos++];
switch(c) {
// windows line-ending // windows line-ending
case '\r': case '\r':
if (pos < fLimit && fInput[pos] == '\n') { if (fInput[pos] == '\n') {
fEndOffset= pos+1; fEndOffset= pos+1;
return '\n'; fCharPhase3= '\n';
} return '\n';
fEndOffset= pos; }
return c; return c;
// trigraph sequences // trigraph sequences
case '?': case '?':
if (pos+1 >= fLimit || fInput[pos] != '?') { if (fInput[pos] != '?' || pos+1 >= fLimit) {
return c;
}
final char trigraph= checkTrigraph(fInput[pos+1]);
if (trigraph == 0) {
return c;
}
if (trigraph != '\\') {
fEndOffset= pos+2;
fCharPhase3= trigraph;
return trigraph;
}
pos+= 2;
// no break, handle backslash
case '\\':
final int lsPos= findEndOfLineSpliceSequence(pos);
if (lsPos > pos) {
pos= lsPos;
continue;
}
fEndOffset= pos; fEndOffset= pos;
fCharPhase3= '\\';
return '\\'; // don't return c, it may be a '?'
default:
return c; return c;
} }
final char trigraph= checkTrigraph(fInput[pos+1]);
if (trigraph == 0) {
fEndOffset= pos;
return c;
}
if (trigraph != '\\') {
fEndOffset= pos+2;
return trigraph;
}
pos+= 2;
// no break, handle backslash
case '\\':
final int lsPos= findEndOfLineSpliceSequence(pos);
if (lsPos > pos) {
fEndOffset= lsPos;
return LINE_SPLICE_SEQUENCE;
}
fEndOffset= pos;
return '\\'; // don't return c, it may be a '?'
default:
fEndOffset= pos;
return c;
} }
while(true);
} }
/** /**
* Maps a trigraph to the character it encodes. * Maps a trigraph to the character it encodes.
* @param c trigraph without leading question marks. * @param c trigraph without leading question marks.
@ -1059,12 +1077,8 @@ final public class Lexer {
final char[] result= new char[imageLength]; final char[] result= new char[imageLength];
markPhase3(); markPhase3();
fEndOffset= offset; fEndOffset= offset;
int idx= 0; for (int idx=0; idx<imageLength; idx++) {
while (idx<imageLength) { result[idx]= (char) nextCharPhase3();
int c= fetchCharPhase3(fEndOffset);
if (c != LINE_SPLICE_SEQUENCE) {
result[idx++]= (char) c;
}
} }
restorePhase3(); restorePhase3();
return result; return result;

View file

@ -31,32 +31,32 @@ public abstract class Token implements IToken, Cloneable {
fSource= source; fSource= source;
} }
public int getType() { final public int getType() {
return fKind; return fKind;
} }
public int getOffset() { final public int getOffset() {
return fOffset; return fOffset;
} }
public int getEndOffset() { final public int getEndOffset() {
return fEndOffset; return fEndOffset;
} }
public int getLength() { final public int getLength() {
return fEndOffset-fOffset; return fEndOffset-fOffset;
} }
public IToken getNext() { final public IToken getNext() {
return fNextToken; return fNextToken;
} }
public void setType(int kind) { final public void setType(int kind) {
fKind= kind; fKind= kind;
} }
public void setNext(IToken t) { final public void setNext(IToken t) {
fNextToken= t; fNextToken= t;
} }
@ -67,7 +67,7 @@ public abstract class Token implements IToken, Cloneable {
public abstract char[] getCharImage(); public abstract char[] getCharImage();
public boolean hasGap(Token t) { final public boolean hasGap(Token t) {
return fSource == t.fSource && fEndOffset != t.getOffset(); return fSource == t.fSource && fEndOffset != t.getOffset();
} }
@ -75,7 +75,7 @@ public abstract class Token implements IToken, Cloneable {
return getImage(); return getImage();
} }
public boolean isOperator() { final public boolean isOperator() {
return TokenUtil.isOperator(fKind); return TokenUtil.isOperator(fKind);
} }
@ -83,7 +83,7 @@ public abstract class Token implements IToken, Cloneable {
return new String(getCharImage()); return new String(getCharImage());
} }
public Object clone() { final public Object clone() {
try { try {
return super.clone(); return super.clone();
} catch (CloneNotSupportedException e) { } catch (CloneNotSupportedException e) {