mirror of
https://github.com/eclipse-cdt/cdt
synced 2025-04-29 19:45:01 +02:00
Optimizations for the lexer
This commit is contained in:
parent
c281420174
commit
22033c0504
3 changed files with 105 additions and 91 deletions
|
@ -142,7 +142,7 @@ public class LexerTests extends BaseTestCase {
|
||||||
token(IToken.tBITOR);
|
token(IToken.tBITOR);
|
||||||
token(IToken.tLBRACE);
|
token(IToken.tLBRACE);
|
||||||
token(IToken.tRBRACE);
|
token(IToken.tRBRACE);
|
||||||
token(IToken.tCOMPL);
|
token(IToken.tBITCOMPLEMENT);
|
||||||
eof();
|
eof();
|
||||||
|
|
||||||
init("a??/\nb");
|
init("a??/\nb");
|
||||||
|
|
|
@ -43,7 +43,6 @@ final public class Lexer {
|
||||||
public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4;
|
public static final int tSYSTEM_HEADER_NAME = IToken.FIRST_RESERVED_SCANNER + 4;
|
||||||
|
|
||||||
private static final int END_OF_INPUT = -1;
|
private static final int END_OF_INPUT = -1;
|
||||||
private static final int LINE_SPLICE_SEQUENCE = -2;
|
|
||||||
private static final int ORIGIN_LEXER = OffsetLimitReachedException.ORIGIN_LEXER;
|
private static final int ORIGIN_LEXER = OffsetLimitReachedException.ORIGIN_LEXER;
|
||||||
|
|
||||||
public final static class LexerOptions implements Cloneable {
|
public final static class LexerOptions implements Cloneable {
|
||||||
|
@ -210,6 +209,7 @@ final public class Lexer {
|
||||||
* @throws OffsetLimitReachedException when completion is requested in a literal or an header-name.
|
* @throws OffsetLimitReachedException when completion is requested in a literal or an header-name.
|
||||||
*/
|
*/
|
||||||
public Token nextDirective() throws OffsetLimitReachedException {
|
public Token nextDirective() throws OffsetLimitReachedException {
|
||||||
|
fInsideIncludeDirective= false;
|
||||||
final Token t= fToken;
|
final Token t= fToken;
|
||||||
boolean haveNL= t==null || t.getType() == tNEWLINE;
|
boolean haveNL= t==null || t.getType() == tNEWLINE;
|
||||||
while(true) {
|
while(true) {
|
||||||
|
@ -217,14 +217,38 @@ final public class Lexer {
|
||||||
haveNL= false;
|
haveNL= false;
|
||||||
final int start= fOffset;
|
final int start= fOffset;
|
||||||
final int c= fCharPhase3;
|
final int c= fCharPhase3;
|
||||||
final int d= nextCharPhase3();
|
|
||||||
|
// optimization avoids calling nextCharPhase3
|
||||||
|
int d;
|
||||||
|
final int pos= fEndOffset;
|
||||||
|
if (pos+1 >= fLimit) {
|
||||||
|
d= nextCharPhase3();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
d= fInput[pos];
|
||||||
|
switch(d) {
|
||||||
|
case '\\':
|
||||||
|
d= nextCharPhase3();
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
if (fInput[pos+1] == '?') {
|
||||||
|
d= nextCharPhase3();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// no break;
|
||||||
|
default:
|
||||||
|
fOffset= pos;
|
||||||
|
fCharPhase3= d;
|
||||||
|
fEndOffset= pos+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch(c) {
|
switch(c) {
|
||||||
case END_OF_INPUT:
|
case END_OF_INPUT:
|
||||||
fToken= newToken(Lexer.tEND_OF_INPUT, start);
|
fToken= newToken(Lexer.tEND_OF_INPUT, start);
|
||||||
return fToken;
|
return fToken;
|
||||||
case '\n':
|
case '\n':
|
||||||
haveNL= true;
|
haveNL= true;
|
||||||
fInsideIncludeDirective= false;
|
|
||||||
continue;
|
continue;
|
||||||
case ' ':
|
case ' ':
|
||||||
case '\t':
|
case '\t':
|
||||||
|
@ -261,6 +285,7 @@ final public class Lexer {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (hadNL) {
|
if (hadNL) {
|
||||||
|
fFirstTokenAfterNewline= true;
|
||||||
fToken= newToken(IToken.tPOUND, start);
|
fToken= newToken(IToken.tPOUND, start);
|
||||||
return fToken;
|
return fToken;
|
||||||
}
|
}
|
||||||
|
@ -637,25 +662,21 @@ final public class Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void blockComment(final int start) {
|
private void blockComment(final int start) {
|
||||||
int c= nextCharPhase3();
|
// we can ignore line-splices, trigraphs and windows newlines when searching for the '*'
|
||||||
while(true) {
|
int pos= fEndOffset;
|
||||||
switch (c) {
|
while(pos < fLimit) {
|
||||||
case END_OF_INPUT:
|
if (fInput[pos++] == '*') {
|
||||||
fLog.handleComment(true, start, fOffset);
|
fEndOffset= pos;
|
||||||
return;
|
if (nextCharPhase3() == '/') {
|
||||||
case '*':
|
|
||||||
c= nextCharPhase3();
|
|
||||||
if (c == '/') {
|
|
||||||
nextCharPhase3();
|
nextCharPhase3();
|
||||||
fLog.handleComment(true, start, fOffset);
|
fLog.handleComment(true, start, fOffset);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
default:
|
|
||||||
c= nextCharPhase3();
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fCharPhase3= END_OF_INPUT;
|
||||||
|
fOffset= fEndOffset= pos;
|
||||||
|
fLog.handleComment(true, start, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void lineComment(final int start) {
|
private void lineComment(final int start) {
|
||||||
|
@ -907,75 +928,72 @@ final public class Lexer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
|
* Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
|
||||||
* Changes fOffset, fEndOffset and fCharPhase3.
|
* Changes fOffset, fEndOffset and fCharPhase3, stateless otherwise.
|
||||||
*/
|
*/
|
||||||
private int nextCharPhase3() {
|
private int nextCharPhase3() {
|
||||||
int offset;
|
int pos= fEndOffset;
|
||||||
int c;
|
|
||||||
do {
|
do {
|
||||||
offset= fEndOffset;
|
if (pos+1 >= fLimit) {
|
||||||
c= fetchCharPhase3(offset); // changes fEndOffset
|
if (pos >= fLimit) {
|
||||||
}
|
fOffset= fLimit;
|
||||||
while(c == LINE_SPLICE_SEQUENCE);
|
fEndOffset= fLimit;
|
||||||
|
fCharPhase3= END_OF_INPUT;
|
||||||
fOffset= offset;
|
return END_OF_INPUT;
|
||||||
fCharPhase3= c;
|
}
|
||||||
return c;
|
fOffset= pos;
|
||||||
}
|
fEndOffset= pos+1;
|
||||||
|
fCharPhase3= fInput[pos];
|
||||||
/**
|
return fCharPhase3;
|
||||||
* Perform phase 1-3: Replace \r\n with \n, handle trigraphs, detect line-splicing.
|
}
|
||||||
* Changes <code>fEndOffset</code>, but is stateless otherwise.
|
|
||||||
*/
|
final char c= fInput[pos];
|
||||||
private int fetchCharPhase3(int pos) {
|
fOffset= pos;
|
||||||
if (pos >= fLimit) {
|
fEndOffset= ++pos;
|
||||||
fEndOffset= fLimit;
|
fCharPhase3= c;
|
||||||
return END_OF_INPUT;
|
switch(c) {
|
||||||
}
|
|
||||||
final char c= fInput[pos++];
|
|
||||||
switch(c) {
|
|
||||||
// windows line-ending
|
// windows line-ending
|
||||||
case '\r':
|
case '\r':
|
||||||
if (pos < fLimit && fInput[pos] == '\n') {
|
if (fInput[pos] == '\n') {
|
||||||
fEndOffset= pos+1;
|
fEndOffset= pos+1;
|
||||||
return '\n';
|
fCharPhase3= '\n';
|
||||||
}
|
return '\n';
|
||||||
fEndOffset= pos;
|
}
|
||||||
return c;
|
return c;
|
||||||
|
|
||||||
// trigraph sequences
|
// trigraph sequences
|
||||||
case '?':
|
case '?':
|
||||||
if (pos+1 >= fLimit || fInput[pos] != '?') {
|
if (fInput[pos] != '?' || pos+1 >= fLimit) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
final char trigraph= checkTrigraph(fInput[pos+1]);
|
||||||
|
if (trigraph == 0) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
if (trigraph != '\\') {
|
||||||
|
fEndOffset= pos+2;
|
||||||
|
fCharPhase3= trigraph;
|
||||||
|
return trigraph;
|
||||||
|
}
|
||||||
|
pos+= 2;
|
||||||
|
// no break, handle backslash
|
||||||
|
|
||||||
|
case '\\':
|
||||||
|
final int lsPos= findEndOfLineSpliceSequence(pos);
|
||||||
|
if (lsPos > pos) {
|
||||||
|
pos= lsPos;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
fEndOffset= pos;
|
fEndOffset= pos;
|
||||||
|
fCharPhase3= '\\';
|
||||||
|
return '\\'; // don't return c, it may be a '?'
|
||||||
|
|
||||||
|
default:
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
final char trigraph= checkTrigraph(fInput[pos+1]);
|
|
||||||
if (trigraph == 0) {
|
|
||||||
fEndOffset= pos;
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
if (trigraph != '\\') {
|
|
||||||
fEndOffset= pos+2;
|
|
||||||
return trigraph;
|
|
||||||
}
|
|
||||||
pos+= 2;
|
|
||||||
// no break, handle backslash
|
|
||||||
|
|
||||||
case '\\':
|
|
||||||
final int lsPos= findEndOfLineSpliceSequence(pos);
|
|
||||||
if (lsPos > pos) {
|
|
||||||
fEndOffset= lsPos;
|
|
||||||
return LINE_SPLICE_SEQUENCE;
|
|
||||||
}
|
|
||||||
fEndOffset= pos;
|
|
||||||
return '\\'; // don't return c, it may be a '?'
|
|
||||||
|
|
||||||
default:
|
|
||||||
fEndOffset= pos;
|
|
||||||
return c;
|
|
||||||
}
|
}
|
||||||
|
while(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maps a trigraph to the character it encodes.
|
* Maps a trigraph to the character it encodes.
|
||||||
* @param c trigraph without leading question marks.
|
* @param c trigraph without leading question marks.
|
||||||
|
@ -1059,12 +1077,8 @@ final public class Lexer {
|
||||||
final char[] result= new char[imageLength];
|
final char[] result= new char[imageLength];
|
||||||
markPhase3();
|
markPhase3();
|
||||||
fEndOffset= offset;
|
fEndOffset= offset;
|
||||||
int idx= 0;
|
for (int idx=0; idx<imageLength; idx++) {
|
||||||
while (idx<imageLength) {
|
result[idx]= (char) nextCharPhase3();
|
||||||
int c= fetchCharPhase3(fEndOffset);
|
|
||||||
if (c != LINE_SPLICE_SEQUENCE) {
|
|
||||||
result[idx++]= (char) c;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
restorePhase3();
|
restorePhase3();
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -31,32 +31,32 @@ public abstract class Token implements IToken, Cloneable {
|
||||||
fSource= source;
|
fSource= source;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getType() {
|
final public int getType() {
|
||||||
return fKind;
|
return fKind;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getOffset() {
|
final public int getOffset() {
|
||||||
return fOffset;
|
return fOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getEndOffset() {
|
final public int getEndOffset() {
|
||||||
return fEndOffset;
|
return fEndOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getLength() {
|
final public int getLength() {
|
||||||
return fEndOffset-fOffset;
|
return fEndOffset-fOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public IToken getNext() {
|
final public IToken getNext() {
|
||||||
return fNextToken;
|
return fNextToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void setType(int kind) {
|
final public void setType(int kind) {
|
||||||
fKind= kind;
|
fKind= kind;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setNext(IToken t) {
|
final public void setNext(IToken t) {
|
||||||
fNextToken= t;
|
fNextToken= t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ public abstract class Token implements IToken, Cloneable {
|
||||||
|
|
||||||
public abstract char[] getCharImage();
|
public abstract char[] getCharImage();
|
||||||
|
|
||||||
public boolean hasGap(Token t) {
|
final public boolean hasGap(Token t) {
|
||||||
return fSource == t.fSource && fEndOffset != t.getOffset();
|
return fSource == t.fSource && fEndOffset != t.getOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,7 +75,7 @@ public abstract class Token implements IToken, Cloneable {
|
||||||
return getImage();
|
return getImage();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isOperator() {
|
final public boolean isOperator() {
|
||||||
return TokenUtil.isOperator(fKind);
|
return TokenUtil.isOperator(fKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ public abstract class Token implements IToken, Cloneable {
|
||||||
return new String(getCharImage());
|
return new String(getCharImage());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Object clone() {
|
final public Object clone() {
|
||||||
try {
|
try {
|
||||||
return super.clone();
|
return super.clone();
|
||||||
} catch (CloneNotSupportedException e) {
|
} catch (CloneNotSupportedException e) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue