1
0
Fork 0
mirror of https://github.com/eclipse-cdt/cdt synced 2025-08-12 10:45:37 +02:00

Use prime numbers for hash table sizes to reduce collisions.

Change-Id: I4233e4a4ca729dd742825ee23b9c254fa836bc41
This commit is contained in:
Sergey Prigogin 2016-03-28 19:05:04 -07:00
parent eeaed8ee74
commit 9622166291
5 changed files with 269 additions and 188 deletions

View file

@ -14,6 +14,8 @@
*/
package org.eclipse.cdt.core.parser.tests;
import java.util.Random;
import org.eclipse.cdt.core.parser.util.CharArrayObjectMap;
import org.eclipse.cdt.core.parser.util.ObjectMap;
@ -24,7 +26,9 @@ import junit.framework.TestCase;
*/
public class ObjectMapTest extends TestCase {
static public class HashObject {
private static class HashObject {
final public int hash;
HashObject(int h) {
hash = h;
}
@ -33,7 +37,6 @@ public class ObjectMapTest extends TestCase {
public int hashCode() {
return hash;
}
final public int hash;
}
public void insertContents(ObjectMap map, Object[][] contents) throws Exception {
@ -60,7 +63,7 @@ public class ObjectMapTest extends TestCase {
assertContents(map, contents);
assertEquals(map.size(), 1);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
}
public void testSimpleCollision() throws Exception{
@ -75,7 +78,7 @@ public class ObjectMapTest extends TestCase {
insertContents(map, contents);
assertEquals(map.size(), 2);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
assertContents(map, contents);
}
@ -84,7 +87,7 @@ public class ObjectMapTest extends TestCase {
ObjectMap map = new ObjectMap(1);
assertEquals(map.size(), 0);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
Object[][] res = new Object[][] { { "0", "o0" },
{ "1", "o1" },
@ -101,7 +104,7 @@ public class ObjectMapTest extends TestCase {
ObjectMap map = new ObjectMap(1);
assertEquals(map.size(), 0);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
Object[][] res = new Object[][] { { new HashObject(0), "o0" },
{ new HashObject(1), "o1" },
@ -118,13 +121,13 @@ public class ObjectMapTest extends TestCase {
ObjectMap map = new ObjectMap(1);
assertEquals(map.size(), 0);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
Object[][] res = new Object[][] { { "0", "o0" },
{ "1", "o1" } };
insertContents(map, res);
assertEquals(map.capacity(), 2);
assertEquals(map.capacity(), 8);
assertContents(map, res);
res = new Object[][]{ { "0", "o00" },
@ -136,21 +139,6 @@ public class ObjectMapTest extends TestCase {
assertContents(map, res);
}
/**
 * Inserts three keys with fixed hash codes into a map created with size 2 and
 * checks that all of them can still be found afterwards.
 */
public void testResizeResolvesCollision() throws Exception {
    // Hash 4 collides with hash 0 in a table capacity 2, but is ok in table capacity 4.
    Object[][] entries = {
        { new HashObject(0), "1" },
        { new HashObject(1), "2" },
        { new HashObject(4), "3" },
    };

    ObjectMap map = new ObjectMap(2);
    insertContents(map, entries);
    assertContents(map, entries);
}
public void testMapAdd() {
CharArrayObjectMap map = new CharArrayObjectMap(4);
char[] key1 = "key1".toCharArray();
@ -161,14 +149,25 @@ public class ObjectMapTest extends TestCase {
Object value2 = map.get(key2);
assertEquals(value1, value2);
for (int i = 0; i < 5; ++i) {
for (int i = 0; i < 25; ++i) {
map.put(("ikey" + i).toCharArray(), new Integer(i));
}
Object ivalue1 = map.get("ikey1".toCharArray());
assertEquals(ivalue1, new Integer(1));
for (int i = 0; i < 25; ++i) {
Object ivalue1 = map.get(("ikey" + i).toCharArray());
assertEquals(i, ivalue1);
}
}
Object ivalue4 = map.get("ikey4".toCharArray());
assertEquals(ivalue4, new Integer(4));
/**
 * Adds 20000 distinct keys ("key0", "key1", ...) to a map created with size 1 and,
 * after every insertion, checks that the ratio of {@code countCollisions()} to
 * {@code size()} does not exceed 0.4.
 */
public void testCollisionRatio() {
    CharArrayObjectMap map = new CharArrayObjectMap(1);
    for (int i = 0; i < 20000; i++) {
        map.put(("key" + i).toCharArray(), i);
        double collisionRatio = (double) map.countCollisions() / map.size();
        // Format the message only when the check fails; formatting it on each of the
        // 20000 iterations is wasted work when the test passes.
        if (!(collisionRatio <= 0.4)) {
            fail(String.format("Collision ratio %.3f is unexpectedly high for map size of %d.", collisionRatio, map.size()));
        }
    }
}
}

View file

@ -53,7 +53,7 @@ public class CharTable extends HashTable {
}
/**
 * Computes the bucket offset in {@code hashTable} for a range of characters.
 *
 * @param source the array containing the key characters
 * @param start  the index of the first key character in {@code source}
 * @param length the number of key characters
 * @return the offset produced by {@code hashToOffset} for the key's hash, or 0
 *         when no hash table has been allocated
 */
protected final int hash(char[] source, int start, int length) {
    // The hash table length is no longer a power of two, so the offset must come
    // from hashToOffset() rather than from masking with (keyTable.length * 2) - 1.
    return hashTable == null ? 0 : hashToOffset(CharArrayUtils.hash(source, start, length));
}
@Override
@ -70,40 +70,40 @@ public class CharTable extends HashTable {
}
public final int addIndex(char[] buffer, int start, int len) {
if (hashTable == null) {
int pos = lookup(buffer, start, len);
if (pos != -1)
return pos;
// Key is not here, add it.
if (currEntry + 1 >= capacity()) {
resize();
if (hashTable != null) {
// If we grew from list to hash, then recurse and add as a hashtable.
return addIndex(buffer, start, len);
}
}
currEntry++;
keyTable[currEntry] = CharArrayUtils.extract(buffer, start, len);
} else {
int hash = hash(buffer, start, len);
int pos = lookup(buffer, start, len, hash);
if (pos != -1)
return pos;
// key is not here, add it.
if ((currEntry + 1) >= capacity()) {
// Key is not here, add it.
if (currEntry + 1 >= capacity()) {
resize();
hash = hash(buffer, start, len);
}
currEntry++;
keyTable[currEntry] = CharArrayUtils.extract(buffer, start, len);
linkIntoHashTable(currEntry, hash);
} else {
int pos = lookup(buffer, start, len);
if (pos != -1)
return pos;
// key is not here, add it.
if ((currEntry + 1) >= capacity()) {
resize();
if (capacity() > minHashSize) {
//if we grew from list to hash, then recurse and add as a hashtable
return addIndex(buffer, start, len);
}
}
currEntry++;
keyTable[currEntry] = CharArrayUtils.extract(buffer, start, len);
}
return currEntry;
}
protected void removeEntry(int i) {
// Remove the entry from the keyTable, shifting everything over if necessary
// Remove the entry from the keyTable, shifting everything over if necessary.
int hash = hash(keyTable[i]);
if (i < currEntry)
System.arraycopy(keyTable, i + 1, keyTable, i, currEntry - i);
@ -161,15 +161,16 @@ public class CharTable extends HashTable {
}
protected final int lookup(char[] buffer, int start, int len, int hash) {
if (hashTable[hash] == 0)
int i = hashTable[hash];
if (i == 0)
return -1;
int i = hashTable[hash] - 1;
--i;
if (CharArrayUtils.equals(buffer, start, len, keyTable[i]))
return i;
// Follow the next chain
for (i = nextTable[i] - 1; i >= 0 && nextTable[i] != i + 1; i = nextTable[i] - 1) {
for (i = nextTable[i] - 1; i >= 0 && i != nextTable[i] - 1; i = nextTable[i] - 1) {
if (CharArrayUtils.equals(buffer, start, len, keyTable[i]))
return i;
}

View file

@ -10,13 +10,49 @@
*******************************************************************************/
package org.eclipse.cdt.core.parser.util;
import java.util.Arrays;
import java.util.Comparator;
/**
* @author ddaoust
*/
public class HashTable implements Cloneable {
protected static final int minHashSize = 2;
// Prime numbers from http://planetmath.org/goodhashtableprimes
private static final int[] PRIMES = {
17,
29,
53,
97,
193,
389,
769,
1543,
3079,
6151,
12289,
24593,
49157,
98317,
196613,
393241,
786433,
1572869,
3145739,
6291469,
12582917,
25165843,
50331653,
100663319,
201326611,
402653189,
805306457,
1610612741
};
private static final int MIN_HASH_SIZE = 9;
/** @deprecated Don't depend on this implementation detail. @noreference This field is not intended to be referenced by clients. */
@Deprecated
protected static final int minHashSize = MIN_HASH_SIZE - 1;
protected int currEntry = -1;
protected int[] hashTable;
@ -31,13 +67,9 @@ public class HashTable implements Cloneable {
}
public HashTable(int initialSize) {
int size = 1;
while (size < initialSize)
size <<= 1;
if (size > minHashSize) {
hashTable = new int[size * 2];
nextTable = new int[size];
if (initialSize >= MIN_HASH_SIZE) {
hashTable = new int[getSuitableHashTableSize(initialSize)];
nextTable = new int[initialSize];
} else {
hashTable = null;
nextTable = null;
@ -57,7 +89,7 @@ public class HashTable implements Cloneable {
int size = capacity();
if (hashTable != null) {
newTable.hashTable = new int[size * 2];
newTable.hashTable = new int[getSuitableHashTableSize(size)];
newTable.nextTable = new int[size];
System.arraycopy(hashTable, 0, newTable.hashTable, 0, hashTable.length);
System.arraycopy(nextTable, 0, newTable.nextTable, 0, nextTable.length);
@ -77,11 +109,8 @@ public class HashTable implements Cloneable {
if (hashTable == null)
return;
for (int i = 0; i < capacity(); i++) {
hashTable[2 * i] = 0;
hashTable[2 * i + 1] = 0;
nextTable[i] = 0;
}
Arrays.fill(hashTable, 0);
Arrays.fill(nextTable, 0);
}
protected void rehash() {
@ -89,11 +118,8 @@ public class HashTable implements Cloneable {
return;
// Clear the table (don't call clear() or else the subclasses stuff will be cleared too).
for (int i = 0; i < capacity(); i++) {
hashTable[2 * i] = 0;
hashTable[2 * i + 1] = 0;
nextTable[i] = 0;
}
Arrays.fill(hashTable, 0);
Arrays.fill(nextTable, 0);
// Need to rehash everything.
for (int i = 0; i <= currEntry; ++i) {
linkIntoHashTable(i, hash(i));
@ -101,8 +127,8 @@ public class HashTable implements Cloneable {
}
protected void resize(int size) {
if (size > minHashSize) {
hashTable = new int[size * 2];
if (size >= MIN_HASH_SIZE) {
hashTable = new int[getSuitableHashTableSize(size)];
nextTable = new int[size];
// Need to rehash everything.
@ -112,11 +138,41 @@ public class HashTable implements Cloneable {
}
}
/**
 * Picks the length of the hash array for a table holding {@code size} entries.
 * The requested size is increased by one third (rounded up) and the smallest
 * entry of {@code PRIMES} that is not less than the result is returned.
 *
 * @param size the number of entries the table has to accommodate
 * @return a prime from {@code PRIMES}, or {@code Integer.MAX_VALUE} when the
 *         enlarged size exceeds the largest prime in the table
 */
private static int getSuitableHashTableSize(int size) {
    // Compute in long: in int arithmetic (size + 2) wraps for sizes close to
    // Integer.MAX_VALUE and the sum then looks like a valid, smaller size.
    long target = size + ((long) size + 2) / 3;
    // A negative target (only possible for a negative size) keeps the previous
    // result of Integer.MAX_VALUE.
    if (target < 0 || target > PRIMES[PRIMES.length - 1])
        return Integer.MAX_VALUE; // Largest prime is not sufficient. Return the max possible size.

    int index = Arrays.binarySearch(PRIMES, (int) target);
    if (index < 0)
        index = -index - 1; // Not an exact match: use the insertion point, i.e. the next larger prime.
    return PRIMES[index];
}
/**
 * Returns the hash value of the element at the given position in the key table.
 * This implementation always throws; presumably subclasses that own a key table
 * override it (NOTE(review): confirm against the subclasses).
 *
 * @param pos the position of the element in the key table
 * @return the hash value of that element
 * @throws UnsupportedOperationException always, in this implementation
 */
protected int hash(int pos) {
// Return the hash value of the element in the key table.
throw new UnsupportedOperationException();
}
/**
 * Maps an arbitrary hash value to an index into {@code hashTable}.
 * Must only be called when {@code hashTable} is not {@code null}.
 *
 * @param hash any hash value, possibly negative
 * @return {@code hash} modulo {@code hashTable.length}, always in the range
 *         {@code [0, hashTable.length)}
 */
final int hashToOffset(int hash) {
    int offset = hash % hashTable.length;
    // Java's % keeps the sign of the dividend. Adding the full table length turns a
    // negative remainder into the true modulus, so every bucket (including the last
    // one) is reachable for negative hashes.
    if (offset < 0)
        offset += hashTable.length;
    return offset;
}
protected void linkIntoHashTable(int i, int hash) {
if (nextTable == null)
return;
@ -126,11 +182,9 @@ public class HashTable implements Cloneable {
} else {
// Need to link.
int j = hashTable[hash] - 1;
while (nextTable[j] != 0) {
// if (nextTable[j] - 1 == j) {
// break;
// }
j = nextTable[j] - 1;
int k;
while ((k = nextTable[j]) != 0) {
j = k - 1;
}
nextTable[j] = i + 1;
}
@ -138,7 +192,7 @@ public class HashTable implements Cloneable {
/**
 * Returns the number of entries the table can hold before it has to be resized.
 *
 * @return the length of {@code nextTable}, or {@code MIN_HASH_SIZE - 1} while
 *         {@code nextTable} has not been allocated
 */
public final int capacity() {
    if (nextTable == null)
        return MIN_HASH_SIZE - 1;
    return nextTable.length;
}
@ -154,8 +208,10 @@ public class HashTable implements Cloneable {
} else {
// Find entry pointing to me.
int j = hashTable[hash] - 1;
while (nextTable[j] != 0 && nextTable[j] != i + 1)
j = nextTable[j] - 1;
int k;
while ((k = nextTable[j]) != 0 && k != i + 1) {
j = k - 1;
}
nextTable[j] = nextTable[i];
}
@ -165,13 +221,15 @@ public class HashTable implements Cloneable {
// Adjust hash and next entries for things that moved.
for (int j = 0; j < hashTable.length; ++j) {
if (hashTable[j] > i + 1)
--hashTable[j];
int k = hashTable[j] - 1;
if (k > i)
hashTable[j] = k;
}
for (int j = 0; j < nextTable.length; ++j) {
if (nextTable[j] > i + 1)
--nextTable[j];
int k = nextTable[j] - 1;
if (k > i)
nextTable[j] = k;
}
}
@ -190,8 +248,10 @@ public class HashTable implements Cloneable {
/**
 * Recursively sorts the entries between positions {@code p} and {@code r}.
 * {@code partition} supplies the split position {@code q}; the ranges
 * {@code p..q} and {@code q + 1..r} are then sorted in turn.
 *
 * @param c the comparator passed through to {@code partition}
 * @param p the first position of the range
 * @param r the last position of the range
 */
private void quickSort(Comparator<Object> c, int p, int r) {
    if (p < r) {
        int q = partition(c, p, r);
        // Each half is sorted exactly once; q is advanced a single time to step
        // past the split position.
        if (p < q)
            quickSort(c, p, q);
        if (++q < r)
            quickSort(c, q, r);
    }
}
@ -199,6 +259,10 @@ public class HashTable implements Cloneable {
throw new UnsupportedOperationException();
}
/**
* For debugging only.
* @noreference This method is not intended to be referenced by clients.
*/
public void dumpNexts() {
if (nextTable == null)
return;
@ -215,4 +279,21 @@ public class HashTable implements Cloneable {
System.out.println(""); //$NON-NLS-1$
}
}
/**
 * Returns the number of collisions, counted as the number of non-zero
 * entries in {@code nextTable}; 0 when {@code nextTable} is not allocated.
 * For debugging only.
 * @noreference This method is not intended to be referenced by clients.
 */
public int countCollisions() {
    int collisions = 0;
    if (nextTable != null) {
        for (int next : nextTable) {
            if (next != 0)
                ++collisions;
        }
    }
    return collisions;
}
}

View file

@ -68,7 +68,7 @@ public abstract class ObjectTable<T> extends HashTable implements Iterable<T> {
}
/**
 * Computes the bucket offset in {@code hashTable} for the given key.
 *
 * @param obj the key; must not be {@code null}
 * @return the offset produced by {@code hashToOffset} for {@code obj.hashCode()},
 *         or 0 when no hash table has been allocated
 */
private int hash(Object obj) {
    // The hash table length is no longer twice the capacity, so masking with
    // (capacity() * 2) - 1 would not yield a valid offset; use hashToOffset().
    return hashTable == null ? 0 : hashToOffset(obj.hashCode());
}
@Override
@ -85,7 +85,7 @@ public abstract class ObjectTable<T> extends HashTable implements Iterable<T> {
if (pos != -1)
return pos;
if ((currEntry + 1) >= capacity()) {
if (currEntry + 1 >= capacity()) {
resize();
}
currEntry++;
@ -118,7 +118,7 @@ public abstract class ObjectTable<T> extends HashTable implements Iterable<T> {
return i;
// Follow the next chain
for (i = nextTable[i] - 1; i >= 0 && nextTable[i] != i + 1; i = nextTable[i] - 1) {
for (i = nextTable[i] - 1; i >= 0 && i != nextTable[i] - 1; i = nextTable[i] - 1) {
if (buffer.equals(keyTable[i]))
return i;
}