import java.util.*;
index = abs(hash-code % array_size)
for ( Object o :
new Object[] { "a", "A", new Integer(10), "a0", "a1", new Float(10.0) } )
{
System.out.println(o.hashCode());
}
The output is:
97 65 10 3055 3056 1092616192For a single character, the hashCode is the ASCII value, for the Integer, the value itself, otherwise some function combines the object's data bits to create the hashCode. An important notion of hashing is that it is replicable, i.e., the same element will generate the same hash code every time the program is executed on any kind of system running Java.
index = abs(element.hashCode() % array_size)
int capacity = 11;
String[] strings = {"music", "beer", "afterlife", "wisdom",
"politics", "theater", "schools", "painting", "fear"};
for (String s : strings) {
System.out.println(
'"' + s + '"'
+ "\n\thash code: " + s.hashCode() + ", "
+ "\n\tarray index: " + Math.abs(s.hashCode() % capacity)
);
}
The output is:
array of size 11 with 9 strings
"music"
hash code: 104263205,
array index: 2
"beer"
hash code: 3019824,
array index: 5
"afterlife"
hash code: 1019963096,
array index: 9
"wisdom"
hash code: -787603007,
array index: 4
"politics"
hash code: 547400545,
array index: 10
"theater"
hash code: -1350043631,
array index: 2
"schools"
hash code: 1917457279,
array index: 1
"painting"
hash code: 925981380,
array index: 5
"fear"
hash code: 3138864,
array index: 3
In particular we see that with 9 strings out of 11 indices we
have only two collisions making the buckets at indices 2 and 5
each of size 2. This gives us
the right sense about how well hashing can perform. A crucial measure
to keep track of is the load, which is:
load = (# elements in the hash table) / capacityFor example, this hash table has load
9/11 = .82
import java.util.*;
public class Main {
public static void main(String[] args) {
mainShowSets(args);
}
public static void mainShowSets(String[] args) {
Integer[] I = new Integer[20];
Random r = new Random();
for (int n = 0; n < I.length; ++n) {
I[n] = r.nextInt(50);
}
System.out.println("I: " + Arrays.toString(I));
Set<Integer> HS = new HashSet<Integer>();
Set<Integer> LS = new LinkedHashSet<Integer>();
Set<Integer> TS = new TreeSet<Integer>();
for (Integer n : I) {
HS.add(n);
LS.add(n);
TS.add(n);
}
System.out.println("HS: " + HS);
System.out.println("LS: " + LS);
System.out.println("TS: " + TS);
}
}
public static void mainCompareTreeHashSets(String[] args) {
long start_hash = 0;
long start_sort = 0;
long end_hash = 0;
long start_tree = 0;
long end_tree = 0;
Set<Integer> HS = null;
Set<Integer> TS = null;
Integer[] I = null;
final int NUM = 40000;
for (int i = 0; i < 2; ++i) {
I = new Integer[NUM];
Random r = new Random();
for (int n = 0; n < I.length; ++n) {
I[n] = r.nextInt(NUM);
}
HS = new HashSet<Integer>();
TS = new TreeSet<Integer>();
List<Integer> HSL = new ArrayList<Integer>();
List<Integer> TSL = new ArrayList<Integer>();
start_hash = System.currentTimeMillis();
for (Integer n : I) { // add the integers to the hash set, HS
HS.add(n);
}
start_sort = System.currentTimeMillis();
for (Integer n : HS) { // transfer from the tree set to the list, HSL
HSL.add(n);
}
Collections.sort(HSL); // sort the list
end_hash = System.currentTimeMillis();
// Now, create the same effect using a TreeSet
start_tree = System.currentTimeMillis();
for (Integer n : I) { // add the integers to the tree set, TS
TS.add(n);
}
for (Integer n : TS) { // transfer from the tree set to the list, TSL
TSL.add(n);
}
end_tree = System.currentTimeMillis();
}
System.out.println("total: " + I.length);
System.out.println("total unique: " + TS.size());
System.out.println("hash: " + (end_hash - start_hash) + "ms");
System.out.println("\tenter: " + (start_sort - start_hash) + "ms");
System.out.println("\tsort: " + (end_hash - start_sort) + "ms");
System.out.println("tree: " + (end_tree - start_tree) + "ms");
}
public static void mainRegularLinkedHashSets(String[] args) {
Map<String, Integer> hmap = new HashMap<String, Integer>();
Map<String, Integer> lhmap = new LinkedHashMap<String, Integer>();
//Map<String, Integer> hmap = new OpenHMap<String, Integer>();
//Map<String, Integer> lhmap = new LinkedOpenHMap<String, Integer>();
String[] keys = {
"fashion", "radio", "technology", "television",
"movies", "business", "wine", "science"
};
int[] values = {5, 17, 7, 10, 8, 3, 14, 11};
int index = 0;
for (String key : keys) {
int value = values[index++];
System.out.println("entry: (" + key + "," + value + ")");
hmap.put(key, value);
lhmap.put(key, value);
}
System.out.println("hmap: " + hmap);
System.out.println("lhmap: " + lhmap);
//((OpenHMap) hmap).tableOutput();
}
Class Name: MapAdapter package: mapDo this by creating from the code or perhaps simply copy the entire map package from the TMapDemo project.
public void tableOutput()can be used to reveal the internal hashed table structure. Create the class:
Class Name: OpenHMap package: hashThen insert the following content ( click to show )
class LinkedHashMap extends HashMapWhat is added is a linked list of nodes whose data are (pointers to) the hash table nodes. Create the class:
Class Name: LinkedOpenHMap package: hashThen insert the following content ( click to show )
private int primeUp(int n) {
int prime = n;
if (prime % 2 == 0) {
prime += 1;
}
// guess prime to be the first odd number up from n
boolean found_prime;
do {
found_prime = true;
// check for divisibility for all odd numbers between 3 and sqrt(prime)
for (int i = 3; i <= Math.sqrt(prime); i += 2) {
if (prime % i == 0) {
found_prime = false;
prime += 2; // divisor found, try the next odd number up
break;
}
}
} while (!found_prime);
return prime;
}
This next prime computation is done quite efficiently but there is still
the "double loop" to consider.
When rehashing is required, we increase capacity by the call
capacity = primeUp(2 * capacity + 1);
and so the call to
primeUp
uses n = 2*capacity + 1.
The first question is:
how far from n to the next prime?
Using the famous
Prime Number Theorem
we can expect the prime to be found after ln(n) outer loops.
The inner loop is at worst sqrt(n). Thus if
the rehash is called when the hash size, N ~ .75 * capacity, we
get an average case time of:
O(log(N)*sqrt(N)) = o(N)Since a rehash costs O(N) just to move the nodes into the new hash table, the primeUp function adds no significant extra cost.
protected class Node {
K key;
V value;
Node next;
Node(K key, V value, Node next) {
this.key = key;
this.value = value;
this.next = next;
}
@Override
public String toString() {
return key + "=" + value;
}
}
The toString function is needed only by the derived class in
order to simplify its toString function.
The data elements are these:
protected int capacity; private float loadFactor; protected int size = 0; protected Object[] bucket; // array of NodesWe would prefer to type the bucket array as:
protected Node[] bucket;but Java refuses to make the necessary cast in the constructor:
bucket = (Node[]) new Object[capacity];and so we are forced to do:
bucket = new Object[capacity];In addition to these, the other crucial internal function is:
protected int hash(K key) {
return Math.abs(key.hashCode() % capacity);
}
private Node findNode(K key, Node start) {
Node curr;
for (curr = start; curr != null; curr = curr.next) {
if (curr.key.equals(key)) {
break;
}
}
return curr;
}
protected Node put(K key, V value, AtomicReference<V> saved) {
int num = hash(key);
Node curr = findNode(key, (Node) bucket[num]);
if (curr == null) { // create a new entry, return the new Node created
float load = (float) size / capacity;
if (load >= loadFactor) {
rehash();
num = hash(key); // rehash this new one
}
// push the new node onto the bucket
bucket[num] = new Node(key, value, (Node) bucket[num]);
saved.set(null); // no "old value", so set to null
return (Node) bucket[num]; // return the new Node
}
else { // node with key found: no new entry created
saved.set(curr.value); // capture old value
curr.value = value; // reset the value
return null; // return null to indicate no new node
}
}
The public put function uses it like this:
public V put(K key, V value) {
AtomicReference<V> saved = new AtomicReference<V>();
if (put(key, value, saved) != null) {
++size;
}
return saved.get();
}
The public get operation is even simpler since there are no side-effects:
public V get(Object obj) {
K key = (K) obj;
int num = hash(key);
Node curr = findNode(key, (Node) bucket[num]);
if (curr == null) {
return null;
} else {
return curr.value;
}
}
package hash;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
public class LinkedOpenHMap<K, V> extends OpenHMap<K, V> {
public LinkedOpenHMap() {
super();
}
public LinkedOpenHMap(int capacity) {
super(capacity);
}
public LinkedOpenHMap(int capacity, float loadFactor) {
super(capacity, loadFactor);
}
private List<Node> list = new LinkedList<Node>();
@Override
public V put(K key, V value) {
AtomicReference<V> saved = new AtomicReference<V>();
Node insert = super.put(key, value, saved);
if (insert != null) {
++size;
list.add(insert);
}
return saved.get();
}
@Override
public String toString() {
return list.toString();
}
}
(fashion,5) (radio,17) (technology,7) (television,10) (movies,8) (business,3) (wine,15) (science,11)A depiction of the internal structure using our implementations would be this:
In particular the hash Node class should also have a pointer to the node used in the linked list. In particular, we would be obliged to implement the linked list directly so that the list node could be stored in the hash nodes. A diagram of the modified linked hash map might look like this:
import hash.*;
Map<String,Integer> hmap = new HashMap<String,Integer>(); Map<String,Integer> lhmap = new LinkedHashMap<String,Integer>();and uncomment these:
//Map<String,Integer> hmap = new OpenHMap<String,Integer>(); //Map<String,Integer> lhmap = new LinkedOpenHMap<String,Integer>(); //((OHashMap)hmap).tableOutput();
public static void mainShowRehash(String[] args) {
Map<String,Integer> map = new OpenHMap<String,Integer>(17);
//Map<String, Integer> map = new LinkedOpenHMap<String, Integer>(17);
String[] keys = {
"fashion", "radio", "technology", "television",
"movies", "business", "wine", "science"
};
int[] values = {5, 17, 7, 10, 8, 3, 14, 11};
System.out.println("key entry: " + Arrays.toString(keys));
int index = 0;
for (String key : keys) {
int value = values[index++];
map.put(key, value);
}
System.out.println("map: " + map);
System.out.println("table output");
((OpenHMap) map).tableOutput();
String[] more_keys = new String[]{
"music", "beer", "afterlife", "wisdom",
"politics", "theater", "schools", "painting"
};
System.out.println("key entry: " + Arrays.toString(more_keys));
index = 0;
for (String key : more_keys) {
int value = values[index++];
map.put(key, value);
}
System.out.println("map: " + map);
System.out.println("table output");
((OpenHMap) map).tableOutput();
}
hash(e) + 1
+ 2
+ 3
+ ... increase by adding 1, mod by capacity
hash(e) + 1
+ 4 = 22
+ 9 = 32
+ 16 = 42
+ ... increase by adding successive odd numbers, mod by capacity
One issue which must be resolved is whether we will be able to
find an open slot with this method under reasonable circumstances.
A relatively easy
number-theoretic result ensures that if the capacity is prime and
the load is under 50% that a the probe will succeed.
a = hash2(e)We want 0 < a < capacity. Using this value, search this sequence of alternative locations:
hash(e) + a
+ 2 * a
+ 3 * a
+ .... increase by adding a, mod by capacity
Because the "a" value is likely to be different for different
elements, they probe different sequences of indices.
Here is a class which implements closed hashing with the three probing versions. Choose a probing version by setting the private data member String whichProbe appropriately. Create the class:
Class Name: ClosedHSet package: hashThen insert the following content ( click to show ) Like our other implementations the ClosedHSet class extends the basic set implementation, SetAdapter.
private int linear_probe_pos(E elt) {
int pos = hash(elt);
while (true) {
if (data[pos] == null) {
return pos;
}
else if (data[pos].equals(elt))
return -1;
else
pos = (pos + 1) % capacity;
}
}
private int quadratic_probe_pos(E elt) {
int pos = hash(elt);
int i = 1;
while (true) {
if (data[pos] == null) {
return pos;
} else if (data[pos].equals(elt)) {
return -1;
} else {
pos = (pos + i) % capacity;
i += 2;
}
}
}
// we could write a "primeDown" function and make
// smaller_factor = primeDown(capacity), but we simplify it here
// since this second hash value is less crucial than the first one.
private int hash2(E elt) {
int smaller_factor = capacity - 2;
return 1 + Math.abs(elt.hashCode() % smaller_factor );
}
private int double_hash_probe_pos(E elt) {
int pos = hash(elt);
int amt = hash2(elt);
while (true) {
if (data[pos] == null) {
return pos;
} else if (data[pos].equals(elt)) {
return -1;
} else {
pos = (pos + amt) % capacity;
}
}
}
The add and contains functions
calls a single probe function, probe_pos
which is one of the three specific probe function implementations:
linear_probe_pos,
quadratic_probe_pos,
or
double_hash_probe_pos.
public boolean add(E elt) {
int pos = probe_pos(elt);
if (pos == -1) { // element found and not added
return false;
} else { // element added at position pos
++size;
data[pos] = elt;
return true;
}
}
public boolean contains(Object obj) {
E elt = (E) obj;
return probe_pos(elt) == -1;
}
import hash.*;To make the comparison, run it three times using each of the three valid values of the private data member whichProbe.
static void mainClosedHash(String[] args) {
Set<String> set = new ClosedHSet<String>(30);
String[] keys = {
"games", "blogs", "cartoons", "music", "books", "health", "dining",
"sunday", "store", "classified", "mobile", "update", "services"
};
for (String key : keys) {
set.add(key);
}
((ClosedHSet) set).tableOutput();
}
data[delete_position] = new Deleted( ... );This delete element can possibly contain useful information, like the deleted value, etc. The probe functions must then be modified so as to:
private int linear_probe_pos(E e) {
int pos = hash(e);
int delpos = -1;
while (true) {
if (data[pos] == null) {
// reached the end of the probe path
// if you found a delete position on the way, return that
// for possible insertion
return (delpos != -1) ? delpos : pos;
}
else if (data[pos].equals(e))
// the element you're looking for was found
return -1;
else {
// mark a deleted position if found
if (data[pos] instanceof Deleted) // a "deleted" marker
delpos = pos;
pos = (pos + 1) % capacity;
}
}
}