Skip to content

Commit

Permalink
MAHOUT-1096 - Implement hashCode for generated classes
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/mahout/trunk@1397344 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
tdunning committed Oct 11, 2012
1 parent c4714d8 commit ca9b6d0
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ package org.apache.mahout.math.map;

import java.util.ArrayList;
import java.util.List;
import java.nio.IntBuffer;
import java.util.Arrays;

import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.function.IntComparator;

import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure;
Expand Down Expand Up @@ -145,6 +148,25 @@ public abstract class Abstract${keyTypeCap}ObjectMap<T> extends AbstractSet {
);
}

public int hashCode() {
final int[] buf = new int[size()];
forEachPair(
new ${keyTypeCap}ObjectProcedure() {
int i = 0;

@Override
public boolean apply(${keyType} key, Object value) {
buf[i++] = HashUtils.hash(key) ^ value.hashCode();
return true;
}
}
);
Arrays.sort(buf);
return IntBuffer.wrap(buf).hashCode();
}



/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@ It is provided "as is" without expressed or implied warranty.
*/
package org.apache.mahout.math.map;

import java.nio.IntBuffer;
import java.util.Arrays;

import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.function.${keyTypeCap}${valueTypeCap}Procedure;
import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
Expand Down Expand Up @@ -147,6 +151,23 @@ public abstract class Abstract${keyTypeCap}${valueTypeCap}Map extends AbstractSe
);
}

public int hashCode() {
final int[] buf = new int[size()];
forEachPair(
new ${keyTypeCap}${valueTypeCap}Procedure() {
int i = 0;

@Override
public boolean apply(${keyType} key, ${valueType} value) {
buf[i++] = HashUtils.hash(key) ^ HashUtils.hash(value);
return true;
}
}
);
Arrays.sort(buf);
return IntBuffer.wrap(buf).hashCode();
}

/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ package org.apache.mahout.math.map;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.nio.IntBuffer;
import java.util.Arrays;

import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
import org.apache.mahout.math.function.Object${valueTypeCap}Procedure;
Expand Down Expand Up @@ -149,6 +152,23 @@ public abstract class AbstractObject${valueTypeCap}Map<T> extends AbstractSet {
);
}

public int hashCode() {
final int[] buf = new int[size()];
forEachPair(
new Object${valueTypeCap}Procedure() {
int i = 0;

@Override
public boolean apply(Object key, ${valueType} value) {
buf[i++] = key.hashCode() ^ HashUtils.hash(value);
return true;
}
}
);
Arrays.sort(buf);
return IntBuffer.wrap(buf).hashCode();
}

/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ package org.apache.mahout.math.set;

import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
import java.util.Arrays;
import java.nio.IntBuffer;
import org.apache.mahout.math.set.HashUtils;

public abstract class Abstract${keyTypeCap}Set extends AbstractSet {

Expand Down Expand Up @@ -73,6 +76,23 @@ public abstract class Abstract${keyTypeCap}Set extends AbstractSet {
);
}

public int hashCode() {
final int[] buf = new int[size()];
forEachKey(
new ${keyTypeCap}Procedure() {
int i = 0;

@Override
public boolean apply(${keyType} iterKey) {
buf[i++] = HashUtils.hash(iterKey);
return true;
}
}
);
Arrays.sort(buf);
return IntBuffer.wrap(buf).hashCode();
}

/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
Expand Down
35 changes: 35 additions & 0 deletions math/src/main/java/org/apache/mahout/math/set/HashUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.apache.mahout.math.set;

/**
* Computes hashes of primitive values. Providing these as statics allows the templated code
* to compute hashes of sets.
*/
public class HashUtils {
public static int hash(byte x) {
return x;
}

public static int hash(short x) {
return x;
}

public static int hash(char x) {
return x;
}

public static int hash(int x) {
return x;
}

public static int hash(float x) {
return Float.floatToIntBits(x) >>> 3 + Float.floatToIntBits((float) (Math.PI * x));
}

public static int hash(double x) {
return hash(17 * Double.doubleToLongBits(x));
}

public static int hash(long x) {
return (int) ((x * 11) >>> 32 ^ x);
}
}
16 changes: 15 additions & 1 deletion math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@

package org.apache.mahout.math.set;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.mahout.math.MurmurHash;
import org.apache.mahout.math.function.ObjectProcedure;
import org.apache.mahout.math.map.PrimeFinder;

Expand Down Expand Up @@ -457,7 +459,19 @@ public boolean apply(T key) {
}
);
}


@Override
public int hashCode() {
ByteBuffer buf = ByteBuffer.allocate(size());
for (int i = 0; i < table.length; i++) {
Object v = table[i];
if (state[i] == FULL) {
buf.putInt(v.hashCode());
}
}
return MurmurHash.hash(buf, this.getClass().getName().hashCode());
}

/**
* Implement the standard Java Collections iterator. Note that 'remove' is silently
* ineffectual here. This method is provided for convenience, only.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,14 @@ public class Open${keyTypeCap}HashSetTest extends Assert {
map.remove(($keyType) 13);
Open${keyTypeCap}HashSet map2 = (Open${keyTypeCap}HashSet) map.copy();
assertTrue(map.equals(map2));
assertTrue(map.hashCode() == map2.hashCode());
assertTrue(map2.equals(map));
assertTrue(map.hashCode() == map2.hashCode());
assertFalse("Hello Sailor".equals(map));
assertFalse(map.equals("hello sailor"));
map2.remove(($keyType) 11);
assertFalse(map.equals(map2));
assertFalse(map2.equals(map));
assertFalse(map.hashCode() == map2.hashCode());
}
}
73 changes: 73 additions & 0 deletions math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package org.apache.mahout.math.set;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import junit.framework.TestCase;
import org.apache.mahout.common.RandomUtils;

import java.util.Collection;
import java.util.List;
import java.util.Random;

public class HashUtilsTest extends TestCase {
public void testHashFloat() {
Multiset<Integer> violations = HashMultiset.create();
for (int k = 0; k < 1000; k++) {
List<Float> original = Lists.newArrayList();

Random gen = RandomUtils.getRandom();
for (int i = 0; i < 10000; i++) {
float x = (float) gen.nextDouble();
original.add(x);
}

violations.add(checkCounts(original) <= 12 ? 0 : 1);
}
// the hashes for floats don't really have 32 bits of entropy so the test
// only succeeds at better than about 99% rate.
assertTrue(violations.count(0) >= 985);
}

public void testHashDouble() {
List<Double> original = Lists.newArrayList();

for (int k = 0; k < 10; k++) {
Random gen = RandomUtils.getRandom();
for (int i = 0; i < 10000; i++) {
double x = gen.nextDouble();
original.add(x);
}

checkCounts(original);
}
}

public void testHashLong() {
List<Long> original = Lists.newArrayList();

for (int k = 0; k < 10; k++) {
Random gen = RandomUtils.getRandom();
for (int i = 0; i < 10000; i++) {
long x = gen.nextLong();
original.add(x);
}

checkCounts(original);
}
}

private <T> int checkCounts(Collection<T> original) {
Multiset<T> hashCounts = HashMultiset.create();
for (T v : original) {
hashCounts.add(v);
}

Multiset<Integer> countCounts = HashMultiset.create();
for (T hash : hashCounts) {
countCounts.add(hashCounts.count(hash));
}

return original.size() - countCounts.count(1);
}
}

0 comments on commit ca9b6d0

Please sign in to comment.