forked from apache/mahout
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MAHOUT-1096 - Implement hashCode for generated classes
git-svn-id: https://svn.apache.org/repos/asf/mahout/trunk@1397344 13f79535-47bb-0310-9956-ffa450edef68
- Loading branch information
Showing
8 changed files
with
209 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
math/src/main/java/org/apache/mahout/math/set/HashUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
package org.apache.mahout.math.set; | ||
|
||
/**
 * Computes hashes of primitive values. Providing these as statics allows the templated code
 * to compute hashes of sets.
 *
 * <p>Types that fit in an {@code int} hash to their own (widened) value; {@code long},
 * {@code float} and {@code double} are folded down to 32 bits.
 */
public class HashUtils {
  /**
   * Hashes a {@code byte}.
   *
   * @param x the value to hash
   * @return {@code x} widened to {@code int}
   */
  public static int hash(byte x) {
    return x;
  }

  /**
   * Hashes a {@code short}.
   *
   * @param x the value to hash
   * @return {@code x} widened to {@code int}
   */
  public static int hash(short x) {
    return x;
  }

  /**
   * Hashes a {@code char}.
   *
   * @param x the value to hash
   * @return {@code x} widened to {@code int}
   */
  public static int hash(char x) {
    return x;
  }

  /**
   * Hashes an {@code int}.
   *
   * @param x the value to hash
   * @return {@code x} itself
   */
  public static int hash(int x) {
    return x;
  }

  /**
   * Hashes a {@code float} by adding its bit pattern, shifted right by three, to the bit
   * pattern of {@code Math.PI * x}.
   *
   * @param x the value to hash
   * @return the combined 32-bit hash
   */
  public static int hash(float x) {
    int bits = Float.floatToIntBits(x);
    int scaledBits = Float.floatToIntBits((float) (Math.PI * x));
    // The parentheses are required: '+' binds tighter than '>>>', so the unparenthesized
    // form "bits >>> 3 + scaledBits" would use (3 + scaledBits) as the shift distance.
    return (bits >>> 3) + scaledBits;
  }

  /**
   * Hashes a {@code double} by multiplying its bit pattern by 17 and delegating to
   * {@link #hash(long)}.
   *
   * @param x the value to hash
   * @return the 32-bit hash of the scaled bit pattern
   */
  public static int hash(double x) {
    return hash(17 * Double.doubleToLongBits(x));
  }

  /**
   * Hashes a {@code long} by XOR-ing the upper 32 bits of {@code x * 11} into {@code x} and
   * keeping the low 32 bits of the result.
   *
   * @param x the value to hash
   * @return the folded 32-bit hash
   */
  public static int hash(long x) {
    long mixed = ((x * 11) >>> 32) ^ x;
    return (int) mixed;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package org.apache.mahout.math.set; | ||
|
||
import com.google.common.collect.HashMultiset; | ||
import com.google.common.collect.Lists; | ||
import com.google.common.collect.Multiset; | ||
import junit.framework.TestCase; | ||
import org.apache.mahout.common.RandomUtils; | ||
|
||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.Random; | ||
|
||
public class HashUtilsTest extends TestCase { | ||
public void testHashFloat() { | ||
Multiset<Integer> violations = HashMultiset.create(); | ||
for (int k = 0; k < 1000; k++) { | ||
List<Float> original = Lists.newArrayList(); | ||
|
||
Random gen = RandomUtils.getRandom(); | ||
for (int i = 0; i < 10000; i++) { | ||
float x = (float) gen.nextDouble(); | ||
original.add(x); | ||
} | ||
|
||
violations.add(checkCounts(original) <= 12 ? 0 : 1); | ||
} | ||
// the hashes for floats don't really have 32 bits of entropy so the test | ||
// only succeeds at better than about 99% rate. | ||
assertTrue(violations.count(0) >= 985); | ||
} | ||
|
||
public void testHashDouble() { | ||
List<Double> original = Lists.newArrayList(); | ||
|
||
for (int k = 0; k < 10; k++) { | ||
Random gen = RandomUtils.getRandom(); | ||
for (int i = 0; i < 10000; i++) { | ||
double x = gen.nextDouble(); | ||
original.add(x); | ||
} | ||
|
||
checkCounts(original); | ||
} | ||
} | ||
|
||
public void testHashLong() { | ||
List<Long> original = Lists.newArrayList(); | ||
|
||
for (int k = 0; k < 10; k++) { | ||
Random gen = RandomUtils.getRandom(); | ||
for (int i = 0; i < 10000; i++) { | ||
long x = gen.nextLong(); | ||
original.add(x); | ||
} | ||
|
||
checkCounts(original); | ||
} | ||
} | ||
|
||
private <T> int checkCounts(Collection<T> original) { | ||
Multiset<T> hashCounts = HashMultiset.create(); | ||
for (T v : original) { | ||
hashCounts.add(v); | ||
} | ||
|
||
Multiset<Integer> countCounts = HashMultiset.create(); | ||
for (T hash : hashCounts) { | ||
countCounts.add(hashCounts.count(hash)); | ||
} | ||
|
||
return original.size() - countCounts.count(1); | ||
} | ||
} |