In the MapReduce framework, map output keys are compared with each other during the sort phase.
Hadoop provides RawComparator, an extension of Java’s Comparator, for comparing binary stream data directly without deserializing it into objects, thereby avoiding the overhead of object creation.
Java
package org.apache.hadoop.io;
import java.util.Comparator;
public interface RawComparator<T> extends Comparator<T>
{
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
}
import java.util.Comparator;
public interface RawComparator<T> extends Comparator<T>
{
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2);
}