I'm trying to develop a Hadoop app. I want to start 2 Mappers and 2 Reducers from my main method, but I keep getting a cast error, which brings me to ask: how can I do this?
Mapper1:
@SuppressWarnings("javadoc")
public class IntervallMapper1 extends Mapper<LongWritable, Text, Text, LongWritable> {
private static Logger logger = Logger.getLogger(IntervallMapper1.class.getName());
private static Category categoriy;
private static Value value;
private String[] values = new String[4];
private final static LongWritable one = new LongWritable(1);
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
if(!this.categoriy.valueIsMissing(value.toString())){ // air pressure and wind strength present...
values = this.value.getValues(value.toString());
this.logger.info("Key: " + values[0] + values[1]);
context.write(new Text(values[0] + values[1]), this.one); // station-date as key, value = 1
}
}
}
Reducer1:
@SuppressWarnings("javadoc")
public class IntervallReducer1 extends Reducer<Text, LongWritable, Text, LongWritable> {
private static Logger logger = Logger.getLogger(IntervallReducer1.class.getName());
private String key = null;
private static LongWritable result = new LongWritable();
private long sum;
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
for (LongWritable value : values) {
if(this.key == null){
logger.info("Erster Durchlauf");
System.out.println("---> " + value.get());
sum = value.get();
this.key = key.toString().substring(0, 10);
} else if (key.toString().contains(this.key)) { // TODO: key.toString().substring(0, 10)
logger.info("Key bereit vorhanden");
System.out.println("---> " + sum);
sum += value.get();
} else { // if the key is not yet present
logger.info("Key nicht vorhanden");
result.set(sum);
logger.info("Value: " + sum);
context.write(new Text(this.key), result);
this.key = key.toString().substring(0, 10);
sum = value.get();
}
}
}
}
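A side note on IntervallReducer1, separate from the cast error: because reduce() is invoked once per key group, the sum accumulated for the last key is never written anywhere. A minimal sketch of a cleanup() override that would flush it when the reducer finishes (this is an addition, not part of the original code):

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    // flush the last accumulated group, which the loop in reduce() never writes
    if (this.key != null) {
        result.set(sum);
        context.write(new Text(this.key), result);
    }
}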
Mapper2:
@SuppressWarnings("javadoc")
public class IntervallMapper1 extends Mapper<LongWritable, Text, Text, LongWritable> {
private static Logger logger = Logger.getLogger(IntervallMapper1.class.getName());
private static Category categoriy;
private static Value value;
private String[] values = new String[4];
private final static LongWritable one = new LongWritable(1);
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
if(!this.categoriy.valueIsMissing(value.toString())){ // air pressure and wind strength present...
values = this.value.getValues(value.toString());
this.logger.info("Key: " + values[0] + values[1]);
context.write(new Text(values[0] + values[1]), this.one); // station-date as key, value = 1
}
}
}
Main:
@SuppressWarnings("javadoc")
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Job job = Job.getInstance(new Configuration());
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(IntervallMapper1.class);
// job.setCombinerClass(IntervallReducer1.class);
job.setReducerClass(IntervallReducer1.class);
job.setMapperClass(IntervallMapper2.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setJarByClass(IntervallStart.class);
job.waitForCompletion(true);
}
Error:
Error: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.Text
at ncdcW03.IntervallMapper2.map(IntervallMapper2.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
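On the original question: calling job.setMapperClass twice on the same Job does not add a second mapper, it simply replaces the first one, and one Job only ever runs a single map/reduce pair (ChainMapper/ChainReducer exist for chaining maps inside one job, but the more common approach is two jobs). A rough sketch of a chained driver, where the second job reads the first job's output; the intermediate path, IntervallReducer2, and the assumption that IntervallMapper2 consumes Text/Text pairs are mine, not taken from the code above:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path input = new Path(args[0]);
    Path intermediate = new Path(args[1] + "_tmp"); // assumed temp directory between the two passes
    Path output = new Path(args[1]);

    Job job1 = Job.getInstance(conf, "intervall pass 1");
    job1.setJarByClass(IntervallStart.class);
    job1.setMapperClass(IntervallMapper1.class);
    job1.setReducerClass(IntervallReducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(LongWritable.class);
    job1.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job1, input);
    FileOutputFormat.setOutputPath(job1, intermediate);
    if (!job1.waitForCompletion(true)) {
        System.exit(1);
    }

    Job job2 = Job.getInstance(conf, "intervall pass 2");
    job2.setJarByClass(IntervallStart.class);
    // KeyValueTextInputFormat (org.apache.hadoop.mapreduce.lib.input) turns job 1's
    // "key<TAB>value" text output into Text/Text pairs, so IntervallMapper2 must expect Text keys
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(IntervallMapper2.class);
    job2.setReducerClass(IntervallReducer2.class); // assumed second reducer
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job2, intermediate);
    FileOutputFormat.setOutputPath(job2, output);
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}

The ClassCastException in IntervallMapper2 fits this picture: fed directly by TextInputFormat it receives LongWritable byte offsets, while it apparently expects Text keys, which the chained setup above would give it.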
Related
My Mapper function looks like this:
public class preprocessMapper
extends Mapper<LongWritable, Text, Text, Text> {
private String Heading = "";
private String para ="";
private Integer record = 0;
private String word;
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
String date = fileName.substring(5,15);
Text t1 = new Text(date);
context.write(t1, value);
}}
My reducer function looks like this:
public class preprocessReducer
extends Reducer<Text, Text, Text, Text> {
// private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<Text> values,Context context) throws IOException, InterruptedException {
String para = "";
for (Text val : values) {
para = para + val+" ";
}
Text t2 = new Text(para);
//result.set(para);
context.write(key, t2);
}
}
Here's my configuration function:
public class preprocess {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Preprocess");
job.setJarByClass(preprocess.class);
job.setMapperClass(preprocessMapper.class);
job.setReducerClass(preprocessReducer.class);
job.setNumReduceTasks(1);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//job.setInputFormatClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
I am trying to process a set of text files using the above, but I get the following error related to the format. Can someone advise where the format is incorrect?
18/07/18 19:38:09 INFO mapreduce.Job: Task Id :
attempt_1528077494936_5165_m_000001_2, Status : FAILED
Error: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable
cannot be cast to org.apache.hadoop.io.Text
at preprocessMapper.map(preprocessMapper.java:20)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:793)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
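Since the exception is raised inside preprocessMapper.map itself, the mapper class that actually ran almost certainly declared Text (not LongWritable) as its input key type, while the default TextInputFormat supplies LongWritable byte offsets. That is hedged guesswork, because the source posted above already uses LongWritable; the deployed jar may simply not match it. Rebuilding the jar and marking the method with @Override (so a signature mismatch becomes a compile error) is a cheap check; a sketch:

public class preprocessMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
        String date = fileName.substring(5, 15); // assumes the date sits at characters 5..14 of the file name
        context.write(new Text(date), value);
    }
}

Explicitly calling job.setMapOutputKeyClass(Text.class) and job.setMapOutputValueClass(Text.class) in the driver does not hurt either, even though they default to the output classes already set.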
I am trying to implement a secondary sort, following this example: https://www.safaribooksonline.com/library/view/data-algorithms/9781491906170/ch01.html
But my problem is different. I have a list of products with the year, the month and the price, like this:
201505011000######PEN DRIVE00951
201505011000######PEN DRIVE00952
201505011000######PEN DRIVE00458
201505011000######PEN DRIVE00459
201505011000#######NOTEBOOK11470
201605011000#######NOTEBOOK21471
201705011000#######NOTEBOOK21472
201705011000###GAVETA DE HD01472
201703011000###GAVETA DE HD01473
201705011000###GAVETA DE HD01474
Here, for example, 201505 represents the year and month, the product name comes after the # signs, and the last five digits are the price: 01470 represents 14.70.
What I need to do is get the lowest price for each product and show the year and month of that price. But I don't know how to do that; what I can show so far is only the lowest price and the product.
Here is my program:
MAPPER
public class GroupMR {
public static class GroupMapper extends Mapper<LongWritable, Text, Product, IntWritable> {
Product prdt = new Product();
Text cntText = new Text();
Text YearMonthText = new Text();
IntWritable price = new IntWritable();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String produto = line.substring(13, 27); // product name
produto = produto.substring(produto.lastIndexOf("#")+1);
String ano = line.substring(0, 6);
int valor = Integer.parseInt(line.substring(27, 32));
cntText.set(new Text(produto));
YearMonthText.set(ano);
price.set(valor);
Product prdt = new Product(cntText, YearMonthText);
context.write(prdt, price);
}
}
REDUCER
public static class GroupReducer extends Reducer<Product, IntWritable, Product, IntWritable> {
public void reduce(Product key, Iterator<IntWritable> values, Context context) throws IOException,
InterruptedException {
int minValue = Integer.MAX_VALUE;
while (values.hasNext()) {
minValue = Math.min(minValue,values.next().get());
}
context.write(key, new IntWritable(minValue));
}
}
COMPARABLE
private static class Product implements WritableComparable<Product> {
Text Product;
Text YearMonth;
public Product(Text Product, Text YearMonth) {
this.Product = Product;
this.YearMonth = YearMonth;
}
public Product() {
this.Product = new Text();
this.YearMonth = new Text();
}
public void write(DataOutput out) throws IOException {
this.Product.write(out);
this.YearMonth.write(out);
}
public void readFields(DataInput in) throws IOException {
this.Product.readFields(in);
this.YearMonth.readFields(in);
}
public int compareTo(Product pric) {
if (pric == null)
return 0;
int intcnt = Product.compareTo(pric.Product);
return intcnt;
}
@Override
public String toString() {
return Product.toString() + " DATA: " + YearMonth.toString();
}
}
DRIVER
public static void main(String[] args)
throws IOException, ClassNotFoundException, InterruptedException {
FileUtils.deleteDirectory(new File("/Local/data/output"));
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "GroupMR");
job.setJarByClass(GroupMR.class);
job.setMapperClass(GroupMapper.class);
job.setReducerClass(GroupReducer.class);
job.setOutputKeyClass(Product.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
RESULT
201605011000######PEN DRIVE00950
201505011000######PEN DRIVE00951
201505011000######PEN DRIVE00952
201505011000######PEN DRIVE00458
201505011000######PEN DRIVE00459
201505011000#######NOTEBOOK11470
201605011000#######NOTEBOOK21471
201705011000#######NOTEBOOK21472
201705011000###GAVETA DE HD01472
201703011000###GAVETA DE HD01473
201705011000###GAVETA DE HD01474
I think the problem is in the reduce and in compareTo, but I have no idea how to fix it. Could someone help me with it?
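Two things stand out. First, with the new API the reduce signature must be reduce(Product key, Iterable<IntWritable> values, Context context); the Iterator version above never overrides Reducer.reduce, so the default pass-through reduce runs and nothing is aggregated. Second, to report the year and month of the lowest price, that information has to travel with the price. A hedged sketch of a simpler variant that keys by product name only and carries "yearMonth<TAB>price" in the value (class names here are illustrative, not from the original):

public static class MinPriceMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String produto = line.substring(13, 27);
        produto = produto.substring(produto.lastIndexOf("#") + 1);
        String yearMonth = line.substring(0, 6);
        String price = line.substring(27, 32);
        // product name as key, "yearMonth<TAB>price" as value
        context.write(new Text(produto), new Text(yearMonth + "\t" + price));
    }
}

public static class MinPriceReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int minPrice = Integer.MAX_VALUE;
        String minYearMonth = "";
        for (Text value : values) {
            String[] parts = value.toString().split("\t");
            int price = Integer.parseInt(parts[1]);
            if (price < minPrice) {
                minPrice = price;
                minYearMonth = parts[0];
            }
        }
        // lowest price per product together with the year-month it occurred in
        context.write(key, new Text(minYearMonth + "\t" + minPrice));
    }
}

The driver would then set both the output key and value classes to Text.class. If the composite Product key has to stay, the same points apply: the reduce signature must take Iterable<IntWritable>, compareTo should also compare YearMonth (otherwise keys that share a product name but differ in year-month are merged arbitrarily during grouping), and Product needs a hashCode() consistent with equals() for the default HashPartitioner.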
When I use:
context.write(key, value)
it writes one "(key, value)" per line, but I want to change that.
Current output:
(key, value)
(key, value)
(key, value)
(key, value)
Target output:
(key, value) (key, value) (key, value) (key, value)
with key = NullWritable and value = Text (e.g. a random word).
How can I fix it?
Mapper output:
(hi, 408)
(hi, 442)
(hi, 723)
(hi, 805)
Final/Reducer output:
(hi, 805) (hi, 723) (hi, 442) (hi, 408)
public class DataApp{
public static class DataMapper extends Mapper<Object, Text, NullWritable, Text> {
public void map(Object key, Text value, Context context) throws IOException, InterruptedException{
System.out.println("(hi, " + value.getLength() + ")");
context.write(NullWritable.get(), new Text("(hi, " + value.getLength() + ")"));
}
}
public static class DataReducer extends Reducer<NullWritable, Text, NullWritable, Text> {
public void reduce(NullWritable key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
String str="";
for(Text value: values){
str += value.toString() + " ";
}
context.write(NullWritable.get(), new Text(str));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "stackoverflow-41476232");
job.setJarByClass(DataApp.class);
job.setMapperClass(DataMapper.class);
job.setReducerClass(DataReducer.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
FileSystem fs = null;
Path dstFilePath = new Path(args[1]);
try {
fs = dstFilePath.getFileSystem(conf);
if (fs.exists(dstFilePath))
fs.delete(dstFilePath, true);
} catch (IOException e1) {
e1.printStackTrace();
}
job.waitForCompletion(true);
}
}
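The reducer above already concatenates everything into one line, since every record shares the single NullWritable key and therefore reaches one reduce() call. The only gap relative to the target output is ordering, because the values arrive in no guaranteed order. If the descending order shown in the target output matters, one option is to buffer and sort before writing; a sketch, assuming plain string comparison is acceptable for these fixed-width values:

public void reduce(NullWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    java.util.List<String> parts = new java.util.ArrayList<>();
    for (Text value : values) {
        parts.add(value.toString());
    }
    // reverse lexicographic order gives "(hi, 805) (hi, 723) ..." for the sample data
    parts.sort(java.util.Comparator.reverseOrder());
    context.write(NullWritable.get(), new Text(String.join(" ", parts)));
}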
I am writing a simple extension of a MapReduce program and found that my code only displays output from map(). The MapReduce job runs in Eclipse without any errors but does not invoke reduce().
Here is my map():
public static class KVMapper
extends Mapper<Text, Text, IntWritable, Text>{
// extends Mapper<Text, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private String word;// = new Text();
private IntWritable iw;
private final LongWritable val = new LongWritable();
public void map(Text key, Text value , Context context
) throws IOException, InterruptedException {
iw = new IntWritable(Integer.parseInt(value.toString()));
System.out.println(value +" hello , world " +key );
context.write(iw, key);
}
}
Reduce()
public static class KVReducer
extends Reducer<IntWritable,Text,IntWritable, Text> {
KVReducer(){
System.out.println("Inside reducer");
}
public void reduce(IntWritable key, Text value,
Context context
) throws IOException, InterruptedException {
System.out.println(value +" hello2 , world " +key );
context.write(key, value);
}
}
main()
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
//conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator",",");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = new Job(conf, "word desc");
job.setInputFormatClass(KeyValueTextInputFormat.class);
job.setJarByClass(WordDesc.class);
job.setMapperClass(KVMapper.class);
job.setCombinerClass(KVReducer.class);
job.setReducerClass(KVReducer.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
for (int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,
new Path(otherArgs[otherArgs.length - 1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Sample of the input:
1500s 1
1960s 1
Aldus 1
Sample output from the program (I was expecting the mapper to swap the key and value pairs):
1500s 1
1960s 1
Aldus 1
I'm not sure why reduce() is not being invoked in the above code.
You are not overriding the reduce() method of the Reducer class.
For your case its signature should be public void reduce(IntWritable key, Iterable<Text> values, Context context).
Here is the updated KVReducer:
public static class KVReducer
extends Reducer<IntWritable,Text,IntWritable, Text> {
KVReducer(){
System.out.println("Inside reducer");
}
@Override
public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
for (Text value : values) {
System.out.println(value + " hello2 , world " + key);
context.write(key, value);
}
}
}
The code below is inside a Hadoop Mapper:
String[] s = value.toString().split("\\s+");
String date = s[1];
An ArrayIndexOutOfBoundsException occurs at s[1].
Does the regex not work in Hadoop?
This happens when a blank line (or one containing only whitespace) is read, so the split produces fewer fields; you have to filter those lines out:
String[] s = value.toString().split("\\s+");
if (s.length > 1) {
String date = s[1];
}
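Putting the guard into a complete map method looks roughly like this; the output key/value written here are placeholders, since the original mapper's output types are not shown:

@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String[] s = value.toString().split("\\s+");
    if (s.length > 1) { // skip blank lines and lines without a second field
        String date = s[1];
        context.write(new Text(date), new LongWritable(1)); // placeholder output
    }
}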
A solution to your problem:
// Map function:
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {
// private final static IntWritable one = new IntWritable(1);
//private Text word = new Text();
double temp;
public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
line=line.replaceAll("U","");
int a=line.length();
if(a>2)
{
int spec=line.indexOf(' ');
String s=line.substring(spec,spec+9);
String b=line.substring(spec+10,a);
StringTokenizer tokenizer = new StringTokenizer(b);
while (tokenizer.hasMoreTokens()) {
{
temp=Double.valueOf(tokenizer.nextToken().toString());
}
output.collect(new Text(s), new DoubleWritable(temp));
}
}
}
}
// Reduce function:
public static class Reduce extends MapReduceBase implements Reducer<Text, DoubleWritable, Text, DoubleWritable> {
public void reduce(Text key, Iterator<DoubleWritable> values, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
Double maxValue = Double.MIN_VALUE;
Double minvalue=Double.MAX_VALUE;
Double a;
while (values.hasNext())
{
a=values.next().get();
maxValue = Math.max(maxValue,a);
minvalue=Math.min(minvalue,a);
}
// emit the max only once, after all values have been scanned
if(maxValue>40)
{
output.collect(key,new DoubleWritable(maxValue));
}
/* if(minvalue<10)
{
output.collect(key, new DoubleWritable(minvalue));
} */
output.collect(new Text(key+"Max"), new DoubleWritable(maxValue));
output.collect(new Text(key+"Min"),new DoubleWritable(minvalue));
}
}