Error running MapReduce job in Eclipse from Windows - java

I have a pseudo-distributed Hadoop setup on a Linux machine. I have run a few examples in Eclipse, which is also installed on that Linux machine, and they worked fine. Now I want to run MapReduce jobs through Eclipse (installed on a Windows machine) and access the HDFS that is already on my Linux machine. I have written the following driver code:
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Windows_Driver extends Configured implements Tool{
public static void main(String[] args) throws Exception {
int exitcode = ToolRunner.run(new Windows_Driver(), args);
System.exit(exitcode);
}
@Override
public int run(String[] arg0) throws Exception {
JobConf conf = new JobConf(Windows_Driver.class);
conf.set("fs.defaultFS", "hdfs://<Ip address>:50070");
FileInputFormat.setInputPaths(conf, new Path("sample"));
FileOutputFormat.setOutputPath(conf, new Path("sam"));
conf.setMapperClass(Win_Mapper.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(Text.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
JobClient.runJob(conf);
return 0;
}
}
And the Mapper code :
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class Win_Mapper extends MapReduceBase implements Mapper<LongWritable, Text,Text, Text> {
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> o, Reporter arg3) throws IOException {
...
o.collect(... , ...);
}
}
When I run this, I get the following error:
SEVERE: PriviledgedActionException as:miracle cause:java.io.IOException: Failed to set permissions of path: \tmp\hadoop-miracle\mapred\staging\miracle1262421749\.staging to 0700
Exception in thread "main" java.io.IOException: Failed to set permissions of path: \tmp\hadoop-miracle\mapred\staging\miracle1262421749\.staging to 0700
at org.apache.hadoop.fs.FileUtil.checkReturnValue(FileUtil.java:691)
at org.apache.hadoop.fs.FileUtil.setPermission(FileUtil.java:664)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:514)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:349)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:193)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:126)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:942)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:936)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:936)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:910)
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1353)
at Windows_Driver.run(Windows_Driver.java:41)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
at Windows_Driver.main(Windows_Driver.java:16)
How can I rectify this error? And how can I access my HDFS remotely from Windows?

The submit() method on the Job creates an internal JobSubmitter instance, which performs all of the data validation: input path and output path availability, file/directory creation permissions, and so on. During the different phases of the MR job it creates temporary directories under which it puts temp files. The temporary directory is taken from core-site.xml via the hadoop.tmp.dir property. The issue with your system seems to be that the temp directory is /tmp/ and the user running the MR job doesn't have permission to change its rwx status to 700. Provide the appropriate permissions and rerun the job.
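A minimal sketch of the configuration side of that fix, assuming a directory the submitting user actually owns (the path below is hypothetical); it would go in the driver's run() before the job is submitted:
// Point the temp/staging area at a user-writable location so the
// chmod to 0700 on the .staging directory can succeed.
conf.set("hadoop.tmp.dir", "/home/miracle/hadoop-tmp");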

Related

Hadoop class not found exception

I'm working on a simple program on Hadoop; I followed the steps in this tutorial:
http://www.bogotobogo.com/Hadoop/BigData_hadoop_Creating_Java_Wordcount_Project_with_Eclipse_MapReduce2.php
Even though I tried it on two different machines, it keeps showing this exception:
Exception in thread "main" java.lang.ClassNotFoundException: test.java
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
package pa2;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class test extends Configured implements Tool{
public int run(String[] args) throws Exception {
if (args.length < 2) {
System.out.println("plz give proper arguments");
return -1;
}
//creating a JobConf object and assigning a job name for identification purposes
JobConf conf = new JobConf(test.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(mapper.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception
{
// this main function will call run method defined above.
int exitcode = ToolRunner.run(new test(),args);
System.exit(exitcode);
}
}
Can you please tell me what is wrong here?
Update:
mapper class:
package pa2;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class mapper extends MapReduceBase
implements Mapper<LongWritable,Text, Text, IntWritable>
{
public void map(LongWritable Key, Text value,
OutputCollector<Text, IntWritable> output, Reporter r)
throws IOException {
int i=0;
String [] array = new String [50];
String name;
String year;
String s=value.toString();
for (String word:s.split(",")){
word = s.substring(0, s.indexOf(",")+1);
year= word.substring(0, s.indexOf(",")+1);
name=word.substring(s.indexOf(",")+1);
int theyear= Integer.parseInt(year);
if(theyear<2000){
array[i] =name;
output.collect(new Text(word), new IntWritable(1));
i++;}
}
}
}
I haven't written the reducer class. I exported the project as a jar file, and I made a text file called movies to be the input of the program. Then I wrote this in the terminal:
[cloudera@quickstart ~]$ cd workspace
[cloudera@quickstart workspace]$ ls
pa2 pa2.jar training
[cloudera@quickstart workspace]$ hadoop jar pa2.jar test movies.txt output.txt
Exception in thread "main" java.lang.ClassNotFoundException: test
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
No guarantees this is the solution to the immediate problem, but
package pa2;
This is prepended to the class name. In other words, the fully-qualified class name is pa2.test.
So, try
hadoop jar ~/workspace/pa2.jar pa2.test input output
If you used the default package like that tutorial showed, you wouldn't need to specify the package on the command line.
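If in doubt, you can confirm the fully-qualified name by listing the jar's contents with the standard JDK jar tool (the path below assumes the jar from your session):
jar tf ~/workspace/pa2.jar
An entry like pa2/test.class confirms the name to pass to hadoop jar.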
The actual name of your map class should be provided here
conf.setMapperClass(mapper.class);
If you are trying to use the default map class, then write "Mapper.class".

Running MR program with separate mapper, reducer and driver classes

maxtempmapper.java class:
package com.hadoop.gskCodeBase.maxTemp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MaxTempMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
private static final int MISSING=9999;
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String year = line.substring(15,19);
int airTemperature;
if(line.charAt(87)== '+'){
airTemperature=Integer.parseInt(line.substring(88, 92));
}else{
airTemperature=Integer.parseInt(line.substring(87, 92));
}
String quality=line.substring(92,93);
if(airTemperature !=MISSING && quality.matches("[01459]")){
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
maxtempreducer.java class:
package com.hadoop.gskCodeBase.maxTemp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTempReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
@Override
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException {
int maxValue = Integer.MIN_VALUE;
for(IntWritable value : values){
maxValue=Math.max(maxValue, value.get());
}
context.write(key, new IntWritable(maxValue));
}
}
maxtempdriver.java class:
package com.hadoop.gskCodeBase.maxTemp;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MaxTempDriver extends Configured implements Tool{
public int run(String[] args) throws Exception{
if(args.length !=2){
System.err.println("UsageTemperatureDriver <input path> <outputpath>");
System.exit(-1);
}
Job job = Job.getInstance();
job.setJarByClass(MaxTempDriver.class);
job.setJobName("Max Temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.setMapperClass(MaxTempMapper.class);
job.setReducerClass(MaxTempReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
MaxTempDriver driver = new MaxTempDriver();
int exitCode = ToolRunner.run(driver, args);
System.exit(exitCode);
}
}
I have to execute the above three classes on a single-node Hadoop cluster on Windows using the command prompt.
Can someone please help me with how to execute these three classes from the command prompt (Windows)?
Archive all the Java files into a single .jar file, then run it as you normally would. On Windows, it's easier to run Hadoop via the Cygwin terminal. You can execute the job with the following command:
hadoop jar <path to .jar> <path to input folder in hdfs> <path to output folder in hdfs>
Eg:
hadoop jar wordcount.jar /input /output
-UPDATE-
You should assign your driver class via job.setJarByClass(). In this case, it would be your MaxTempDriver.class.
In Eclipse, you can create a jar file by right-clicking on your source folder > Export > JAR file. From there you can follow the steps. You can set your Main Class during the process as well.
Hope this answers your question.
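For reference, a command-line sketch of the same flow, with a hypothetical jar name (maxtemp.jar), compiled classes assumed to be under a bin directory, and example HDFS paths; since MaxTempDriver lives in a package and no Main-Class is set in the manifest, the fully-qualified class name must be passed:
jar cf maxtemp.jar -C bin .
hadoop jar maxtemp.jar com.hadoop.gskCodeBase.maxTemp.MaxTempDriver /input /output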

Hadoop mapreduce-java.io.IOException: Job failed

I got the following exception while trying to execute a Hadoop MapReduce program.
java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:865)
at com.vasa.books.BookDriver.main(BookDriver.java:37)
BookDriver.java
package com.vasa.books;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
public class BookDriver {
public static void main(String args[]) {
// TODO Auto-generated method stub
JobClient client=new JobClient();
JobConf conf=new JobConf(com.vasa.books.BookDriver.class);
conf.setJobName("booknamefind");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(com.vasa.books.BookMapper.class);
conf.setReducerClass(com.vasa.books.BookReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf,new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
client.setConf(conf);
try{
JobClient.runJob(conf);
}catch(Exception e){
e.printStackTrace();
}
}
}
BookMapper.java
package com.vasa.books;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class BookMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,IntWritable> {
private final static IntWritable one=new IntWritable(1);
public void map(LongWritable _key, Text value,
OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
// TODO Auto-generated method stub
String Tempstring=value.toString();
String[] singlebookdata=Tempstring.split("\",\"");
output.collect(new Text(singlebookdata[3]), one);
}
}
why does that exception occur?
According to the JobClient source, JobClient.runJob() calls JobClient.monitorAndPrintJob(), which returns a boolean. If that boolean is false (meaning the job failed), it throws that unhelpful "Job failed!" exception you are seeing.
To solve this you have two options:
1 - (Faster) Check the logs. The RunningJob failure info should be printed to the logs.
2 - If you don't know where the logs are, don't have logging enabled, or don't want to dig through the logs, you could rewrite a bit of your code. Instead of using JobClient.runJob(), do the equivalent of what runJob() is doing in your code, so that when it fails you get a useful error message:
public static RunningJob myCustomRunJob(JobConf job) throws Exception {
JobClient jc = new JobClient(job);
RunningJob rj = jc.submitJob(job);
if (!jc.monitorAndPrintJob(job, rj)) {
throw new IOException("Job failed with info: " + rj.getFailureInfo());
}
return rj;
}
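A hypothetical swap-in for the JobClient.runJob(conf) call in BookDriver:
try {
// Surfaces rj.getFailureInfo() instead of the generic "Job failed!".
myCustomRunJob(conf);
} catch (Exception e) {
e.printStackTrace();
}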
My guess is the underlying problem is that either args[0] or args[1] (your input or output path) is not being found.

Incompatible argument to the function error in MapReduce program

I am working on a MapReduce Java project in Eclipse (on Ubuntu 14.04 LTS) that uses the Apache Avro serialization framework, for which I need the avro-tools-1.7.7.jar file. I downloaded this jar from the Apache website and wrote my Java code against it. When I execute the program I get a java.lang.VerifyError. I have read on a few websites that this error is caused by a version mismatch between the JDK that compiled the class files in the jar and the runtime JDK, so I compared the .class version in the downloaded jar with my runtime JVM version; there was a mismatch, so I downgraded my JDK from 1.7 to 1.6 to remove it. The compiled classes in the jar have 50 as their major version, and so do my current project's class files, but I am still getting the error.
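(As an aside, a class-file major version can be read directly from the file header: after the 4-byte magic number come the 2-byte minor and major versions, big-endian. A minimal, self-contained sketch that takes a .class file path as its argument:)
import java.io.DataInputStream;
import java.io.FileInputStream;
public class ClassVersion {
public static void main(String[] args) throws Exception {
DataInputStream in = new DataInputStream(new FileInputStream(args[0]));
in.readInt(); // magic number 0xCAFEBABE
int minor = in.readUnsignedShort();
int major = in.readUnsignedShort(); // 50 = Java 6, 51 = Java 7
System.out.println("major version: " + major + ", minor: " + minor);
in.close();
}
}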
srimanth@srimanth-Inspiron-N5110:~$ hadoop jar Desktop/AvroMapReduceExamples.jar practice.AvroSort file:///home/srimanth/avrofile.avro file:///home/srimanth/sorted/ test.avro
15/04/19 22:14:36 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Exception in thread "main" java.lang.VerifyError: (class: org/apache/hadoop/mapred/JobTrackerInstrumentation, method: create signature: (Lorg/apache/hadoop/mapred/JobTracker;Lorg/apache/hadoop/mapred/JobConf;)Lorg/apache/hadoop/mapred/JobTrackerInstrumentation;) Incompatible argument to function
at org.apache.hadoop.mapred.LocalJobRunner.<init>(LocalJobRunner.java:420)
at org.apache.hadoop.mapred.JobClient.init(JobClient.java:470)
at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:455)
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
at practice.AvroSort.run(AvroSort.java:63)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at practice.AvroSort.main(AvroSort.java:67)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:622)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Here is my java program
package practice;
import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroMapper;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class AvroSort extends Configured implements Tool {
static class SortMapper<K> extends AvroMapper<K, Pair<K, K>> {
public void map(K datum, AvroCollector<Pair<K, K>> collector,
Reporter reporter) throws IOException {
collector.collect(new Pair<K, K>(datum, null, datum, null));
}
}
static class SortReducer<K> extends AvroReducer<K, K, K> {
public void reduce(K key, Iterable<K> values,
AvroCollector<K> collector,
Reporter reporter) throws IOException {
for (K value : values) {
collector.collect(value);
}
}
}
@Override
public int run(String[] args) throws Exception {
if (args.length != 3) {
System.err.printf(
"Usage: %s [generic options] <input> <output> <schema-file>\n",
getClass().getSimpleName());
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
String input = args[0];
String output = args[1];
String schemaFile = args[2];
JobConf conf = new JobConf(getConf(), getClass());
conf.setJobName("Avro sort");
FileInputFormat.addInputPath(conf, new Path(input));
FileOutputFormat.setOutputPath(conf, new Path(output));
Schema schema = new Schema.Parser().parse(new File(schemaFile));
AvroJob.setInputSchema(conf, schema);
Schema intermediateSchema = Pair.getPairSchema(schema, schema);
AvroJob.setMapOutputSchema(conf, intermediateSchema);
AvroJob.setOutputSchema(conf, schema);
AvroJob.setMapperClass(conf, SortMapper.class);
AvroJob.setReducerClass(conf, SortReducer.class);
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new AvroSort(), args);
System.exit(exitCode);
}
}
Additional info: JDK version: 1.6,
Hadoop version: 2.6.0, and I am not using Maven.
Please help; I have been stuck on this the entire day. I would really appreciate some help.

NoClassDefFoundError in wordcount program

I'm running the Hadoop wordcount program, but it is giving me an error like "NoClassDefFoundError".
Command for running it:
hadoop -jar /home/user/Pradeep/sample.jar hdp_java.WordCount /user/hduser/ana.txt /user/hduser/prout
Exception in thread "main" java.lang.NoClassDefFoundError: WordCount
Caused by: java.lang.ClassNotFoundException: WordCount
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
Could not find the main class: WordCount. Program will exit.
I've created the program in Eclipse and then exported it as a jar file.
Eclipse code:
package hdp_java;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class WordCount {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
Can anyone tell me where I am going wrong?
You need to tell the hadoop job which jar to use like so:
job.setJarByClass(WordCount.class);
Also be sure to add any dependencies to both the HADOOP_CLASSPATH and -libjars when submitting a job, as in the following examples:
Use the following to add all the jar dependencies from (for example) current and lib directories:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:`echo *.jar`:`echo lib/*.jar | sed 's/ /:/g'`
Bear in mind that when starting a job through hadoop jar you'll also need to pass it the jars of any dependencies through -libjars. I like to use:
hadoop jar <jar> <class> -libjars `echo ./lib/*.jar | sed 's/ /,/g'` [args...]
NOTE: The sed commands require different delimiter characters; HADOOP_CLASSPATH is :-separated, while -libjars needs to be ,-separated.
Add this line in your code :
job.setJarByClass(WordCount.class);
If it still doesn't work, export this job as a jar file and add it to itself as an external jar, then see if it works.
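A placement sketch, using the main method from the question: the call goes right after the Job is created, before any submission.
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
// Tells Hadoop which jar to ship to the cluster by locating the jar
// that contains WordCount.
job.setJarByClass(WordCount.class);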
