I am absolutely new to Riak KV. I am currently learning it at university and have to write some Java code to store 5000 datasets in a Riak cluster. So I started to code:
package main;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Random;
import java.util.Scanner;
public class Product {
String artikelnummer;
String name;
String color;
Integer price;
String picture;
Integer max = 999;
String filepath = "./assets/products.txt";
ArrayList<String> productNames;
public Product() throws FileNotFoundException{
productNames = readFile(filepath);
name = randomName();
color = randomColor();
price = randomPrice(max);
picture = randomPic();
}
private String randomPic() {
String[] picNames = {"ikea0.jpg","ikea1.jpg","ikea2.jpg","ikea3.jpg","ikea4.jpg","ikea5.jpg","ikea6.jpg","ikea7.jpg","ikea8.jpg","ikea9.jpg"};
Random randPicIndex = new Random();
int randomPicIndex = randPicIndex.nextInt(picNames.length); // 10 pictures, so the bound must be 10, not 9
return picNames[randomPicIndex];
}
public Integer randomPrice(int max){
Random rand = new Random();
int randomNum = rand.nextInt(max);
price = randomNum;
return price;
}
public String randomColor(){
String[] colorArray = {"blue","red","yellow","magenta","green","black","white","cyan","purple","brown"};
Random randIndex = new Random();
int randomIndex = randIndex.nextInt(10);
int i = randomIndex;
color = colorArray[i];
return color;
}
public String randomName(){
Random randomName = new Random();
name = productNames.get(randomName.nextInt(productNames.size()));
return name;
}
public ArrayList<String> readFile(String filepath) throws FileNotFoundException {
Scanner scanner = new Scanner(new File(filepath));
ArrayList<String> nameList = new ArrayList<String>();
while (scanner.hasNextLine()){
nameList.add(scanner.nextLine());
}
scanner.close();
return nameList;
}
//Method for testing purposes
public void printProduct(){
System.out.println("Produktdatenblatt: Artikelnummer --> "+ artikelnummer + " " + name + " mit Farbe: " + color + " mit dem Preis: " + price + " ein Bild --> " + picture);
System.out.println("Hat funktioniert!!!");
}
}
Above you can see the code that contains the Product class. To generate 5000 products randomly and store these products in the KV database of Riak, I wrote the following:
package main;
import java.io.FileNotFoundException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ExecutionException;
import com.basho.riak.client.api.RiakClient;
import com.basho.riak.client.api.annotations.RiakKey;
import com.basho.riak.client.api.annotations.RiakLinks;
import com.basho.riak.client.api.commands.kv.StoreValue;
import com.basho.riak.client.core.RiakCluster;
import com.basho.riak.client.core.RiakNode;
import com.basho.riak.client.core.query.Location;
import com.basho.riak.client.core.query.Namespace;
import com.basho.riak.client.core.query.RiakObject;
import com.basho.riak.client.core.query.links.RiakLink;
import com.basho.riak.client.core.util.BinaryValue;
public class ProductRiakCluster {
static String artikelnummer;
public static void main(String args[]) throws FileNotFoundException, UnknownHostException, ExecutionException, InterruptedException {
System.out.println("main-method started...");
System.out.println("Starting create RiakCluster...");
for (int i = 5000; i > 0; i--) {
dataGenerator(i);
RiakClient client = RiakClient.newClient(8087, "127.0.0.1");
System.out.println("RiakClient " + client);
RiakObject riakObj = new RiakObject();
System.out.println("RiakObjekt " + riakObj);
Namespace productsBucket = new Namespace("products");
System.out.println("Bucket " + productsBucket);
Location productObjectLocation = new Location(productsBucket, artikelnummer);
System.out.println("Location " + productObjectLocation);
StoreValue storeP = new StoreValue.Builder(riakObj).withLocation(productObjectLocation).build();
StoreValue.Response response = client.execute(storeP);
client.shutdown();
}
System.out.println("RiakCluster setup finished...");
}
public static class ProductPojo {
@RiakKey
public String artikelnummer;
public String name;
public String color;
public Integer price;
@RiakLinks
public Collection<RiakLink> picture = new ArrayList<RiakLink>();
}
private static void dataGenerator(int i) {
System.out.println("Started DataGenerator...");
try {
artikelnummer = String.valueOf(i);
generateRandomProduct(artikelnummer);
} catch (FileNotFoundException e) {
System.out.println("File not found...");
e.printStackTrace();
}
}
private static void generateRandomProduct(String artikelnummer) throws FileNotFoundException {
System.out.println("Method <generateRandomProduct> is running..." + artikelnummer);
Product product = new Product();
ProductPojo propo = new ProductPojo();
propo.artikelnummer = artikelnummer;
propo.name = product.name;
propo.color = product.color;
propo.price = product.price;
propo.picture.add(new RiakLink("pictures", product.picture, "Produktbild"));
product.printProduct();
}
}
After I started this program, the following error occurred:
RiakClient com.basho.riak.client.api.RiakClient@2096442d
RiakObjekt RiakObject{contentType: application/octet-stream, value: null, riakIndexes: null, links: null, userMeta: null, vtag: null, isDeleted: false, isModified: false, vclock: null, lastModified: 0}
Bucket {type: default, bucket: products}
Location {namespace: {type: default, bucket: products}, key: 1}
Exception in thread "main" shaded.com.google.protobuf.UninitializedMessageException: Message missing required fields: value
at shaded.com.google.protobuf.AbstractMessage$Builder.newUninitializedMessageException(AbstractMessage.java:372)
at shaded.com.basho.riak.protobuf.RiakKvPB$RpbContent$Builder.build(RiakKvPB.java:18352)
at com.basho.riak.client.core.converters.RiakObjectConverter.convert(RiakObjectConverter.java:198)
at com.basho.riak.client.core.operations.StoreOperation$Builder.withContent(StoreOperation.java:158)
at com.basho.riak.client.api.commands.kv.StoreValue.buildCoreOperation(StoreValue.java:151)
at com.basho.riak.client.api.commands.kv.StoreValue.buildCoreOperation(StoreValue.java:72)
at com.basho.riak.client.api.GenericRiakCommand.executeAsync(GenericRiakCommand.java:41)
at com.basho.riak.client.api.commands.kv.StoreValue.executeAsync(StoreValue.java:112)
at com.basho.riak.client.api.RiakCommand.execute(RiakCommand.java:91)
at com.basho.riak.client.api.RiakClient.execute(RiakClient.java:355)
at main.ProductRiakCluster.main(ProductRiakCluster.java:49)
My thoughts about this: I generate an "artikelnummer", but it doesn't appear in the Pojo class, and so the Pojo has a null value. But I have no solution for how to fix this problem.
The problem is that you don't pass anything to the RiakObject.
Return the generated instance of ProductPojo and save it in a variable, say productToSave.
Then either call new StoreValue.Builder(productToSave) - the client serializes the annotated POJO for you - or use RiakObject with an explicitly serialized value:
RiakObject riakObj = new RiakObject();
riakObj.setValue(BinaryValue.create(serializedProductToSave)); // BinaryValue.create takes a String or byte[], e.g. the POJO rendered as JSON
As a side note, your code unfortunately suffers from a few programming issues. For example, exchanging data between methods via a static member is not good practice. You also don't have to create and shut down a client every time you need one - reuse a single instance for all of your queries.
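A minimal sketch of that shape, assuming generateRandomProduct is changed to return the ProductPojo it builds (in the posted code it returns void) and that the @RiakKey annotation supplies the key:
RiakClient client = RiakClient.newClient(8087, "127.0.0.1"); // created once, outside the loop
Namespace productsBucket = new Namespace("products");
for (int i = 5000; i > 0; i--) {
    ProductPojo productToSave = generateRandomProduct(String.valueOf(i));
    StoreValue storeP = new StoreValue.Builder(productToSave)
            .withNamespace(productsBucket) // key comes from the @RiakKey-annotated artikelnummer field
            .build();
    client.execute(storeP);
}
client.shutdown(); // shut down once, after all stores are done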
Related
I am writing search engine code in Java, and I'm getting this error without knowing the cause:
Exception in thread "main" java.lang.NullPointerException
at WriteToFile.fileWriter(WriteToFile.java:29)
at Main.main(Main.java:14)
Process finished with exit code 1
This is my code:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Scanner;
public class Search {
private static String URL="https://www.google.com/search?q=";
private Document doc;
private Elements links;
private String html;
public Search() throws IOException {};
public void SearchWeb() throws IOException {
//to get the keywords from the user
Scanner sc = new Scanner(System.in);
System.out.println("Please enter the keyword you want to search for: ");
String word = sc.nextLine();
//Search for the keyword over the net
String url = URL + word;
doc = Jsoup.connect(url).get();
html = doc.html();
Files.write(Paths.get("D:\\OOP\\OOPproj\\data.txt"), html.getBytes());
links = doc.select("cite");
}
public Document getDoc() {
return doc;
}
public String getHtml() {
return html;
}
public Elements getLinks() {
return links;
}
}
And this is the WriteToFile class:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
public class WriteToFile extends Search {
public WriteToFile() throws IOException {};
String description = "<!> Could not fetch description <!>";
String keywords = "<!> Could not fetch keywords <!>";
private ArrayList<String> detail = new ArrayList<String>();
BufferedWriter bw = null;
public void fileWriter() throws IOException {
for (Element link : super.getLinks()) {
String text = link.text();
if (text.contains("›")) {
text = text.replaceAll(" › ", "/");
}
detail.add(text);
System.out.println(text);
}
System.out.println("***************************************************");
for (int i = 0; i < detail.size(); i++)
System.out.println("detail [" + (i + 1) + "]" + detail.get(i));
System.out.println("###################################################################");
for (int j = 0; j < detail.size(); j++) {
Document document = Jsoup.connect(detail.get(j)).get();
String web = document.html();
Document d = Jsoup.parse(web);
Elements metaTags = d.getElementsByTag("meta");
for (Element metaTag : metaTags) {
String content = metaTag.attr("content");
String name = metaTag.attr("name");
if ("description".equals(name)) {
description = content;
}
if ("keywords".equals(name)) {
keywords = content;
}
}
String title = d.title();
Files.write(Paths.get("D:\\OOP\\OOPproj\\search.txt"), (detail.get(j) + "\t" + "|" + "\t" + title + "\t" + "|" + "\t" + description + "\t" + "|" + "\t" + keywords + System.lineSeparator()).getBytes(), StandardOpenOption.APPEND);
}
}
}
This is the Main class:
import java.io.IOException;
public class Main {
public static void main(String[] args) throws IOException {
Search a = new Search();
a.SearchWeb();
WriteToFile b = new WriteToFile();
b.fileWriter();
}
}
I tried to print the getLinks() method in main to check if it was null, but it wasn't; the links were cited.
I would be really grateful if someone helped me out.
You are calling SearchWeb() on object a, but you're calling fileWriter() on object b. This means the links are set in a, but not in b.
Since WriteToFile extends Search, you just need a single instance of that class:
WriteToFile a = new WriteToFile();
a.SearchWeb();
a.fileWriter();
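If you would rather keep the searching and the writing as two separate objects, composition works as well - a sketch with a hypothetical constructor parameter instead of inheritance:
import java.io.IOException;
import org.jsoup.nodes.Element;
public class WriteToFile {
    private final Search search; // the Search instance whose SearchWeb() has already run
    public WriteToFile(Search search) {
        this.search = search;
    }
    public void fileWriter() throws IOException {
        // getLinks() is non-null here because it is called on the same object that ran SearchWeb()
        for (Element link : search.getLinks()) {
            System.out.println(link.text());
        }
    }
}
// usage: Search a = new Search(); a.SearchWeb(); new WriteToFile(a).fileWriter();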
I'm comparing two Word documents manually, where I must not miss any strings, special characters, spaces, and so on; each document is around 150 pages or more, so doing the comparison by hand is a real headache. I have therefore written a small Java program, using the Apache POI library, to compare the two documents, but I'm not able to list the missing words.
Thanks in advance.
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFFooter;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
public class ReadDocFile {
private static XWPFDocument docx;
// private static String path = "C:\\States wise\\NH\\Assessment
// 2nd\\test.docx";
private static ArrayList<String> firstList = new ArrayList<String>(); // refers to first document list
private static ArrayList<String> secondList = new ArrayList<String>(); // refers to second document list
private static List<XWPFParagraph> paragraphList;
private static Map<String, String> map = null;
private static LinkedHashSet<String> firstMissedArray = new LinkedHashSet<String>(); // refers to first document Linked hash set
private static LinkedHashSet<String> secondMissedArray = new LinkedHashSet<String>(); // refers to second document Linked hash set
public static void getFilePath(String path) {
FileInputStream fis;
try {
fis = new FileInputStream(path);
docx = new XWPFDocument(fis);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void get_First_Doc_Data() {
getFilePath("C:\\States wise\\NH\\Assessment 2nd\\test.docx");
paragraphList = docx.getParagraphs();
System.out.println("******************** first list Starts here ******************** ");
System.out.println();
for (int i = 0; i < paragraphList.size() - 1; i++) {
firstList.add(paragraphList.get(i).getText().toString());
System.out.println(firstList.get(i).toString());
}
System.out.println("*********** first list Ends here ********************");
}
public static void get_Second_Doc_Data() {
getFilePath("C:\\States wise\\NH\\Assessment 2nd\\test1.docx");
paragraphList = docx.getParagraphs();
System.out.println("******************** Second list Starts here ******************** ");
System.out.println();
for (int i = 0; i < paragraphList.size() - 1; i++) {
secondList.add(paragraphList.get(i).getText().toString());
System.out.println(secondList.get(i).toString());
}
System.out.println("*********** Second list Ends here ********************");
}
public static void main(String[] args) {
get_First_Doc_Data();
get_Second_Doc_Data();
//System.out.println("First Para: " + firstList.contains(secondList));
compare();
compare_Two_List();
}
private static void compare() {
String firstMiss = null;
//String secondMiss = null;
for (int i = 0; i < firstList.size(); i++) {
for (int j = 0; j < secondList.size(); j++) {
if (!firstList.get(i).toString().equals(secondList.get(i).toString())) {
firstMiss = firstList.get(i).toString();
//secondMiss = secondList.get(i).toString();
map = new HashMap<String, String>();
}
}
firstMissedArray.add(firstMiss);
//secondMissedArray.add(secondMiss);
// System.out.println(missedArray.get(i).toString());
}
}
private static void compare_Two_List() {
int num = 0;
map.clear();
Iterator<String> first = firstMissedArray.iterator();
//Iterator<String> second = secondMissedArray.iterator();
while (first.hasNext()) {
map.put(""+num, first.next());
num++;
}
System.out.println(firstMissedArray.size());
Iterator it = map.entrySet().iterator();
while (it.hasNext()) {
Map.Entry pair = (Map.Entry) it.next();
System.out.println(pair.getKey() + " = " + pair.getValue());
// it.remove(); // avoids a ConcurrentModificationException
}
}
}
I have taken the liberty of modifying your code to arrive at a solution to your problem. Please go through it.
This should pretty much solve your problem - put SYSO statements wherever you think they are necessary and tweak the flow of the program to achieve the desired checks as per your requirements. In my hurry I may not have followed coding standards such as using try/catch blocks for error handling and covering the negative scenarios, so please take care of that when implementing it live.
In case the documents are not .docx but .pdf, make use of the Apache PDFBox API.
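For the PDF case, a minimal text-extraction sketch, assuming PDFBox 2.x (the extracted strings can then be compared the same way as the paragraph lists below):
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
public class PdfTextReader {
    public static String readPdf(String path) throws IOException {
        // PDDocument.load(File) is the PDFBox 2.x entry point; 3.x uses Loader.loadPDF instead
        try (PDDocument document = PDDocument.load(new File(path))) {
            return new PDFTextStripper().getText(document);
        }
    }
}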
Here is the Code:
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
public class Compare_Docs {
private static final String FIRST_DOC_PATH = "E:\\Workspace_Luna\\assignments\\Expected.docx";
private static final String SECOND_DOC_PATH = "E:\\Workspace_Luna\\assignments\\Actual.docx";
private static XWPFDocument docx;
private static List<XWPFParagraph> paragraphList;
private static ArrayList<String> firstList = new ArrayList<String>();
private static ArrayList<String> secondList = new ArrayList<String>();
public static void get_Doc_Data(String filePath, ArrayList<String> listName)
throws IOException {
File file = new File(filePath);
FileInputStream fis = new FileInputStream(file);
docx = new XWPFDocument(fis);
paragraphList = docx.getParagraphs();
for (int i = 0; i <= paragraphList.size() - 1; i++) {
listName.add(paragraphList.get(i).getText().toString());
}
fis.close();
}
public static void main(String[] args) throws IOException {
get_Doc_Data(FIRST_DOC_PATH, firstList);
get_Doc_Data(SECOND_DOC_PATH, secondList);
compare(firstList, secondList);
}
private static void compare(ArrayList<String> firstList_1,
ArrayList<String> secondList_1) {
simpleCheck(firstList_1, secondList_1);
int size = firstList_1.size();
for (int i = 0; i < size; i++) {
paragraphCheck(firstList_1.get(i).toString().split(" "),
secondList_1.get(i).toString().split(" "), i);
}
}
private static void paragraphCheck(String[] firstParaArray,
String[] secondParaArray, int paraNumber) {
System.out
.println("=============================================================");
System.out.println("Paragraph No." + (paraNumber + 1) + ": Started");
if (firstParaArray.length != secondParaArray.length) {
System.out.println("There is mismatch of "
+ Math.abs(firstParaArray.length - secondParaArray.length)
+ " words in this paragraph");
}
TreeMap<String, Integer> firstDocPara = getOccurence(firstParaArray);
TreeMap<String, Integer> secondDocPara = getOccurence(secondParaArray);
ArrayList<String> keyData = new ArrayList<String>(firstDocPara.keySet());
for (int i = 0; i < keyData.size(); i++) {
if (!firstDocPara.get(keyData.get(i)).equals(secondDocPara
.get(keyData.get(i)))) { // use equals(): != compares Integer references, not values
System.out
.println("The following word is missing in actual document : "
+ keyData.get(i));
}
}
System.out.println("Paragraph No." + (paraNumber + 1) + ": Done");
System.out
.println("=============================================================");
}
private static TreeMap<String, Integer> getOccurence(String[] paraArray) {
TreeMap<String, Integer> paragraphStringCountHolder = new TreeMap<String, Integer>();
paragraphStringCountHolder.clear();
for (String a : paraArray) {
int count = 1;
if (paragraphStringCountHolder.containsKey(a)) {
count = paragraphStringCountHolder.get(a) + 1;
paragraphStringCountHolder.put(a, count);
} else {
paragraphStringCountHolder.put(a, count);
}
}
return paragraphStringCountHolder;
}
private static boolean simpleCheck(ArrayList<String> firstList,
ArrayList<String> secondList) {
boolean flag = false;
if (firstList.size() > secondList.size()) {
System.out
.println("There are more paragraph in Expected document than in Actual document");
} else if (firstList.size() < secondList.size()) {
System.out
.println("There are more paragraph in Actual document than in Expected document");
} else if (firstList.size() == secondList.size()) {
System.out.println("The paragraph count in both documents match");
flag = true;
}
return flag;
}
}
For some reason, on the line v = new Versenyzok(); it says "Surround with...". I thought I needed a constructor for Versenyzok, and I made one, but the issue persists.
The other issue is on the line v = new Versenyzok(adatok[0],adatok[1],adatok[2],adatok[3]);: "Incompatible types: String cannot be converted to Date". But I cannot find the place where I am supposed to convert the String to a Date.
package vizsgamintaa;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
public class VizsgaMintaA {
static SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd");
static Scanner sc= new Scanner(System.in);
public static void main(String[] args) throws ParseException, FileNotFoundException {
ArrayList<Versenyzok> versenyzok = new ArrayList<>();
Feltolt(versenyzok);
}
private static void Feltolt(ArrayList<Versenyzok> versenyzok) throws ParseException{
Versenyzok v;
File f = new File("versenyzok.txt");
try {
Scanner scan = new Scanner(f, "iso-8859-2");
while (scan.hasNextLine()) {
String sor = scan.nextLine();
String[] adatok = sor.split(";");
if (adatok.length == 4) {
v = new Versenyzok();
v.nev = adatok[0];
v.szuletes = df.parse(adatok[1]);
v.csapat = adatok[2];
if (adatok[3].length()<1) {
v.vbcim = 0;
} else {
v.vbcim = Integer.parseInt(adatok[3]);
}
} else {
v = new Versenyzok(adatok[0],adatok[1],adatok[2],adatok[3]);
}
versenyzok.add(v);
}
} catch (FileNotFoundException ex) {
System.out.println("Nincs meg a fájl.");
}
}
static class Versenyzok { // static, so it can be instantiated from the static Feltolt method
String nev,csapat;
Date szuletes;
int vbcim;
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd");
@Override
public String toString(){
return "Versenyzo:" +nev + "Született:" + df.format(szuletes)+"Csapata:"+ csapat + "Vb címek:" + vbcim;
}
public Versenyzok(String nev, Date szuletes, String csapat, int vbcim) {
this.nev = nev;
this.szuletes = szuletes;
this.csapat = csapat;
this.vbcim = vbcim;
}
public Versenyzok(){}
}
}
In public Versenyzok(String nev, Date szuletes, String csapat, int vbcim) you are taking parameter 2 as a Date. But in the else branch you are passing adatok[1] as parameter 2, and there it is still a String, hence you need to parse it first.
I forgot to type
v = new Versenyzok(adatok[0], df.parse(adatok[1]), adatok[2], Integer.parseInt(adatok[3]));
i.e. the df.parse and Integer.parseInt calls.
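Since adatok[3] can be an empty string (the four-field branch above already guards for that), a hedged variant of the same call that keeps the guard:
int vbcim = adatok[3].trim().isEmpty() ? 0 : Integer.parseInt(adatok[3]);
v = new Versenyzok(adatok[0], df.parse(adatok[1]), adatok[2], vbcim);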
package br.edu.ufam.anibrata;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ParserProperties;
import tl.lin.data.array.ArrayListWritable;
import tl.lin.data.pair.PairOfStringInt;
import tl.lin.data.pair.PairOfWritables;
import br.edu.ufam.data.Dataset;
import com.google.gson.JsonSyntaxException;
public class BuildIndexWebTables extends Configured implements Tool {
private static final Logger LOG = Logger.getLogger(BuildIndexWebTables.class);
public static void main(String[] args) throws Exception
{
ToolRunner.run(new BuildIndexWebTables(), args);
}
@Override
public int run(String[] argv) throws Exception {
// Creates a new job configuration for this Hadoop job.
Args args = new Args();
CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));
try
{
parser.parseArgument(argv);
}
catch (CmdLineException e)
{
System.err.println(e.getMessage());
parser.printUsage(System.err);
return -1;
}
Configuration conf = getConf();
conf.setBoolean("mapreduce.map.output.compress", true);
conf.setBoolean("mapreduce.map.output.compress", true);
conf.set("mapreduce.map.failures.maxpercent", "10");
conf.set("mapreduce.max.map.failures.percent", "10");
conf.set("mapred.max.map.failures.percent", "10");
conf.set("mapred.map.failures.maxpercent", "10");
conf.setBoolean("mapred.compress.map.output", true);
conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
conf.setBoolean("mapreduce.map.output.compress", true);
/*String inputPrefixes = args[0];
String outputFile = args[1];*/
Job job = Job.getInstance(conf);
/*FileInputFormat.addInputPath(job, new Path(inputPrefixes));
FileOutputFormat.setOutputPath(job, new Path(outputFile));*/
FileInputFormat.setInputPaths(job, new Path(args.input));
FileOutputFormat.setOutputPath(job, new Path(args.output));
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job,org.apache.hadoop.io.compress.GzipCodec.class);
job.setMapperClass(BuildIndexWebTablesMapper.class);
job.setReducerClass(BuildIndexWebTablesReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(PairOfWritables.class);
//job.setOutputFormatClass(MapFileOutputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
/*job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);*/
job.setJarByClass(BuildIndexWebTables.class);
job.setNumReduceTasks(args.numReducers);
//job.setNumReduceTasks(500);
FileInputFormat.setInputPaths(job, new Path(args.input));
FileOutputFormat.setOutputPath(job, new Path(args.output));
System.out.println(Arrays.deepToString(FileInputFormat.getInputPaths(job)));
// Delete the output directory if it exists already.
Path outputDir = new Path(args.output);
FileSystem.get(getConf()).delete(outputDir, true);
long startTime = System.currentTimeMillis();
job.waitForCompletion(true);
System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
return 0;
}
private BuildIndexWebTables() {}
public static class Args
{
#Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
public String input;
#Option(name = "-output", metaVar = "[path]", required = true, usage = "output path")
public String output;
#Option(name = "-reducers", metaVar = "[num]", required = false, usage = "number of reducers")
public int numReducers = 1;
}
public static class BuildIndexWebTablesMapper extends Mapper<LongWritable, Text, Text, Text> {
//public static final Log log = LogFactory.getLog(BuildIndexWebTablesMapper.class);
private static final Text WORD = new Text();
private static final Text OPVAL = new Text();
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Log to stdout file
System.out.println("Map key : TEST");
//log to the syslog file
//log.info("Map key "+ key);
/*if(log.isDebugEanbled()){
log.debug("Map key "+ key);
}*/
Dataset ds;
String pgTitle; // Table page title
List<String> tokens = new ArrayList<String>(); // terms for frequency and other data
ds = Dataset.fromJson(value.toString()); // Get all text values from the json corpus
String[][] rel = ds.getRelation(); // Extract relation from the first json
int numCols = rel.length; // Number of columns in the relation
String[] attributes = new String[numCols]; // To store attributes for the relation
for (int j = 0; j < numCols; j++) { // Attributes of the relation
attributes[j] = rel[j][0];
}
int numRows = rel[0].length; //Number of rows of the relation
//dsTabNum = ds.getTableNum(); // Gets the table number from json
// Reads terms from relation and stores in tokens
for (int i = 0; i < numRows; i++ ){
for (int j = 0; j < numCols; j++ ){
String w = rel[i][j].toLowerCase().replaceAll("(^[^a-z]+|[^a-z]+$)", "");
if (w.length() == 0)
continue;
else {
w = w + "|" + pgTitle + "." + j + "|" + i; // Concatenate the term/PageTitle.Column number/row number in term
tokens.add(w);
}
}
}
// Emit postings.
for (String token : tokens){
String[] tokenPart = token.split("\\|", -2); // split on a literal "|" (escaped, since "|" alone is regex alternation); a negative limit keeps trailing empty strings
String newkey = tokenPart[0] + "|" + tokenPart[1];
WORD.set(newkey); // Emit term as key
//String valstr = Arrays.toString(Arrays.copyOfRange(tokenPart, 2, tokenPart.length)); // Emit rest of the string as value
String valstr = tokenPart[2];
OPVAL.set(valstr);
context.write(WORD,OPVAL);
}
}
}
public static class BuildIndexWebTablesReducer extends Reducer<Text, Text, Text, Text> {
private static final Text TERM = new Text();
private static final IntWritable TF = new IntWritable();
private String PrevTerm = null;
private int termFrequency = 0;
@Override
protected void reduce(Text key, Iterable<Text> textval, Context context) throws IOException, InterruptedException {
Iterator<Text> iter = textval.iterator();
IntWritable tnum = new IntWritable();
ArrayListWritable<IntWritable> postings = new ArrayListWritable<IntWritable>();
PairOfStringInt relColInfo = new PairOfStringInt();
PairOfWritables keyVal = new PairOfWritables<PairOfStringInt, ArrayListWritable<IntWritable>>();
if((!key.toString().equals(PrevTerm)) && (PrevTerm != null)) {
String[] parseKey = PrevTerm.split("\\|", -2); // escape "|" here as well
TERM.set(parseKey[0]);
relColInfo.set(parseKey[1],termFrequency);
keyVal.set(relColInfo, postings);
context.write(TERM, keyVal);
termFrequency = 0;
postings.clear();
}
PrevTerm = key.toString();
while (iter.hasNext()) {
int tupleset = Integer.parseInt(iter.next().toString());
tnum.set(tupleset);
postings.add(tnum);
termFrequency++;
}
}
}
}
I am getting the below-mentioned error during compilation.
[ERROR] Failed to execute goal
org.apache.maven.plugins:maven-compiler-plugin:2.3.2:compile
(default-compile) on project projeto-final: Compilation failure
[ERROR]
/home/cloudera/topicosBD-pis/topicosBD-pis/projeto-final/src/main/java/br/edu/ufam/anibrata/BuildIndexWebTables.java:[278,11]
error: method write in interface
TaskInputOutputContext cannot be
applied to given types;
The line where this occurs is exactly "context.write(TERM, keyVal);". This code has some dependencies that are based on my local machine, though. I am stuck at this error, since I am not getting any idea about it anywhere. Could someone help me understand the origin of the issue and how it can be tackled? I am pretty new to Hadoop / MapReduce.
I have tried toggling the output format class between job.setOutputFormatClass(MapFileOutputFormat.class); and job.setOutputFormatClass(TextOutputFormat.class); both of them throw the same error. I am using "mvn clean package" to compile.
Any help is appreciated very much.
Thanks in advance.
As I can see, you are trying to write into the context a key (TERM) of type Text and a value (keyVal) of type PairOfWritables, but your reducer class extends Reducer with VALUEOUT (the last type parameter) of type Text. You should change VALUEOUT to the proper type.
In your case:
public static class BuildIndexWebTablesReducer extends Reducer<Text, Text, Text, PairOfWritables>
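A sketch of how the corrected declaration lines up with the existing write call; the driver's job.setOutputValueClass(PairOfWritables.class) already matches:
public static class BuildIndexWebTablesReducer
        extends Reducer<Text, Text, Text, PairOfWritables> {
    // reduce() can stay as written: context.write(TERM, keyVal) now type-checks,
    // because VALUEOUT is PairOfWritables instead of Text.
}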
I have written Java code for the CRM On Demand web services. The code reads the first column (id) from Excel, queries it in the CRM, and inserts the values accordingly. If the 'id' is found, it inserts the other values, but when the 'id' is not found in the CRM the code terminates with an error message. I want the code to query for a record and, if it is found, insert the other values; if the record is not found, it should skip that record and query for the next id without terminating in between.
Please help...
Also, how do I skip cells with no value (null)?
import crmondemand.ws.account.AccountWS_AccountInsertChild_Input;
import crmondemand.ws.account.AccountWS_AccountInsertChild_Output;
import crmondemand.ws.account.AccountWS_AccountUpdate_Input;
import crmondemand.ws.account.AccountWS_AccountUpdate_Output;
import crmondemand.ws.account.Default_BindingStub;
import crmondemand.xml.account.Account;
import crmondemand.xml.account.RelatedAccount;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.rmi.RemoteException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.TimeZone;
import javax.xml.rpc.ServiceException;
import jxl.Cell;
import jxl.CellType;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import org.apache.axis.AxisProperties;
import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class accrel1 {
private String inputFile;
public static int length;
public void setInputFile(String inputFile) {
this.inputFile = inputFile;
}
static Logger logger = Logger.getLogger(accrel1.class);
public static URL reauthentication(Properties properties) throws MalformedURLException {
// System.setProperty("https.proxyHost", "172.17.24.24");
// System.setProperty("https.proxyPort", "8003");
//System.setProperty("https.proxyHost", "145.247.13.164");
// System.setProperty("https.proxyPort", "3128");
// System.setProperty("javax.net.ssl.trustStore","C:\\ProgramFiles\\Java\\jdk1.6.0_31\\jre\\lib\\cacerts");
System.out.println("Loggin In");
String jsessionid_full =
logon("https://secure-ausomxapa.crmondemand.com/Services/Integration",
"################", "##########", properties);
System.out.println("Connecting to CRM..." + jsessionid_full);
String jsessionid = getSessionId(jsessionid_full);
System.out.println("JSessionid: " + jsessionid);
String endpoint =
"https://secure-ausomxapa.crmondemand.com/Services/Integration" +
";jsessionid=" + jsessionid;
URL urlAddr = new java.net.URL(endpoint);
System.out.println("Establishing Connection...");
return urlAddr;
}
public static void urlqueryWS1(URL urlAddr, List cellDataList,
Properties properties) throws RemoteException,
ServiceException,
MalformedURLException {
AxisProperties.setProperty("https.proxyHost",
properties.getProperty("proxyhost"));
AxisProperties.setProperty("https.proxyPort",
properties.getProperty("proxyport"));
// AxisProperties.setProperty("http.nonProxyHosts", "secure-ausomxapa.crmondemand.com");
crmondemand.ws.account.AccountWS_AccountQueryPage_Input accountlist =
new crmondemand.ws.account.AccountWS_AccountQueryPage_Input();
crmondemand.ws.account.AccountWS_AccountQueryPage_Output outlist =
new crmondemand.ws.account.AccountWS_AccountQueryPage_Output();
crmondemand.xml.account.Account[] accounts =
new crmondemand.xml.account.Account[1];
crmondemand.xml.account.Account account =
new crmondemand.xml.account.Account();
String stringCellValue[] = new String[6000];
int k = 0;
int flag = 0, notflag = 0, noparentflag = 0;
System.out.println("CellDatasize:" + cellDataList.size());
for (int i = 1; i < cellDataList.size(); i++) {
crmondemand.ws.account.Account service =
new crmondemand.ws.account.AccountLocator();
Default_BindingStub stub =
(Default_BindingStub)service.getDefault(urlAddr);
List cellTempList = (List)cellDataList.get(i);
for (int j = 0; j < cellTempList.size(); j++) {
HSSFCell hssfCell = (HSSFCell)cellTempList.get(j);
//System.out.println("Cell Type:" + i + "," + j + " " + hssfCell.getCellType());
if (hssfCell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
stringCellValue[j] = hssfCell.getStringCellValue();
// logger.info(i + "," + j + "\t" + stringCellValue[j] + "\t");
} else if (hssfCell.getCellType() ==
HSSFCell.CELL_TYPE_NUMERIC) {
if (HSSFDateUtil.isCellDateFormatted(hssfCell)) {
System.out.println(hssfCell.getDateCellValue());
Date date = new Date();
date = hssfCell.getDateCellValue();
SimpleDateFormat df =
new SimpleDateFormat("MM/dd/yyyy");
stringCellValue[j] = df.format(date);
} else {
stringCellValue[j] = new BigDecimal(hssfCell.getNumericCellValue()).toPlainString();
//logger.info("Number"+i+","+j+ "\t"+stringCellValue[j] + "\t");
}
}
}
AccountWS_AccountInsertChild_Input child =
new AccountWS_AccountInsertChild_Input();
AccountWS_AccountInsertChild_Output childs =
new AccountWS_AccountInsertChild_Output();
crmondemand.xml.account.Account[] accrels =
new crmondemand.xml.account.Account[1];
crmondemand.xml.account.Account accrel =
new crmondemand.xml.account.Account();
RelatedAccount[] relaccounts = new RelatedAccount[1];
RelatedAccount relaccount = new RelatedAccount();
logger.info("Inserting " + i + "Record: "+stringCellValue[0]);
relaccount.setRelationshipRole(stringCellValue[1]);
relaccount.setRelatedAccountId(stringCellValue[2]);
relaccount.setStartDate(stringCellValue[6]);
relaccount.setEndDate(stringCellValue[7]);
relaccount.setReverseRelationshipRole(stringCellValue[3]);
relaccount.setComments(stringCellValue[4]);
relaccount.setRelationshipStatus(stringCellValue[5]);
relaccounts[0] = relaccount;
accrel.setAccountId(stringCellValue[0]); //JDE Account ID
accrel.setListOfRelatedAccount(relaccounts);
accrels[0] = accrel;
child.setListOfAccount(accrels);
try {
childs = stub.accountInsertChild(child);
logger.info(i + "th Record Inserted");
++flag;
} catch (Exception e) {
logger.info("Network Error: Re-Authenticating" + e);
urlAddr = reauthentication(properties);
stub = (Default_BindingStub)service.getDefault(urlAddr);
childs = stub.accountInsertChild(child);
logger.info(i + "th Record Inserted in 2nd Attempt");
++flag;
}
//logger.info("Total No. Of Records Processed"+flag);
}
logger.info("Total No. Of Records Processed"+flag);
}
private void readExcelFile(URL urlAddr, String fileName,
Properties properties) throws ServiceException,
RemoteException,
MalformedURLException {
System.out.println("Reading Excel File");
/**
* Create a new instance for cellDataList
*/
List cellDataList = new ArrayList();
try {
/**
* Create a new instance for FileInputStream class
*/
FileInputStream fileInputStream = new FileInputStream(fileName);
/**
* Create a new instance for POIFSFileSystem class
*/
POIFSFileSystem fsFileSystem =
new POIFSFileSystem(fileInputStream);
/*
* Create a new instance for HSSFWorkBook Class
*/
HSSFWorkbook workBook = new HSSFWorkbook(fsFileSystem);
HSSFSheet hssfSheet = workBook.getSheetAt(0);
/**
* Iterate the rows and cells of the spreadsheet
* to get all the datas.
*/
Iterator rowIterator = hssfSheet.rowIterator();
while (rowIterator.hasNext()) {
HSSFRow hssfRow = (HSSFRow)rowIterator.next();
Iterator iterator = hssfRow.cellIterator();
List cellTempList = new ArrayList();
while (iterator.hasNext()) {
HSSFCell hssfCell = (HSSFCell)iterator.next();
cellTempList.add(hssfCell);
}
cellDataList.add(cellTempList);
}
} catch (Exception e) {
e.printStackTrace();
}
/**
* Call the printToConsole method to print the cell data in the
* console.
*/
urlqueryWS1(urlAddr, cellDataList, properties);
}
private static String logon(String wsLocation, String userName,
String password, Properties properties) {
String sessionString = "FAIL";
int port =
Integer.parseInt(properties.getProperty("proxyport")); //Converting String Port Number to Integer.
try {
Proxy proxy =
new Proxy(Proxy.Type.HTTP, new InetSocketAddress(properties.getProperty("proxyhost"),
port));
// create an HTTPS connection to the On Demand webservices
URL wsURL = new URL(wsLocation + "?command=login");
HttpURLConnection wsConnection =
(HttpURLConnection)wsURL.openConnection(proxy);
// we don't want any caching to occur
wsConnection.setUseCaches(false);
// we want to send data to the server
// wsConnection.setDoOutput(true);
// set some http headers to indicate the username and passwod we are using to logon
wsConnection.setRequestProperty("UserName", userName);
wsConnection.setRequestProperty("Password", password);
wsConnection.setRequestMethod("GET");
// see if we got a successful response
if (wsConnection.getResponseCode() == HttpURLConnection.HTTP_OK) {
// get the session id from the cookie setting
sessionString = getCookieFromHeaders(wsConnection);
}
} catch (Exception e) {
System.out.println("Logon Exception generated :: " + e);
}
return sessionString;
}
private static String getCookieFromHeaders(HttpURLConnection wsConnection) {
// debug code - display all the returned headers
String headerName;
String headerValue = "FAIL";
for (int i = 0; ; i++) {
headerName = wsConnection.getHeaderFieldKey(i);
if (headerName != null && headerName.equals("Set-Cookie")) {
// found the Set-Cookie header (code assumes only one cookie is being set)
headerValue = wsConnection.getHeaderField(i);
break;
}
}
// return the header value (FAIL string for not found)
return headerValue;
}
private static String getSessionId(String cookie) {
StringTokenizer st = new StringTokenizer(cookie, ";");
String jsessionid = st.nextToken();
st = new StringTokenizer(jsessionid, "=");
st.nextToken();
return st.nextToken();
}
public static void main(String[] args) throws IOException,
ServiceException {
String jsessionid, jsessionid_full;
String endpoint;
AxisProperties.setProperty("https.proxyHost", "172.25.9.240");
AxisProperties.setProperty("https.proxyPort", "2006");
System.setProperty("https.proxyHost", "172.25.9.240");
System.setProperty("https.proxyPort", "2006");
// System.setProperty("https.proxyHost", "145.247.13.164");
// System.setProperty("https.proxyPort", "3128");
Properties properties = new Properties();
properties.load(new FileInputStream("C:\\Users\\10608011\\Documents\\Account_Config.properties")); //Windows Path
System.setProperty("javax.net.ssl.trustStore","C:\\Oracle\\Middleware3\\jdk160_24\\jre\\lib\\security\\cacerts");
System.out.println("Logging In");
jsessionid_full =
logon("https://secure-ausomxapa.crmondemand.com/Services/Integration",
"############", "###########", properties);
System.out.println("Establishing " + jsessionid_full);
jsessionid = getSessionId(jsessionid_full);
System.out.println("Jsessionid: " + jsessionid);
endpoint =
"https://secure-ausomxapa.crmondemand.com/Services/Integration" +
";jsessionid=" + jsessionid;
URL urlAddr = new java.net.URL(endpoint);
String fileName =
"D:" + File.separator + "Test2.xls"; // Windows Path
// String fileName =File.separator + "u01"+File.separator +"CRM_DEV"+File.separator +
// "sofwaredepot"+File.separator +"PALS_200_11Feb2013.xls"; //Linux Path /u01/CRM_DEV/softwaredepot
// String fileName="PALS_200_11Feb2013.xls";
final long start = System.currentTimeMillis();
logger.info("Start Time:" + start);
new accrel1().readExcelFile(urlAddr, fileName, properties);
final long durationInMilliseconds = System.currentTimeMillis() - start;
System.out.println("Time(Min) for Data Upload: " +
durationInMilliseconds / 60000 + "mins.");
logger.info("Duration for Data Upload: " +
durationInMilliseconds / 60000 + "mins.");
}
}
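Regarding the two open questions, a minimal sketch under the assumption that accountInsertChild throws an exception when the id does not exist in CRM: catch the failure per row and continue with the next one (keep the re-authentication retry first if you need it), and handle blank cells explicitly before reading their values.
// inside the row loop of urlqueryWS1, around the insert call
try {
    childs = stub.accountInsertChild(child);
    ++flag;
} catch (Exception e) {
    logger.info("Skipping record " + stringCellValue[0] + ": " + e.getMessage());
    continue; // move on to the next row instead of terminating
}
// inside the cell loop, before reading the cell value
if (hssfCell == null || hssfCell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
    stringCellValue[j] = ""; // treat missing cells as empty strings
    continue;
}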