How to use ArrayList in Map Reduce? - java

I have written a Java code which I need to be written in Map Reduce.
I am relatively new to Map Reduce concepts.
As an input, I have a file containing multiple columns which are Pipe delimited.
Based on certain criteria which are given in my if condition of my code, I need to link the Row-Id's from the file.
for Example:
rowid|rollno|sbjctcode|subjct|dt_of_exam......|marks
2|1000|0101|PHY|02072015......|-060
9|1000|0101|PHY|02072015......|060
Desired Output:
2[9]
so that, I can get to know where is the error in the file, or for a case multiple entries etc.
Here's my Java Code:
import java.io.*;
import java.util.*;
public class MRCode {
public static void main(String[] args) {
BufferedReader in = null;
BufferedWriter out = null;
String in_line;
String PrevRollNo = "150";
ArrayList<Transaction> PCMList = new ArrayList<Transaction>();
ArrayList<Transaction> AdditionalList = new ArrayList<Transaction>();
ArrayList<Transaction> OtherList = new ArrayList<Transaction>();
try {
in = new BufferedReader(new FileReader(
"C:\\Users\\Desktop\\data.txt"));
File out_file = new File("C:\\Users\\Desktop\\op.txt");
if (!out_file.exists()) {
out_file.createNewFile();
}
FileWriter fw = new FileWriter(out_file);
out = new BufferedWriter(fw);
while ((in_line = in.readLine()) != null) {
Transaction transact = new Transaction(in_line);
if (transact.rollNo.equals(PrevRollNo)) {
if (transact.subjctCode.equals("PHY")
|| transact.subjctCode.equals("CHEM")
|| transact.subjctCode.equals("MATH")
&& transact.dt_of_exam == PrevDate) {
PCMList.add(transact);
break;
} else {
OtherList.add(transact);
}
if (transact.subjctCode.equals("0101")) {
Iterator<Transaction> pcm;
pcm = PCMList.iterator();
while (pcm.hasNext()) {
Transaction pcmtxn = pcm.next();
if (pcmtxn.subjctCode.equals("0102")
&& pcmtxn.dt_of_exam == transact.dt_of_exam
&& pcmtxn.subjct.equals(transact.subjct)
&& pcmtxn.marks == Math.abs(transact.marks)) {
pcmtxn.tgtfound = true;
transact.srcfound = true;
System.out.println(pcmtxn.row_id + "["
+ transact.row_id + "]");
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
static class Transaction {
public String rollNo, subjctCode, subjct, l1, l2;
public int row_id, dt_of_exam, dt_of_chking;
public double marks;
public boolean srcfound, tgtfound;
public Transaction(String in_line) {
String[] SplitData = in_line.split("\\|");
row_id = Integer.parseInt(SplitData[0]);
rollNo = SplitData[1];
subjctCode = SplitData[4];
subjct = SplitData[5];
l1 = SplitData[7];
l2 = SplitData[8];
dt_of_exam = Integer.parseInt(SplitData[2]);
dt_of_chking = Integer.parseInt(SplitData[3]);
marks = Double.parseDouble(SplitData[11]);
srcfound = false;
tgtfound = false;
}
}
}
Please help me with your thoughts on how can use Array List in my mapper class.
or how can I write this code better in Map Reduce.
Note: This is just a snapshot.
Do share your views.

Related

Java, Refactoring case

I was given exercise that I need to refactor several java projects.
Only those 2 left which I truly don't have an idea how to refactor.
csv.writer
public class CsvWriter {
public CsvWriter() {
}
public void write(String[][] lines) {
for (int i = 0; i < lines.length; i++)
writeLine(lines[i]);
}
private void writeLine(String[] fields) {
if (fields.length == 0)
System.out.println();
else {
writeField(fields[0]);
for (int i = 1; i < fields.length; i++) {
System.out.print(",");
writeField(fields[i]);
}
System.out.println();
}
}
private void writeField(String field) {
if (field.indexOf(',') != -1 || field.indexOf('\"') != -1)
writeQuoted(field);
else
System.out.print(field);
}
private void writeQuoted(String field) {
System.out.print('\"');
for (int i = 0; i < field.length(); i++) {
char c = field.charAt(i);
if (c == '\"')
System.out.print("\"\"");
else
System.out.print(c);
}
System.out.print('\"');
}
}
csv.writertest
public class CsvWriterTest {
#Test
public void testWriter() {
CsvWriter writer = new CsvWriter();
String[][] lines = new String[][] {
new String[] {},
new String[] { "only one field" },
new String[] { "two", "fields" },
new String[] { "", "contents", "several words included" },
new String[] { ",", "embedded , commas, included",
"trailing comma," },
new String[] { "\"", "embedded \" quotes",
"multiple \"\"\" quotes\"\"" },
new String[] { "mixed commas, and \"quotes\"", "simple field" } };
// Expected:
// -- (empty line)
// only one field
// two,fields
// ,contents,several words included
// ",","embedded , commas, included","trailing comma,"
// """","embedded "" quotes","multiple """""" quotes"""""
// "mixed commas, and ""quotes""",simple field
writer.write(lines);
}
}
test
public class Configuration {
public int interval;
public int duration;
public int departure;
public void load(Properties props) throws ConfigurationException {
String valueString;
int value;
valueString = props.getProperty("interval");
if (valueString == null) {
throw new ConfigurationException("monitor interval");
}
value = Integer.parseInt(valueString);
if (value <= 0) {
throw new ConfigurationException("monitor interval > 0");
}
interval = value;
valueString = props.getProperty("duration");
if (valueString == null) {
throw new ConfigurationException("duration");
}
value = Integer.parseInt(valueString);
if (value <= 0) {
throw new ConfigurationException("duration > 0");
}
if ((value % interval) != 0) {
throw new ConfigurationException("duration % interval");
}
duration = value;
valueString = props.getProperty("departure");
if (valueString == null) {
throw new ConfigurationException("departure offset");
}
value = Integer.parseInt(valueString);
if (value <= 0) {
throw new ConfigurationException("departure > 0");
}
if ((value % interval) != 0) {
throw new ConfigurationException("departure % interval");
}
departure = value;
}
}
public class ConfigurationException extends Exception {
private static final long serialVersionUID = 1L;
public ConfigurationException() {
super();
}
public ConfigurationException(String arg0) {
super(arg0);
}
public ConfigurationException(String arg0, Throwable arg1) {
super(arg0, arg1);
}
public ConfigurationException(Throwable arg0) {
super(arg0);
}
}
configuration.test
public class ConfigurationTest{
#Test
public void testGoodInput() throws IOException {
String data = "interval = 10\nduration = 100\ndeparture = 200\n";
Properties input = loadInput(data);
Configuration props = new Configuration();
try {
props.load(input);
} catch (ConfigurationException e) {
assertTrue(false);
return;
}
assertEquals(props.interval, 10);
assertEquals(props.duration, 100);
assertEquals(props.departure, 200);
}
#Test
public void testNegativeValues() throws IOException {
processBadInput("interval = -10\nduration = 100\ndeparture = 200\n");
processBadInput("interval = 10\nduration = -100\ndeparture = 200\n");
processBadInput("interval = 10\nduration = 100\ndeparture = -200\n");
}
#Test
public void testInvalidDuration() throws IOException {
processBadInput("interval = 10\nduration = 99\ndeparture = 200\n");
}
#Test
public void testInvalidDeparture() throws IOException {
processBadInput("interval = 10\nduration = 100\ndeparture = 199\n");
}
#Test
private void processBadInput(String data) throws IOException {
Properties input = loadInput(data);
boolean failed = false;
Configuration props = new Configuration();
try {
props.load(input);
} catch (ConfigurationException e) {
failed = true;
}
assertTrue(failed);
}
#Test
private Properties loadInput(String data) throws IOException {
InputStream is = new StringBufferInputStream(data);
Properties input = new Properties();
input.load(is);
is.close();
return input;
}
}
Ok, here some advice regarding the code.
CsvWriter
The bad thing is that you print everything to System.out. It will be hard to test without mocks. Instead I suggest you to add field PrintStream which defines where all data will go.
import java.io.PrintStream;
public class CsvWriter {
private final PrintStream printStream;
public CsvWriter() {
this.printStream = System.out;
}
public CsvWriter(PrintStream printStream) {
this.printStream = printStream;
}
...
You then write everything to this stream. This refactoring easy since you use replace function(Ctrl+R in IDEA). Here is the example how you do it.
private void writeField(String field) {
if (field.indexOf(',') != -1 || field.indexOf('\"') != -1)
writeQuoted(field);
else
printStream.print(field);
}
Others stuff seems ok in this class.
CsvWriterTest
First thing first you don't check all logic in a single method. Make small methods with different kind of tests. It's ok to keep your current test though. Sometimes it's useful to check most of the logic in a complex scenario.
Also pay attention to the names of the methods. Check this
Obviously you test doesn't check the results. That's why we need this functionality with PrintStream. We can build a PrintStream on top of the instance of ByteArrayOutputStream. We then construct a string and check if the content is valid. Here is how you can easily check what was written
public class CsvWriterTest {
private ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
private PrintStream printStream = new PrintStream(byteArrayOutputStream);
#Test
public void testWriter() {
CsvWriter writer = new CsvWriter(printStream);
... old logic here ...
writer.write(lines);
String result = new String(byteArrayOutputStream.toByteArray());
Assert.assertTrue(result.contains("two,fields"));
Configuration
Make fields private
Make messages more concise
ConfigurationException
Seems good about serialVersionUID. This thing is needed for serialization/deserialization.
ConfigurationTest
Do not use assertTrue(false/failed); Use Assert.fail(String) with some message which is understandable.
Tip: if you don't have much experience and need to refactor code like this, you may want to read some chapters of Effective Java 2nd edition by Joshua Bloch. The book is not so big so you can read it in a week and it has some rules how to write clean and understandable code.

Jumping pair lines using .readLine() method at a while loop

I am new at Java and I am having a little trouble:
I am trying to read chemical samples to represent them at a X-Y graph.
The input file looks like this:
La 0.85678
Ce 0.473
Pr 62.839
...
...
My code stocks only the unpair lines value (0.85678, jumps line, 62.839 at the example), and I cannot realize what is the problem:
public class Procces {
public void readREE() throws IOException {
try{
rEE = new BufferedReader (new FileReader ("src/files/test.txt"));
while ( (currentLine = rEE.readLine() ) != null) {
try {
for (int size = 3;size<10;size++) {
String valueDec=(currentLine.substring(3,size));
//char letra =(char)c;
if ((c=rEE.read()) != -1) {
System.out.println("Max size");
} else
valueD = Double.parseDouble(valueDec);
System.out.println(valueDec);
}
}
catch (Exception excUncertainDecimals) {
}
}
}finally {
try { rEE.close();
} catch (Exception exc) {
}
}
}
String line;
int c = 0;
int counter = 0;
String valueS = null;
String valueSimb = null;
Double valueD = null;
Double logValue = null;
Double YFin=450.0;
String currentLine;
BufferedReader rEE;
}
Thank you in advance, as I can't see why the program jumps the pair lines.
use Java Scanner class.
import java.io.*;
import java.util.Scanner;
public class MyClass {
public static void main(String[] args) throws IOException {
try (Scanner s = new Scanner(new BufferedReader(new FileReader("file.txt"))){
while (s.hasNext()) {
System.out.println(s.next());
}
}
}
}
Please have a look at Scanner.
In general is Java a well established language and in most cases you do not have to re-implemented "common" (e.g. reading custom text files) stuff on a low level way.
I get it. Thank you.
Here the code:
import java.io.*
import java.util.Scanner;
public class Process implements Samples{
public void readREE() throws IOException {
try
(Scanner rEE = new Scanner(new BufferedReader(new FileReader("src/files/test.txt")))){
while (rEE.hasNext()) {
element = rEE.next();
if (element.equals("La")) {
String elementValue = rEE.next();
Double value = Double.parseDouble(elementValue);
Double valueChond = 0.237;
Double valueNorm= value/valueChond;
Double logValue = (Math.log(valueNorm)/Math.log(10));
Double yLog = yOrd - logValue*133.33333333;
Sample NormedSampleLa=new Sample("La",yLog);
sampleREE.add(NormedSampleLa);
}
}
} finally {
}
}
public String LaS, CeS, PrS, NdS, PmS, SmS, EuS, GdS, TbS, DyS, HoS, ErS, TmS, YbS, LuS;
public String element, elementValue;
public Double yOrd=450.0;
}

ArrayOutofBoundsException - Attempting to read to/from file into Hash Map

I'm working on a homework assignment and have run into an odd "ArrayOutOfBoundsException" error - I know what the error means (essentially I'm trying to reference a location in an array that isn't there) but I'm not sure why it's throwing that error? I'm not sure what I'm missing, but obviously there must be some logic error somewhere that I'm not seeing.
PhoneDirectory.java
import java.util.HashMap;
import java.io.*;
class PhoneDirectory {
private HashMap<String, String> directoryMap;
File directory;
public PhoneDirectory() { //create file for phone-directory
directory = new File("phone-directory.txt");
directoryMap = new HashMap<String, String>();
try(BufferedReader buffer = new BufferedReader(new FileReader(directory))) {
String currentLine;
while((currentLine = buffer.readLine()) != null) { //set currentLine = buffer.readLine() and check if not null
String[] fileData = currentLine.split(","); //create array of values in text file - split by comma
directoryMap.put(fileData[0], fileData[1]); //add item to directoryMap
}
}
catch(IOException err) {
err.printStackTrace();
}
}
public PhoneDirectory(String phoneDirectoryFile) {
directory = new File(phoneDirectoryFile);
directoryMap = new HashMap<String, String>();
try(BufferedReader buffer = new BufferedReader(new FileReader(directory))) {
String currentLine;
while((currentLine = buffer.readLine()) != null) { //set currentLine = buffer.readLine() and check if not null
String[] fileData = currentLine.split(","); //create array of values in text file - split by comma
directoryMap.put(fileData[0], fileData[1]); //add item to directoryMap
}
}
catch(IOException err) {
err.printStackTrace();
}
}
public String Lookup(String personName) {
if(directoryMap.containsKey(personName))
return directoryMap.get(personName);
else
return "This person is not in the directory.";
}
public void AddOrChangeEntry(String name, String phoneNumber) {
//ASK IF "IF-ELSE" CHECK IS NECESSARY
if(directoryMap.containsKey(name))
directoryMap.put(name,phoneNumber); //if name is a key, update listing
else
directoryMap.put(name, phoneNumber); //otherwise - create new entry with name
}
public void DeleteEntry(String name) {
if(directoryMap.containsKey(name))
directoryMap.remove(name);
else
System.out.println("The person you are looking for is not in this directory.");
}
public void Write() {
try(BufferedWriter writeDestination = new BufferedWriter(new FileWriter(directory)))
{
for(String key : directoryMap.keySet())
{
writeDestination.write(key + ", " + directoryMap.get(key) + '\n');
writeDestination.newLine();
}
}
catch(IOException err) {
err.printStackTrace();
}
}
}
Driver.java
public class Driver {
PhoneDirectory list1;
public static void main(String[] args) {
PhoneDirectory list1 = new PhoneDirectory("test.txt");
list1.AddOrChangeEntry("Disney World","123-456-7890");
list1.Write();
}
}
Essentially I'm creating a file called "test.txt" and adding the line "Disney World, 123-456-7890" - what's weird is that the code still works - but it throws me that error anyway, so what's really happening? (For the record, I'm referring to the line(s): directoryMap.put(fileData[0], fileData[1]) - which would be line 14 and 28 respectively.)

Modify class to write exceptions to text file

I really just need a point in the right direction for this code. I do not understand how to accomplish what it is asking.
Modify the ProductMainApp class so it responds appropriately if the addProduct and deleteProduct mwthod in the ProductTextFile class returns a flase value.
Modify the ProductTextFile class so it writes exceptions to a tex file names errorLog.txt instead of printing them to the console. To do that, add a method named printToLogFile that accepts an IOException as an argument. This method should append two records to the log file: one that indicates the date and time the exception occured and one that contains information about the exception.
Modify the getProducts and saveProducts methods so they call the printToLogFile method when an error occurs.
Here is the PrintTextFile:
import java.util.*;
import java.io.*;
import java.nio.file.*;
public final class ProductTextFile implements ProductDAO
{
private ArrayList<Product> products = null;
private Path productsPath = null;
private File productsFile = null;
private final String FIELD_SEP = "\t";
public ProductTextFile()
{
productsPath = Paths.get("products.txt");
productsFile = productsPath.toFile();
products = this.getProducts();
}
public ArrayList<Product> getProducts()
{
// if the products file has already been read, don't read it again
if (products != null)
return products;
products = new ArrayList<>();
if (Files.exists(productsPath)) // prevent the FileNotFoundException
{
try
{
if (true)
{
// throw new IOException();
}
// open the input stream
BufferedReader in =
new BufferedReader(
new FileReader(productsFile));
// read all products stored in the file
// into the array list
String line = in.readLine();
while(line != null)
{
String[] columns = line.split(FIELD_SEP);
String code = columns[0];
String description = columns[1];
String price = columns[2];
Product p = new Product(
code, description, Double.parseDouble(price));
products.add(p);
line = in.readLine();
}
// close the input stream
in.close();
}
catch(IOException e)
{
//System.out.println(e);
return null;
}
}
return products;
}
public Product getProduct(String code)
{
for (Product p : products)
{
if (p.getCode().equals(code))
return p;
}
return null;
}
public boolean addProduct(Product p)
{
products.add(p);
return this.saveProducts();
}
public boolean deleteProduct(Product p)
{
products.remove(p);
return this.saveProducts();
}
public boolean updateProduct(Product newProduct)
{
// get the old product and remove it
Product oldProduct = this.getProduct(newProduct.getCode());
int i = products.indexOf(oldProduct);
products.remove(i);
// add the updated product
products.add(i, newProduct);
return this.saveProducts();
}
private boolean saveProducts()
{
try
{
// open the output stream
PrintWriter out = new PrintWriter(
new BufferedWriter(
new FileWriter(productsFile)));
// write all products in the array list
// to the file
for (Product p : products)
{
out.print(p.getCode() + FIELD_SEP);
out.print(p.getDescription() + FIELD_SEP);
out.println(p.getPrice());
}
// close the output stream
out.close();
}
catch(IOException e)
{
System.out.println(e);
return false;
}
return true;
}
}
Here is the ProductMainApp:
import java.util.Scanner;
import java.util.ArrayList;
public class ProductMaintApp implements ProductConstants
{
// declare two class variables
private static ProductDAO productDAO = null;
private static Scanner sc = null;
public static void main(String args[])
{
// display a welcome message
System.out.println("Welcome to the Product Maintenance application\n");
// set the class variables
productDAO = DAOFactory.getProductDAO();
sc = new Scanner(System.in);
// display the command menu
displayMenu();
// perform 1 or more actions
String action = "";
while (!action.equalsIgnoreCase("exit"))
{
// get the input from the user
action = Validator.getString(sc,
"Enter a command: ");
System.out.println();
if (action.equalsIgnoreCase("list"))
displayAllProducts();
else if (action.equalsIgnoreCase("add"))
{
addProduct();
}
else if (action.equalsIgnoreCase("del") || action.equalsIgnoreCase("delete"))
deleteProduct();
else if (action.equalsIgnoreCase("help") || action.equalsIgnoreCase("menu"))
displayMenu();
else if (action.equalsIgnoreCase("exit") || action.equalsIgnoreCase("quit"))
System.out.println("Bye.\n");
else
System.out.println("Error! Not a valid command.\n");
}
}
public static void displayMenu()
{
System.out.println("COMMAND MENU");
System.out.println("list - List all products");
System.out.println("add - Add a product");
System.out.println("del - Delete a product");
System.out.println("help - Show this menu");
System.out.println("exit - Exit this application\n");
}
public static void displayAllProducts()
{
System.out.println("PRODUCT LIST");
ArrayList<Product> products = productDAO.getProducts();
Product p = null;
StringBuilder sb = new StringBuilder();
if (productDAO.getProducts().equals(null))
{
System.out.println("Value Null");
System.exit(0);
}
for (int i = 0; i < products.size(); i++)
{
p = products.get(i);
sb.append(StringUtils.padWithSpaces(
p.getCode(), CODE_SIZE + 4));
sb.append(StringUtils.padWithSpaces(
p.getDescription(), DESCRIPTION_SIZE + 4));
sb.append(
p.getFormattedPrice());
sb.append("\n");
}
System.out.println(sb.toString());
}
public static void addProduct()
{
String code = Validator.getString(
sc, "Enter product code: ");
String description = Validator.getLine(
sc, "Enter product description: ");
double price = Validator.getDouble(
sc, "Enter price: ");
Product product = new Product();
product.setCode(code);
product.setDescription(description);
product.setPrice(price);
productDAO.addProduct(product);
System.out.println();
System.out.println(description
+ " has been added.\n");
}
public static void deleteProduct()
{
String code = Validator.getString(sc,
"Enter product code to delete: ");
Product p = productDAO.getProduct(code);
System.out.println();
if (p != null)
{
productDAO.deleteProduct(p);
System.out.println(p.getDescription()
+ " has been deleted.\n");
}
else
{
System.out.println("No product matches that code.\n");
}
}
}
You can use Exception.printStackTrace (stream) where stream is a outputstream to a file.
http://docs.oracle.com/javase/7/docs/api/java/lang/Throwable.html#printStackTrace(java.io.PrintStream)

Error while running customized java class

I have created a sequence file out of directory and then given index according to groups I want so that I can create groups using that index. This groups are then given one by one to my customized java class which gives information based on the file present in the group.
My problem is that some time it runs perfectly but some time gives different errors like null pointer exception, data type of field not found.
The problem is may be due to size of group. Because I am creating folder based group and then do the fetches the information from that folder inside my customized jar.
So how can I resolve this issue?
Below is my java class code:
public class OperateDirectory extends EvalFunc<DataBag>{
public TupleFactory tupleFactory = TupleFactory.getInstance();
public BagFactory bagFactory = BagFactory.getInstance();
public DataBag exec(Tuple input) throws IOException{
ArrayList<String> protoTuple = new ArrayList<>();
DataBag dataBag = bagFactory.newDefaultBag();
/* Create Directory */
if(input == null)
return dataBag;
if(input.size() != 2)
return dataBag;
long id = (long)input.get(0);
DataBag infoBag = (DataBag)input.get(1);
Iterator<Tuple> it = infoBag.iterator();
File dir = new File("/tmp/TestFolder"+id);
if(dir.exists())
{
FileUtils.cleanDirectory(dir);
}
else
{
dir.mkdir();
}
while(it.hasNext())
{
Tuple file_details = (Tuple)it.next();
if(file_details != null && file_details.size()==3)
{
String file_name = (String)file_details.get(1);
BytesWritable file_contents = (BytesWritable)file_details.get(2);
File f = new File(dir.getPath()+"/"+file_name);
f.deleteOnExit();
writeToFile(file_contents, f);
}
}
/* Perform operation here */
File f = new File("output"+id+".log");
ProcessBuilder performProcess1 = new ProcessBuilder("processes/processor", dir.getPath(),f.getPath());
Process process1 = performProcess1.start();
try
{
process1.waitFor();
if(f.exists() && f.length()>0)
{
ProcessBuilder performProcess2 = new ProcessBuilder("perl", "scripts/ParseFile.pl", f.getPath());
Process process2 = performProcess2.start();
InputStream is = process2.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String line;
while ((line = br.readLine()) != null)
{
if(!line.isEmpty())
{
String [] tmpArray = line.split(",");
if(tmpArray.length == 2)
{
protoTuple.clear();
protoTuple.add(tmpArray[0]);
protoTuple.add(tmpArray[1]);
dataBag.add(tupleFactory.newTuple(protoTuple));
}
}
}
}
else
{
protoTuple.clear();
protoTuple.add("Error");
protoTuple.add("File "+f.getPath()+" does not exists ");
dataBag.add(tupleFactory.newTuple(protoTuple));
}
}
catch(Exception e)
{
protoTuple.clear();
protoTuple.add("Error ");
protoTuple.add(e.getMessage());
dataBag.add(tupleFactory.newTuple(protoTuple));
}
try
{
FileUtils.cleanDirectory(dir);
FileUtils.deleteDirectory(dir);
}
catch(Exception e)
{
}
return dataBag;
}
void writeToFile(BytesWritable value, File binaryFile) throws IOException{
FileOutputStream fileOut = new FileOutputStream(binaryFile);
fileOut.write(value.getBytes(), 0, value.getLength());
fileOut.close();
}
}

Categories