I have created a web scraper that scrapes data from a website and stores it in a CSV file. The problem is that one column on the website holds currency-formatted values such as 7,100 or 85,210. When my code runs and scrapes the data, each of these values is split across two columns, for example 7 in one column and 100 in the next. Please see the attached screenshots.
Code is as follows.
public class ComMarket_summary {
boolean writeCSVToConsole = true;
boolean writeCSVToFile = true;
boolean sortTheList = true;
boolean writeToConsole;
boolean writeToFile;
public static Document doc = null;
public static Elements tbodyElements = null;
public static Elements elements = null;
public static Elements tdElements = null;
public static Elements trElement2 = null;
public static String Dcomma = ",";
public static String line = "";
public static ArrayList<Elements> sampleList = new ArrayList<Elements>();
/**
 * Configures the HTTP proxy, downloads the market summary page and stores
 * the parsed result in {@link #doc}.
 *
 * @throws IOException if the page cannot be fetched or parsed
 */
public static void createConnection() throws IOException {
    System.setProperty("http.proxyHost", "191.1.1.202");
    System.setProperty("http.proxyPort", "8080");
    final URL summaryPage = new URL("http://www.psx.com.pk/phps/mktSummary.php");
    // The second argument is the fetch timeout passed to jsoup.
    doc = Jsoup.parse(summaryPage, 1000);
    System.out.println("Successfully Connected");
}
/**
 * Exports the third {@code table.marketData} table of {@link #doc} to
 * {@code C:\market_smry.csv}, writing one CSV record per table row that has
 * {@code td} cells.
 *
 * <p>Cell values containing a comma (for example the amount {@code 7,100}),
 * a double quote or a line break are wrapped in double quotes, so a CSV
 * reader keeps them in a single column. Fields are separated by a bare
 * comma with no padding, because padding before an opening quote defeats
 * the quoting. The file is rewritten from scratch on every call and each
 * record is echoed to standard output.
 *
 * <p>{@link #createConnection()} must have been called first so that
 * {@code doc} is populated. If the page has no {@code table.marketData}
 * at all, nothing is written.
 *
 * @throws IllegalStateException if the page has only one or two matching tables
 * @throws Exception if the CSV file cannot be written
 */
public static void parsingHTML() throws Exception {
    Elements tables = doc.select("table.marketData");
    if (tables.isEmpty()) {
        return;
    }
    if (tables.size() < 3) {
        throw new IllegalStateException(
                "Expected at least 3 table.marketData tables but found " + tables.size());
    }
    Element table = tables.get(2);
    File csvFile = new File("C:\\market_smry.csv");
    // Opening without append truncates the previous export; a single writer
    // for the whole table is closed even when a row fails.
    try (FileWriter writer = new FileWriter(csvFile)) {
        for (Element trElement : table.getElementsByTag("tr")) {
            trElement2 = trElement.getElementsByTag("tr");
            tdElements = trElement.getElementsByTag("td");
            if (!tdElements.isEmpty()) {
                StringBuilder record = new StringBuilder();
                for (int i = 0; i < tdElements.size(); i++) {
                    if (i > 0) {
                        record.append(',');
                    }
                    record.append(toCsvField(formatData(tdElements.get(i).text())));
                }
                writer.write(record.toString());
                writer.write("\r\n");
                System.out.println(record);
            }
            System.out.println(sampleList.add(tdElements));
        }
    }
}

/** Returns {@code value} unchanged, or quoted and escaped if it holds a comma, quote or line break. */
private static String toCsvField(String value) {
    boolean needsQuoting = value.indexOf(',') >= 0
            || value.indexOf('"') >= 0
            || value.indexOf('\n') >= 0
            || value.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return value;
    }
    return "\"" + value.replace("\"", "\"\"") + "\"";
}
/** Input pattern, e.g. {@code Jan 5, 2015}. */
private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US);
/** Output pattern, e.g. {@code 5-Jan-15}. */
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("d-MMM-yy", Locale.US);

/**
 * Rewrites a date given as {@code MMM d, yyyy} into {@code d-MMM-yy}.
 *
 * @param text the cell text to convert
 * @return the reformatted date, or {@code text} itself when it is not a
 *         date in the expected input pattern
 */
public static String formatData(String text) {
    final Date parsed;
    try {
        parsed = FORMATTER_MMM_d_yyyy.parse(text);
    } catch (ParseException notADate) {
        // Anything that is not a date passes through untouched.
        return text;
    }
    return FORMATTER_dd_MMM_yyyy.format(parsed);
}
/**
 * Entry point: calls createConnection() to load the page into the static
 * document, then parsingHTML() to write its table to the CSV file.
 */
public static void main(String[] args) throws IOException, Exception {
createConnection();
parsingHTML();
}
Note: I am using windows 8, java version 1.8, jsoup 1.8
Before saving the value get rid of the comma by using String.replace
value = value.replace (",", "");
String.replace will get rid of your comma. While there are several other similar functions (replaceAll, replaceFirst), replace will be slightly faster and is typically the best choice for a single character.
See: https://docs.oracle.com/javase/6/docs/api/java/lang/String.html
And also: Difference between String replace() and replaceAll()
Related
The program gives the correct output when I call System.out.println(mem[i].memName); but afterwards every element of the array of class "member" yields the same value (alex alex alex alex), i.e. the values from the last line of the file (shown at the END of the CODE).
It should give all the names listed in the file.
import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
class member//CLASS TO BE USED AS ARRAY TO STORE THE PRE REGISTERED MEMBERS FED INSIDE
{ //THE FILE "member.txt"
static int memId;
static String memName, memEmail, memPh, date;
}
public class entry// file name
{
static DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy/MM/dd");
static LocalDateTime now = LocalDateTime.now();
static Scanner input = new Scanner(System.in);
static member[] mem = new member[100];
static trainer[] tr = new trainer[100];
static FileWriter fwriter = null;
static BufferedWriter bwriter = null;
static FileReader fread = null;
static BufferedReader bread = null;
static void memberEntry() {
int i = 0;
System.out.println("NEW MEMBER IS BEING ADDED");
try {
fwriter = new FileWriter("C:\\Users\\itisa\\OneDrive\\Desktop\\GYM
PROJECT\\member.txt
",true);
bwriter = new BufferedWriter(fwriter);
System.out.println("Member ID is being automatically genrated");
int autoID = 101 + getMemberRecords();//Fetching the number of members so that ID can
//Be genrated automatically
i = autoID % 100;//jumping toward the next loaction location
System.out.println("Member ID of new member is" + autoID);
mem[i].memId = autoID;
System.out.print("Enter the name of the member:");
mem[i].memName = input.next();
System.out.print("Enter the email address of memeber:");
mem[i].memEmail = input.next();
System.out.print("Enter the contact number of memeber:");
mem[i].memPh = input.next();
System.out.println("Date has been feeded automatically:" + dtf.format(now));
mem[i].date = dtf.format(now);
bwriter.write(String.valueOf(mem[i].memId));
bwriter.write("|");
bwriter.write(mem[i].memName);
bwriter.write("|");
bwriter.write(mem[i].memEmail);
bwriter.write("|");
bwriter.write(mem[i].memPh);
bwriter.write("|");
bwriter.write(mem[i].date);
bwriter.write("\n");
System.out.println("MEMBER CREATED SUCCESSFULLY");
System.out.println("---------------------------------------");
bwriter.close();
} catch (IOException e) {
System.out.print(e);
}
}
static int getMemberRecords() {
int count = 0, i = 0;
try {
fread = new FileReader("C:\\Users\\itisa\\OneDrive\\Desktop\\GYM PROJECT\\member.txt");
bread = new BufferedReader(fread);
String lineRead = null;
while ((lineRead = bread.readLine()) != null)//Just to get to know the number of member
//alreadyinside the file
{
String[] t = lineRead.split("\\|");
mem[i].memId = Integer.parseInt(t[0]);
mem[i].memName = t[1];
mem[i].memEmail = t[2];
mem[i].memPh = t[3];
mem[i].date = t[4];
i++;
count++;
}
System.out.println(mem[0].memName);//should print the 1st name present in the name
//i.e, RAVI
} catch (IOException e) {
System.out.println(e);
}
return count;
}
public static void main(String[] args) throws IOException {
System.out.println("Total number of accounts:" + getMemberRecords());
}
}
Elements stored in the file:
101|RAVI|itisadi23#gmai.com|9102019656|2020/04/30
102|aditya|adi#gmail.com|9386977983|2020/04/30
103|anurag|anu#ymail.com|10000000000|2020/04/30
104|alex|alex123#mail.com|2829578303|2020/04/30
Expected output:
RAVI
Total number of accounts = 4
My output:
alex
Total number of accounts=4
In Short it is giving last name as output no matter what index number is given to fetch the data.
The fields of your member class must not be static; if they are, they are shared by every member object.
The other problem is that the elements of your member array are never initialized. You'll need:
Live demo
class member
{
int memId; //non-static
String memName, memEmail, memPh, date; //non-static
}
and
//...
while ((lineRead = bread.readLine()) != null) {
String[] t = lineRead.split("\\|");
mem[i] = new member(); // initialize every node
mem[i].memId = Integer.parseInt(t[0]);
mem[i].memName = t[1];
mem[i].memEmail = t[2];
mem[i].memPh = t[3];
mem[i].date = t[4];
i++;
count++;
}
System.out.println(mem[0].memName);
//...
Output:
RAVI
Note that you can use ArrayList or other java collection.
I'm creating a program that basically has information about elements (From the periodic table) such as the atomic mass, number and symbol. I want to store all the information about them in a text file, but am unsure of how to code it to call upon the information separately (And not just read the text file line by line).
This is the code I'm using for each element so far:
/**
 * Creates an element by storing each argument in the field of the same name.
 * The constructor is private, so it can only be called from inside the class.
 */
private Element(int atomicNumber, double atomicMass, String atomicSymbol, String atomicName) {
this.atomicNumber = atomicNumber;
this.atomicMass = atomicMass;
this.atomicSymbol = atomicSymbol;
this.atomicName = atomicName;
}
In the same file, I was creating an element like this:
Element H = new Element(1, 1.008, "H", "Hydrogen");
How could I, instead of having all 118 elements in the same file as the code, put them in a text file and read them from there?
I'm using methods such as getAtomicNumber() and getAtomicMass(), etc to call upon the information.
Thanks.
You have several options, since a text file can hold any structure you like. My recommendation would be XML or CSV.
CSV is easier to manage and edit, while XML is more structured and self-describing. I'll go with a simple CSV.
Put one line in the file for each element (each line will be read into an instance of your Element class), with the element's attributes (number, mass, symbol and name) separated by commas in a fixed order, something like this:
1,1.008,H,Hydrogen
2,4.0026,He,Helium
etc..
Then, your code should read the file, line by line, create instances of your element class and maybe save it in a list. Something like this:
Your Element Class
/**
 * A chemical element described by its atomic number, atomic mass, symbol
 * and name.
 */
public class Element {
    private int atomicNumber;
    private double atomicMass;
    private String atomicSymbol;
    private String atomicName;

    /**
     * @param atomicNumber the atomic number, e.g. {@code 1}
     * @param atomicMass   the atomic mass, e.g. {@code 1.008}
     * @param atomicSymbol the symbol, e.g. {@code "H"}
     * @param atomicName   the name, e.g. {@code "Hydrogen"}
     */
    public Element(int atomicNumber, double atomicMass, String atomicSymbol, String atomicName) {
        this.atomicNumber = atomicNumber;
        this.atomicMass = atomicMass;
        this.atomicSymbol = atomicSymbol;
        this.atomicName = atomicName;
    }

    public double getAtomicMass() {
        return atomicMass;
    }

    public void setAtomicMass(double atomicMass) {
        this.atomicMass = atomicMass;
    }

    public String getAtomicSymbol() {
        return atomicSymbol;
    }

    public void setAtomicSymbol(String atomicSymbol) {
        this.atomicSymbol = atomicSymbol;
    }

    public String getAtomicName() {
        return atomicName;
    }

    public void setAtomicName(String atomicName) {
        this.atomicName = atomicName;
    }

    public int getAtomicNumber() {
        return atomicNumber;
    }

    public void setAtomicNumber(int atomicNumber) {
        this.atomicNumber = atomicNumber;
    }

    /** Returns all four attributes on one line, separated by {@code " | "}. */
    @Override
    public String toString() {
        return "atomicMass: " + atomicMass
                + " | atomicSymbol: " + atomicSymbol
                + " | atomicName: " + atomicName
                + " | atomicNumber: " + atomicNumber;
    }
}
Elements file reader
/** Loads {@link Element} instances from a comma-separated text file. */
public class ElementsFileReader {

    /**
     * Reads one element per line in the order
     * {@code atomicNumber,atomicMass,atomicSymbol,atomicName}.
     * Blank lines are skipped.
     *
     * @param filePath path of the text file to read
     * @return the elements in file order
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a number or mass field is not numeric
     */
    public static List<Element> read(String filePath) throws IOException {
        List<Element> elements = new ArrayList<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(filePath)))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // e.g. a trailing empty line at the end of the file
                }
                String[] lineParts = line.split(",");
                int atomicNumber = Integer.parseInt(lineParts[0]);
                double atomicMass = Double.parseDouble(lineParts[1]);
                String atomicSymbol = lineParts[2];
                String atomicName = lineParts[3];
                elements.add(new Element(atomicNumber, atomicMass, atomicSymbol, atomicName));
            }
        }
        return elements;
    }
}
Your main application using ElementsFileReader and Element
/**
 * Loads the elements from the configured file and prints each one.
 * If the file cannot be read, the error is reported on standard error and
 * the method returns without printing any element.
 */
public static void main(String[] args) {
    final List<Element> loaded;
    try {
        loaded = ElementsFileReader.read("the path to your text file");
    } catch (IOException readFailure) {
        System.err.println("Something gone wrong reading elements file...");
        readFailure.printStackTrace();
        return;
    }
    for (int index = 0; index < loaded.size(); index++) {
        System.out.println(loaded.get(index));
        // do your stuff
    }
}
Be careful with parseInt and parseDouble: they throw NumberFormatException on malformed input, and I'm assuming every line of your text file is in the right format.
Hope this can help you.
Write all the elements to a file in the same format (1,1.008,H,Hydrogen), read line by line and create a Pojo object for each line and write it to Map.
/**
 * Reads one element per line in the order
 * {@code atomicNumber,atomicMass,atomicSymbol,atomicName} and indexes the
 * elements by atomic number. Blank lines are skipped.
 *
 * @param filePath path of the text file to read
 * @return the elements keyed by atomic number; empty if the file does not
 *         exist (the stack trace is printed in that case)
 * @throws NumberFormatException if a number or mass field is not numeric
 */
public Map<Integer, Element> readFile(String filePath) {
    Map<Integer, Element> elementsMap = new HashMap<>();
    // try-with-resources closes the scanner even when a line fails to parse.
    try (Scanner s = new Scanner(new File(filePath))) {
        while (s.hasNextLine()) {
            String line = s.nextLine();
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] properties = line.split(",");
            Element element = new Element(
                    Integer.parseInt(properties[0]),
                    Double.parseDouble(properties[1]),
                    properties[2],
                    properties[3]);
            elementsMap.put(element.getAtomicNumber(), element);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    return elementsMap;
}
You can have the Map key as any other unique element property according to your convenience.
I have written a program that extracts every integer value from a file and finds the duplicated ones. However, I only want the integer values that appear as ID="****.." / id="****..". The "dependsOnPresenceOf" values should be ignored, whatever they are.
My File is : for example
<line id="24867948" dependsOnPresenceOf="7417840">
<element text="Card Balance " id="18829409" dependsOnPresenceOf="28696224" />
<line id="2597826922" dependsOnPresenceOf="200114712343">
<methodElement fixedWidth="17" precededBySpace="false" id="418710522">
<line id="24867948" dependsOnPresenceOf="10565536">
<element text=" Cert. Number:" id="23917950" dependsOnPresenceOf="10565536" />
<line id="24867948" dependsOnPresenceOf="10565536">
<element text=" Cert. Number:" id="23917950" dependsOnPresenceOf="10565536" />
My Program is below which i have written to extract Integer value only :
public class DuplicateIDPicker {
protected static final Logger logger = Logger
.getLogger(com.aspire.pos.DuplicateIDPicker.class);
public static String finalContent = "";
public static void main(String[] args) throws FileNotFoundException {
String content = "";
/* Set location of the file as format below */
String path = "D://TASK/DuplicateFinder/OriginalFile/";
/* Set file name to be evaluate with extension */
String fileName = "SSLItems.bpt";
File f = new File(path.concat(fileName));
try {
content = readFile(f);
String extractedInteger = content.replaceAll("\\D+", " ");
String[] arrayOfID = findAllIDInArray(extractedInteger);
System.out.println("***********************");
HashSet<String> set = new HashSet<String>();
HashSet<String> newSet = new HashSet<String>();
System.out.println("Duplicate ID's found :");
for (String arrayElement : arrayOfID) {
if (!set.add(arrayElement)) {
// System.out.println("Duplicate Element is : "+arrayElement);
newSet.add(arrayElement);
}
}
System.out.println("-----------------------");
/* here are all Duplicate Id */
System.out.println(newSet);
} catch (IOException e) {
logger.error(e.getMessage());
}
}
#SuppressWarnings("resource")
public static String readFile(File f) throws IOException {
String data = "";
FileReader fr = new FileReader(f);
BufferedReader br = new BufferedReader(fr);
while ((data = br.readLine()) != null) {
// Print the content on the console
finalContent = finalContent + data;
}
return finalContent;
}
public static String[] findAllIDInArray(String str) {
String[] value = str.split(" ");
return value;
}
}
you can do content.replaceAll("dependsOnPresenceOf=\"\\d+\"", ""); to remove these unwanted strings
Here is a working solution that makes use of:
1) Java 7 read entire file in one line
2) Matcher ability to sequentially find occurences that match the expression
3) regex capturing group to get the desired numeric value
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Prints the values of {@code id="..."} / {@code ID="..."} attributes that
 * occur more than once in a file.
 */
public class DuplicateIDPicker
{
    /**
     * A whole-word {@code id}/{@code ID} attribute with a numeric value;
     * group 1 is the value. The leading {@code \b} stops attributes that
     * merely end in "id" (e.g. {@code uuid="1"}) from being counted.
     */
    private static final Pattern ID_ATTRIBUTE = Pattern.compile("\\b(?:id|ID)=\"(\\d+)\"");

    public static void main(String[] args)
    {
        /* Set location of the file as format below */
        String path = "C://Temp/";
        /* Set file name to be evaluate with extension */
        String fileName = "in.txt";
        try {
            System.out.println(findDuplicateIds(readFile(path + fileName)));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Collects every id value that appears at least twice in {@code content}.
     *
     * @param content the text to scan
     * @return the duplicated id values; empty if there are none
     */
    public static Set<String> findDuplicateIds(String content)
    {
        Set<String> all = new HashSet<>();
        Set<String> duplicates = new HashSet<>();
        Matcher m = ID_ATTRIBUTE.matcher(content);
        while (m.find()) {
            String idValue = m.group(1);
            // add() returns false when the value was already seen
            if (!all.add(idValue)) {
                duplicates.add(idValue);
            }
        }
        return duplicates;
    }

    /**
     * Reads the whole file as UTF-8 text.
     *
     * @param fileFullPath path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be read
     */
    public static String readFile(String fileFullPath) throws IOException
    {
        return new String(Files.readAllBytes(Paths.get(fileFullPath)),
                java.nio.charset.StandardCharsets.UTF_8);
    }
}
This one is another solution to solve my problem.
/**
 * Prints the values of {@code id="..."} / {@code ID="..."} attributes that
 * occur more than once in a file, using plain string splitting.
 */
public class DuplicateIDPicker {
    protected static final Logger logger = Logger
            .getLogger(com.aspire.pos.DuplicateIDPickerOld.class);
    /** Content returned by the most recent {@link #readFile(File)} call. */
    public static String finalContent = "";

    public static void main(String[] args) throws FileNotFoundException,
            IOException {
        HashSet<String> set = new HashSet<String>();
        HashSet<String> newSet = new HashSet<String>();
        /* Set location of the file as format below */
        String path = "D://TASK/DuplicateFinder/OriginalFile/";
        /* Set file name to be evaluate with extension */
        String fileName = "SSLPickupDeliveryOrderReceipt.txt";
        File f = new File(path.concat(fileName));
        String content = readFile(f);
        String[] listOfID = extractIDOnly(findAllIDInString(content));
        System.out.println("***********************");
        System.out.println("Duplicate ID's found :");
        for (String arrayElement : listOfID) {
            // add() returns false when the value was already seen
            if (!set.add(arrayElement)) {
                newSet.add(arrayElement);
            }
        }
        System.out.println("-----------------------");
        /* Duplicate Id stored in a Set : */
        System.out.println(newSet);
    }

    /*
     * Reads the file and returns its content as a String, keeping a line
     * break after every line so text from adjacent lines cannot merge.
     * A read failure is logged and whatever was read so far is returned.
     */
    public static String readFile(File f) {
        StringBuilder content = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(f))) {
            String data;
            while ((data = br.readLine()) != null) {
                content.append(data).append('\n');
            }
        } catch (IOException e) {
            logger.error(e.getMessage());
        }
        // Replaced on each call so a second call does not return the first file too.
        finalContent = content.toString();
        return finalContent;
    }

    /*
     * Returns the quoted value at the start of each chunk produced by
     * findAllIDInString. Chunks that do not begin with '"' are skipped; this
     * covers the text before the first id attribute, whose quoted strings
     * belong to other attributes.
     */
    public static String[] extractIDOnly(String[] arr) {
        ArrayList<String> listOfID = new ArrayList<String>();
        for (String chunk : arr) {
            if (!chunk.startsWith("\"")) {
                continue;
            }
            // "value" rest  ->  ["", "value", " rest"]
            String[] parts = chunk.split("\"", 3);
            if (parts.length > 1) {
                listOfID.add(parts[1]);
            }
        }
        return listOfID.toArray(new String[0]);
    }

    /*
     * Splits the text at every whole-word id= / ID= attribute name, so each
     * chunk after the first starts with that attribute's value. The word
     * boundary keeps names that merely end in "id" from being split.
     */
    public static String[] findAllIDInString(String str) {
        return str.split("\\b(?:id|ID)=");
    }
}
Input string:
-;Lokacija;-;Pozicija;Grad;-;-;
Code:
public static ArrayList<String> sortList = new ArrayList<String>();
//Load
String Row = new String("-;Lokacija;-;Pozicija;Grad;-;-;");
String[] RowAsList;
RowAsList = Row.split(";");
sortList.add( RowAsList[0] );
// Check
StringBuffer minus = new StringBuffer( "-");
String itm = sortList.get(0);
if( !itm.contentEquals( minus ) )
// not minus
else
.....
Problem: this code says there is no minus on first item (0), subsequent minuses are recognized correctly.
Anyone has any ideas as to why ?
Thanks,
Kajko
Remove the static modifier before the sortList member:
public List<String> sortList = new ArrayList<String>();
Thats what you want
/**
 * Splits a semicolon-separated row into {@link #sortList} and reports
 * whether its first item is a minus sign.
 */
public class Test
{
    public static ArrayList<String> sortList = new ArrayList<String>();

    public static void main(String a[])
    {
        //Load
        String Row = "-;Lokacija;-;Pozicija;Grad;-;-;";
        String[] RowAsList = Row.split(";");
        // split() already drops the trailing empty token, so every element
        // is kept; stopping at length-1 lost the last "-".
        for (String item : RowAsList) {
            sortList.add(item);
        }
        System.out.println(sortList);
        // Check
        StringBuffer minus = new StringBuffer("-");
        String itm = sortList.get(0);
        if (itm.contentEquals(minus)) {
            System.out.println(sortList.get(0));
        } else {
            // not minus
            System.out.println("not found...");
        }
    }
}
Try :
String Row = new String("-;Lokacija;-;Pozicija;Grad;-;-;");
ArrayList<String> sortList = new ArrayList<String>();
String[] RowAsList= Row .split(";");
sortList.add( RowAsList[0]);
StringBuffer minus = new StringBuffer( "-");
String itm = sortList.get(0);
if( itm.equals( minus.toString())){
System.out.println("Get Here" + itm);
}
else{
System.out.println("Not Here"+ itm);
}
I am writing a program for a user to add a string to an ArrayList then display it.
It doesn't work and it seems there is a problem with compareTo().
Here is my code:
/**
 * Small in-memory table of countries with a console search and a listing.
 */
public class database {
    static BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    static String country[] = new String[100];
    static String capital[] = new String[100];
    static double population[] = new double[100];
    // Fixed-size view of the country array: later writes to country[] show up here.
    static List<String> countriesList = Arrays.asList(country);

    /** Fills the first five slots of the country, capital and population arrays. */
    public static void main(String args[]) throws IOException {
        country[0] = "Barbados";
        country[1] = "France";
        country[2] = "Nigeria";
        country[3] = "USA";
        country[4] = "Japan";
        capital[0] = "Bridgetown";
        capital[1] = "Paris";
        capital[2] = "Abuja";
        capital[3] = "Washington";
        capital[4] = "Tokyo";
        population[0] = 65.3;
        population[1] = 315.8;
        population[2] = 170.1;
        population[3] = 2840;
        population[4] = 126.7;
    }

    /**
     * Reads a country name from standard input and reports the index at
     * which it is stored, or that it is not stored.
     *
     * @throws IOException if standard input cannot be read
     */
    public static void searchCountry() throws IOException {
        System.out.println("Search Country:");
        String output = br.readLine();
        boolean found = false;
        int i;
        for (i = 0; i < country.length; i++) {
            // Unused slots are null; calling equals on the stored value
            // avoids a NullPointerException for them and at end of input.
            if (country[i] != null && country[i].equals(output)) {
                found = true;
                break;
            }
        }
        if (found) {
            System.out.println(output + " is found at index " + i);
        } else {
            System.out.println(output + "Country not found, choose Add country to add it");
        }
    }

    /** Prints every stored country, skipping the unused (null) slots. */
    public static void listCountry() throws IOException {
        for (String c : countriesList) {
            if (c != null) {
                System.out.println(c);
            }
        }
    }
}
There is also a problem with the null at the end of my code.
While writing code, you should better start from the beginning. i.e.
First, write class name and make sure it there is no problem with brackets
// Step 1: an empty class, used only to confirm that the braces balance.
public class MyClass{
}
Then, write main method in it.
// Step 2: the same class with an empty main method declared directly inside it.
public class MyClass{
public static void main(String[] args){
}
}
Then, write your methods, and test it in main method.
/** Step 3: a class with one instance method that is exercised from main. */
public class MyClass {

    /** Placeholder for real work; prints a fixed message. */
    public void mymethod() {
        System.out.println("say something");
    }

    /** Creates an instance and calls the method under test. */
    public static void main(String[] args) {
        new MyClass().mymethod();
    }
}
If you try to write everything in one shot, it won't work, and if you are not an expert it will be hard for you to track down the problem.
You can't write a method in a method. Close the brackets of main() before you open the brackets of searchCountry()
You don't check anything against being null. Maybe you mean if(c != null)
Just write it this way and you should be fine:
/**
 * Small in-memory table of countries with a console search and a listing.
 */
public class database {
    static BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    static String country[] = new String[100];
    static String capital[] = new String[100];
    static double population[] = new double[100];
    // Fixed-size view of the country array: later writes to country[] show up here.
    static List<String> countriesList = Arrays.asList(country);

    /**
     * Fills the first five slots of each array, then runs one search and
     * lists the stored countries.
     *
     * @throws IOException if standard input cannot be read during the search
     */
    public static void main(String args[]) throws IOException {
        country[0] = "Barbados";
        country[1] = "France";
        country[2] = "Nigeria";
        country[3] = "USA";
        country[4] = "Japan";
        capital[0] = "Bridgetown";
        capital[1] = "Paris";
        capital[2] = "Abuja";
        capital[3] = "Washington";
        capital[4] = "Tokyo";
        population[0] = 65.3;
        population[1] = 315.8;
        population[2] = 170.1;
        population[3] = 2840;
        population[4] = 126.7;
        searchCountry();
        listCountry();
    }

    /**
     * Reads a country name from standard input and reports the index at
     * which it is stored, or that it is not stored. Static so that the
     * static main method can call it without an instance.
     *
     * @throws IOException if standard input cannot be read
     */
    public static void searchCountry() throws IOException {
        System.out.println("Search Country:");
        String output = br.readLine();
        boolean found = false;
        int i;
        for (i = 0; i < country.length; i++) {
            // Unused slots are null; calling equals on the stored value
            // avoids a NullPointerException for them and at end of input.
            if (country[i] != null && country[i].equals(output)) {
                found = true;
                break;
            }
        }
        if (found) {
            System.out.println(output + " is found at index " + i);
        } else {
            System.out.println(output + "Country not found, choose Add country to add it");
        }
    }

    /** Prints every stored country, skipping the unused (null) slots. */
    public static void listCountry() {
        for (String c : countriesList) {
            if (c != null) {
                System.out.println(c);
            }
        }
    }
}
You forgot the c in c != null. You cannot define methods inside other methods in Java. Also, use the equals method when testing for String equality, i.e. if (output.equals(country[i])).