How to do web scraping using HtmlUnitDriver? - java

I am getting something like this.
Hi, I am scraping a web page using Selenium WebDriver and I am able to get my data, but the problem is that this interacts directly with a browser. I don't want to open a web browser; I want to scrape all the data as it is, without one.
How can i achieve my goal
Here is my code
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.Select;
/**
 * Prints the text of the commodity result grid from the UP Mandi Parishad
 * page for one commodity and one date, driving a Firefox window.
 */
public class GetData {

    /**
     * Loads the page, fills in the commodity and date, submits the form and
     * prints the text of the result grid.
     *
     * @param args unused
     * @throws InterruptedException if interrupted during one of the fixed pauses
     */
    public static void main(String[] args) throws InterruptedException {
        String sDate = "27/03/2014";
        String url = "http://www.upmandiparishad.in/commodityWiseAll.aspx";
        WebDriver driver = new FirefoxDriver();
        try {
            driver.get(url);
            Thread.sleep(5000);
            // Pick the commodity by the label shown in the drop-down.
            new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity"))).selectByVisibleText("Jo");
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
            Thread.sleep(3000);
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
            Thread.sleep(5000);
            // Only the text of the grid is needed, not its markup.
            WebElement table = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
            String htmlTableText = table.getText();
            System.out.println(htmlTableText);
        } finally {
            // A single quit() ends the session and closes its windows, and it
            // still runs when one of the lookups above throws.
            driver.quit();
        }
    }
}
My updated New code
import com.gargoylesoftware.htmlunit.BrowserVersion;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.support.ui.Select;
/**
 * Same scrape as the Firefox version, but through {@code HtmlUnitDriver},
 * so no browser window is opened.
 */
public class Getdata1 {

    /**
     * Loads the page, dumps its source, submits the commodity/date form and
     * prints the text of the result grid.
     *
     * @param args unused
     * @throws InterruptedException if interrupted during one of the fixed pauses
     */
    public static void main(String[] args) throws InterruptedException {
        WebDriver driver = new HtmlUnitDriver(BrowserVersion.FIREFOX_3_6);
        try {
            driver.get("http://www.upmandiparishad.in/commodityWiseAll.aspx");
            System.out.println(driver.getPageSource());
            Thread.sleep(5000);
            // Pick the commodity by the label shown in the drop-down.
            new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity"))).selectByVisibleText("Jo");
            String sDate = "12/04/2014"; // the date to query
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
            Thread.sleep(3000);
            // Only the text of the grid is needed, not its markup.
            WebElement table = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
            String htmlTableText = table.getText();
            System.out.println(htmlTableText);
        } finally {
            // A single quit() ends the session even when a lookup above throws.
            driver.quit();
        }
    }
}
Thanks in advance

Use HtmlUnit or HtmlUnitDriver by Selenium
// Headless variant: HtmlUnitDriver is used instead of a browser-backed driver.
WebDriver driver = new HtmlUnitDriver(BrowserVersion.FIREFOX_17);
// Declared outside the try so the table text remains available after the driver is gone.
String htmlTableText;
try {
    driver.get("http://www.upmandiparishad.in/commodityWiseAll.aspx");
    System.out.println(driver.getPageSource());
    Thread.sleep(5000);
    // Pick the commodity by the label shown in the drop-down.
    new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity"))).selectByVisibleText("Jo");
    String sDate = "12/04/2014"; // the date to query
    driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
    driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
    Thread.sleep(3000);
    // Only the text of the grid is needed, not its markup.
    WebElement findElement = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
    htmlTableText = findElement.getText();
    // Raw table values; post-process as needed.
    System.out.println(htmlTableText);
} finally {
    // A single quit() ends the session even when a lookup above throws.
    driver.quit();
}
To get tabular output, you can try something like this..
// Re-flow the space-separated table text: every token is followed by a tab,
// and a token that parses as an integer is first moved onto a new line.
String[] arrCells = htmlTableText.split(" ");
for (String cell : arrCells) {
    boolean isNumber;
    try {
        Integer.parseInt(cell);
        isNumber = true;
    } catch (NumberFormatException ex) {
        // Not an integer: the token stays on the current output line.
        isNumber = false;
    }
    System.out.print((isNumber ? "\n" : "") + cell + "\t");
}

Related

How to get cookie value from a WebDriver using selenium?

I am using the following code to get the cookie values, but I am only getting the 1st and 2nd parts, not the 3rd and 4th parts (they are null, as you can see in the output).
Please help me with this. I have attached a screenshot of the cookies I get manually.
WebDriver driver;
System.setProperty("webdriver.ie.driver",
        "C:\\Users\\MR049860\\Documents\\Selenium\\IEDriverServer\\IEDriverServer.exe");
driver = new InternetExplorerDriver();
driver.get("https://www.example.com");
// Fill in the login form and submit it with ENTER once the button is clickable.
driver.findElement(By.name("j_username")).sendKeys("publisher");
driver.findElement(By.name("j_password")).sendKeys("Passw0rd");
WebDriverWait wait = new WebDriverWait(driver, 5);
WebElement element = wait.until(ExpectedConditions.elementToBeClickable(By.id("BtnButton__")));
element.sendKeys(Keys.ENTER);
// Write one line per cookie: name;value;domain;path;expiry;secure
File file = new File("madhu.data");
// try-with-resources closes the writer even if a write fails part-way.
// FileWriter(file) creates the file or truncates an existing one, so no
// separate delete/create step is needed.
try (BufferedWriter cookieWriter = new BufferedWriter(new FileWriter(file))) {
    for (Cookie ck : driver.manage().getCookies()) {
        cookieWriter.write(ck.getName() + ";" + ck.getValue() + ";" + ck.getDomain() + ";" + ck.getPath() + ";" + ck.getExpiry() + ";" + ck.isSecure());
        cookieWriter.newLine();
    }
} catch (Exception ex) {
    // Best effort, as before: report the failure and carry on.
    ex.printStackTrace();
}
Output : -
ASPSESSIONIDSZQPRQCS;NFPFIAGDBMJNOMKPCPKESHDC;null;/;null;true
You can get only name and value. You are not allowed to get cookies of other domains/paths or expiry date. That's the security policy of web browsers.
Below code can be used to get the cookie value
package utility;
import java.util.Set;
import org.openqa.selenium.By;
import org.openqa.selenium.Capabilities;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.ie.InternetExplorerDriver;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Helpers that log in through Internet Explorer and read the cookie values
 * and session id of the resulting WebDriver session.
 */
public class CookieUtility {

    /**
     * Starts Internet Explorer, submits the login form and pauses (at most
     * two times ten seconds) until an element with id {@code textbox} exists.
     *
     * @return the driver after the login attempt; the caller must quit it
     * @throws InterruptedException if interrupted during a pause
     */
    public static InternetExplorerDriver getDriver() throws InterruptedException {
        System.setProperty("webdriver.ie.driver",
                "C:\\Documents\\Selenium\\IEDriverServer\\IEDriverServer.exe");
        InternetExplorerDriver driver = new InternetExplorerDriver();
        try {
            driver.get("https://example.com");
            // Fill in the login form.
            driver.findElement(By.name("username")).sendKeys("password");
            driver.findElement(By.name("password")).sendKeys("Paswrd");
            WebDriverWait wait = new WebDriverWait(driver, 5);
            WebElement element = wait.until(ExpectedConditions.elementToBeClickable(By.id("logonButton")));
            element.sendKeys(Keys.ENTER);
            for (int i = 0; i < 2 && driver.findElements(By.id("textbox")).size() == 0; i++) {
                Thread.sleep(10000);
            }
            // NOTE(review): element was located before the form was submitted;
            // if the page has been replaced this reference may be stale - confirm.
            element.sendKeys(Keys.F5);
            return driver;
        } catch (RuntimeException | InterruptedException e) {
            // Do not leave a browser behind when the login sequence fails.
            driver.quit();
            throw e;
        }
    }

    /**
     * Collects the value of every cookie visible to the driver.
     *
     * @param driver an open session
     * @return the cookie values, in the iteration order of the cookie set
     * @throws InterruptedException never thrown here; kept for existing callers
     */
    public static String[] getCookieValues(InternetExplorerDriver driver) throws InterruptedException {
        Set<Cookie> cks = driver.manage().getCookies();
        String[] cookieValues = new String[cks.size()];
        int i = 0;
        for (Cookie ck : cks) {
            cookieValues[i++] = ck.getValue();
        }
        return cookieValues;
    }

    /**
     * @param driver an open session
     * @return the WebDriver session id as a string
     */
    public static String getSessionId(InternetExplorerDriver driver) {
        return driver.getSessionId().toString();
    }

    /**
     * Logs in, reads the cookie values and the session id, then closes the browser.
     *
     * @param args unused
     * @throws InterruptedException if interrupted while waiting during login
     */
    public static void main(String[] args) throws InterruptedException {
        InternetExplorerDriver driver = getDriver();
        try {
            String[] values = getCookieValues(driver);
            String sessionId = getSessionId(driver);
        } finally {
            // Without this the browser and driver server keep running after main returns.
            driver.quit();
        }
    }

    /**
     * Builds a Cookie header value from a session id and three cookie values.
     * Note the order in the output: {@code cookie2} precedes {@code cookie1}.
     *
     * @param sessionId value for {@code JSESSIONID}
     * @param cookie1 value for {@code hi.session.id.identifier}
     * @param cookie2 value for {@code hi.session.co.entity}
     * @param cookie3 value for {@code hi.session.client.identifier}
     * @return the four pairs joined with {@code "; "}
     */
    public static String getcookiestring(String sessionId, String cookie1, String cookie2, String cookie3) {
        return "JSESSIONID=" + sessionId + "; hi.session.co.entity=" + cookie2 + "; hi.session.id.identifier="
                + cookie1 + "; hi.session.client.identifier=" + cookie3;
    }
}

How to open multiple tab in the same browser?

import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
/**
 * Opens Google in Firefox, sends Ctrl+T to the page body and then loads
 * Gmail through the same driver.
 */
public class newtab {

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        final String geckoDirectory = "geckodriver path\\";
        System.setProperty("webdriver.gecko.driver", geckoDirectory + "geckodriver.exe");

        final WebDriver browser = new FirefoxDriver();
        browser.get("https://www.google.com");

        // Keyboard shortcut sent to the page body.
        final String shortcut = Keys.CONTROL + "t";
        browser.findElement(By.cssSelector("body")).sendKeys(shortcut);

        browser.get("http://www.gmail.com");
    }
}
In my code I want to open two tabs in the same browser, but this code is opening only one tab. How do I open multiple tabs in the same browser?
You are not able to open Gmail in the other tab because the focus is still on the first window. Selenium identifies the window it works with by its window handle, so you first have to switch to the new window using its handle, like this: driver.switchTo().window(handle value)
here is the complete code:
/**
 * Opens Google in Chrome, sends Ctrl+T, switches the driver to the last
 * window handle that is not the original one, and loads Gmail there.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.setProperty("webdriver.chrome.driver", "c:\\SRP\\chromedriver.exe");
    WebDriver driver = new ChromeDriver();
    driver.get("https://www.google.com");

    String originalHandle = driver.getWindowHandle();
    driver.findElement(By.cssSelector("body")).sendKeys(Keys.CONTROL + "t");

    // Keep the last handle that differs from the original window's handle.
    String openedHandle = null;
    for (String handle : driver.getWindowHandles()) {
        if (!handle.equals(originalHandle)) {
            openedHandle = handle;
        }
    }

    driver.switchTo().window(openedHandle);
    driver.get("http://www.gmail.com");
}
import java.awt.AWTException;
import java.awt.Robot;
import java.awt.event.KeyEvent;
import java.util.Set;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
/**
 * Opens Google in Firefox, presses Ctrl+T through {@link Robot} and loads
 * Gmail in every window handle other than the original one.
 */
public class NewTab {

    /**
     * @param args unused
     * @throws AWTException if the platform does not allow creating a {@link Robot}
     */
    public static void main(String[] args) throws AWTException {
        System.setProperty("webdriver.gecko.driver", "C:\\Users\\User-024\\Downloads\\geckodriver.exe");
        WebDriver driver = new FirefoxDriver();
        driver.get("https://www.google.com");
        String parent = driver.getWindowHandle();

        Robot r = new Robot();
        r.keyPress(KeyEvent.VK_CONTROL);
        r.keyPress(KeyEvent.VK_T);
        // Every keyPress needs a matching keyRelease, otherwise Ctrl and T
        // stay held down for the rest of the desktop session.
        r.keyRelease(KeyEvent.VK_T);
        r.keyRelease(KeyEvent.VK_CONTROL);

        Set<String> browsers = driver.getWindowHandles();
        for (String handle : browsers) {
            if (!handle.equals(parent)) {
                driver.switchTo().window(handle);
                driver.get("http://www.gmail.com");
            }
        }
    }
}
This code is working for me.
I have written simple code and it worked for me. It is opening two different URLs in different tabs of same browser.
/**
 * Opens Google in a new Firefox session, sends Ctrl+T to the page body and
 * then loads Gmail through the same driver.
 */
public void cls() {
    final WebDriver browser = new FirefoxDriver();
    browser.get("http://google.com");

    // Handle of the initial window; it is read here but not used afterwards.
    final String initialHandle = browser.getWindowHandle();

    final String shortcut = Keys.CONTROL + "t";
    browser.findElement(By.cssSelector("body")).sendKeys(shortcut);

    browser.get("http://gmail.com");
}

Not able to click all links on a web page using Java and Selenium Webdriver

package testPackage;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.testng.annotations.*;
public class AllLinkVerificationInAPage {
WebDriver driver;
#BeforeTest
public void OpenApp()
{
System.setProperty("webdriver.chrome.driver", "E:/Selenium/Webdriver /Softwares/chromedriver.exe");
driver = new ChromeDriver();
driver.navigate().to("http://ndtv.com/");
driver.manage().window().maximize();
driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
WebElement popUp = driver.findElement(By.xpath("//*[#id='__cricketsubscribe']/div[2]/div[2]/a[1]"));
popUp.click();
}
#Test
public void clickLinks() throws InterruptedException
{
//extract the list of WenElements and its count
List<WebElement> linkElements = driver.findElements(By.tagName("a"));
int count = linkElements.size();
System.out.println("Total number of links = " + count );
//test each link
for(WebElement currentElement : linkElements)
{
String link = currentElement.getText();
System.out.println(link);
if(link !="")
{
currentElement.click();
System.out.println("Working Fine");
}
driver.navigate().back();
Thread.sleep(3000);
}
}
}
When I run this code I get following error:-
org.openqa.selenium.StaleElementReferenceException: stale element
reference: element is not attached to the page document
I tried with implicit wait as well but getting same issue.
Each time the DOM is changed or refreshed, for example when navigating to a different page, the driver loses the elements it previously located. You need to locate the list again on each iteration:
// Count the anchors once, then look them up again on every pass so that no
// reference from before a navigation is reused.
int count = driver.findElements(By.tagName("a")).size();
for (int i = 0; i < count; ++i) {
    List<WebElement> linkElements = driver.findElements(By.tagName("a"));
    if (i >= linkElements.size()) {
        // The page now has fewer anchors than when they were counted.
        break;
    }
    WebElement currentElement = linkElements.get(i);
    String link = currentElement.getText();
    System.out.println(link);
    // Compare string content, not references: link != "" is true even for an empty text.
    if (!link.isEmpty()) {
        currentElement.click();
        System.out.println("Working Fine");
        // Only go back when a click actually left the page.
        driver.navigate().back();
        Thread.sleep(3000);
    }
}

How to upload Multiple files in selenium?

I'm trying to use following code :-
driver.findElement(By.xpath(".//*[#id='attach0']")).sendKeys("first path"+"\n"+"second path""+"\n"third path");
I didn't get result.
You can use AutoIt or Java code. Below I have used both for your reference; try either one of them.
import java.io.IOException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.support.FindBy;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
public class AutoITforUpload {
private static WebDriver driver;
private static WebDriverWait waitForElement;
#FindBy(css = "span.btn.btn-success.fileinput-button")
private WebElement Add_files_btn;
#BeforeClass
public static void setUp() {
DesiredCapabilities desicap = new DesiredCapabilities();
System.setProperty("webdriver.chrome.driver", "D:/WorkSpace/Driver/chromedriver.exe");
desicap = DesiredCapabilities.chrome();
driver = new ChromeDriver(desicap);
driver.manage().window().maximize();
driver.get("https://blueimp.github.io/jQuery-File-Upload/");
waitForElement = new WebDriverWait(driver, 30);
}
#Test
public void AutoitUpload() {
// String filepath =
// "D:/Mine/GitHub/BasicProgramLearn/AutoItScript/unnamed.png";
WebElement btn = driver.findElement(By.cssSelector("span.btn.btn-success.fileinput-button"));
String file_dir = System.getProperty("user.dir");
String cmd = file_dir + "\\AutoItScript\\unnamed.png";
System.out.println("File directory is " + file_dir);
try {
// Using ordinary
Thread.sleep(3000);
for(int i=0;i<3;i++) //multiple times upload ;
driver.findElement(By.xpath("//*[#id='fileupload']/div/div[1]/span[1]/input")).sendKeys(cmd);
//use any String Array for multiple files
waitForElement(btn);
btn.click();
Thread.sleep(3000);
System.out.println(file_dir + "/AutoItScript/FileUploadCode.exe");
Runtime.getRuntime().exec(file_dir + "\\AutoItScript\\ChromeFileUpload.exe" + " " + cmd);
} catch (InterruptedException | IOException e) {
e.printStackTrace();
}
}
#AfterClass
public static void TearDown() {
try {
Thread.sleep(5000);
driver.quit();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
private void waitForElement(WebElement vElement) {
waitForElement.until(ExpectedConditions.visibilityOf(vElement));
}
}
The code in AutoIt is
; Fills in a file-chooser dialog: waits for a window titled "Open", puts the
; first command-line argument into its Edit1 control and clicks Button1.
#include<IE.au3>
; $CmdLine[0] is the number of command-line parameters (per the AutoIt docs).
; NOTE(review): the body also runs when no parameter is given, in which case
; $CmdLine[1] does not exist - confirm the intended condition.
If $CmdLine[0] < 2 Then
$window_name="Open"
; Block until the dialog exists.
WinWait($window_name)
ControlFocus($window_name,"","Edit1")
; The first parameter is the file path passed by the Java test.
ControlSetText($window_name,"","Edit1",$CmdLine[1])
ControlClick($window_name,"","Button1")
EndIf
Hope this gives you an idea

Exception in thread “main” org.openqa.selenium.StaleElementReferenceException: Element not found in the cache

Can someone help with it ?
I have error
"Exception in thread “main” org.openqa.selenium.StaleElementReferenceException: Element not found in the cache"
Why showing this error?
I need to hover on each category menu than click on each text in sub-menu.
/**
 * Opens the Santander UK home page in Firefox, reads one locator per line
 * from "submenu.txt" and, for each of them, hovers over the first main-menu
 * entry, clicks the located sub-menu element and navigates back.
 */
public class santander {
private static WebDriver driver = null;
// NOTE(review): driver is still null when this initialiser runs, so js is null;
// main declares a local js that shadows this field.
public static JavascriptExecutor js = (JavascriptExecutor) driver;
/**
 * Runs the hover-and-click loop described on the class.
 *
 * @param args unused
 * @throws FileNotFoundException if "submenu.txt" cannot be opened
 * @throws InterruptedException if interrupted during one of the fixed pauses
 * @throws IOException if reading "submenu.txt" fails
 */
public static void main(String[] args) throws FileNotFoundException, InterruptedException, IOException {
// TODO Auto-generated method stub
driver = new FirefoxDriver();
driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
driver.get("http://www.santander.co.uk/uk/index");
driver.manage().window().maximize();
// NOTE(review): the same URL is loaded a second time here.
driver.get("http://www.santander.co.uk/uk/index");
// Local js and wait are created but not used by any active line below.
JavascriptExecutor js = (JavascriptExecutor) driver;
WebDriverWait wait = new WebDriverWait(driver, 10);
String submenutxtlinks = "submenu.txt";
List<String> submenu = new ArrayList<String>();
// Read every line of the file into the submenu list.
// NOTE(review): the reader is not closed if readLine throws.
BufferedReader reader = new BufferedReader(new FileReader(submenutxtlinks));
String line;
while ((line = reader.readLine()) != null) {
submenu.add(line);
}
reader.close();
Actions action = new Actions(driver);
/*
WebElement menu = driver.findElement(By.linkText("Current Accounts"));
action.moveToElement(menu).perform();
WebElement submenu = driver.findElement(By.linkText("See all current accounts"));
action.moveToElement(submenu);
action.click();
action.perform();
*/
// String title = driver.getTitle();
// wait.until(ExpectedConditions.titleIs(title));
// driver.navigate().back();
//Loop to read all lines one by one from file and print It.
// while((menu = BR.readLine())!= null && !menu.isEmpty()){
// driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
// Action hovering = action.moveToElement(a).build();
// hovering.perform();
//action.moveToElement(a).perform();
//action.clickAndHold(a).perform();
// NOTE(review): the menu element and the hover action are created once, before
// the loop; each iteration clicks a link and navigates back, after which this
// reference belongs to a page that has been replaced.
WebElement a = driver.findElement(By.cssSelector("#nav > div.navMain > div.nav_menu > nav > ul > li:nth-child(1) > a"));
Action hovering = action.moveToElement(a).build();
//Thread.sleep(2000);
for (int i=0;i<=submenu.size()-1;i++ ) {
//String b = submenu.get(i);
// System.out.println(b);
//WebElement b = driver.findElement(By.xpath(submenu.get(i)));
try{
//Your code which causes exception
hovering.perform();
//action.moveToElement(b).click(b).build().perform();
Thread.sleep(1000);
// Each line of the file is passed on as the locator string to click.
clickAnElementByLinkText(submenu.get(i));
//b.click();
Thread.sleep(1000);
/*
wait.until(ExpectedConditions.visibilityOf(b));
action.moveToElement(b);
action.click();
action.perform();
*/
// wait.until(ExpectedConditions.titleIs(title));
driver.navigate().back();
//driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
Thread.sleep(2000);
// driver.get("http://www.santander.co.uk/uk/index");
//driver.navigate();
Thread.sleep(2000);
// driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
}
catch(org.openqa.selenium.StaleElementReferenceException e){
// NOTE(review): the exception is silently dropped; nothing is retried or logged.
//Repeat the code in try
}
}
// }
driver.close();
}
/**
 * Waits up to 10 seconds for an element to be present, then looks it up
 * again and clicks it. Despite the method and parameter names, the argument
 * is used as an XPath expression, not as link text.
 *
 * @param linkText XPath expression of the element to click
 */
public static void clickAnElementByLinkText(String linkText) {
WebDriverWait wait = new WebDriverWait(driver, 10);
wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath(linkText)));
driver.findElement(By.xpath(linkText)).click();
}
}
Don't mix implicit waits, explicit waits and Thread.sleep in the same context.
Learn when and where each of them should be used.
The hover element won't work because the driver navigates to a different page and the element no longer exists; you have to find it again inside the for loop.
For the sake of test I've hardcoded the submenulist
This code will do what you've asked for
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * For each hard-coded sub-menu label, hovers over the first main-menu entry
 * of the Santander UK home page, clicks the sub-menu link with that label
 * and navigates back.
 */
public class ListTest {
    static WebDriver driver;

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> submenu = Arrays.asList("See all current accounts", "1|2|3 Current Account", "Everyday Current Account", "Basic Current Account", "Choice Current Account");
        driver = new FirefoxDriver();
        try {
            driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
            driver.manage().window().maximize();
            driver.get("http://www.santander.co.uk/uk/index");
            for (String sMenu : submenu) {
                // Located again on every pass: after click + back the previous
                // reference belongs to a page that has been replaced.
                WebElement a = driver.findElement(By.cssSelector("#nav > div.navMain > div.nav_menu > nav > ul > li:nth-child(1) > a"));
                new Actions(driver).moveToElement(a).build().perform();
                // XPath attribute tests are written with '@'.
                clickAnElementByLinkText("//li[@role='listitem']/a[normalize-space(text())='" + sMenu + "']");
                driver.navigate().back();
            }
        } finally {
            // End the browser session even when a lookup or click throws.
            driver.quit();
        }
    }

    /**
     * Waits up to 10 seconds for the element to be present, then clicks it.
     * Despite the method and parameter names, the argument is used as an
     * XPath expression.
     *
     * @param linkText XPath expression of the element to click
     */
    public static void clickAnElementByLinkText(String linkText) {
        WebDriverWait wait = new WebDriverWait(driver, 10);
        wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath(linkText))).click();
    }
}
You are trying to access an element which no longer exists.
The code collects the elements on the page,
then it clicks an element, which changes the page source,
then it tries to access an object it collected before, but that object is no longer there (what you see is a new one).
You can move the element lookup into the loop and use the loop index 'i' to pick the element in every iteration (if you must click an element in every iteration).

Categories