Selenium tut for scraping 08-08-2016, 02:19 AM
#1
I'm not that good at Java; I've just used it enough to know what I'm doing (when it comes to Selenium).
With comments
Code:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import java.io.IOException;
public class main extends Thread {
public static int fuckoffloop = 0;
public static void main(String[] args) throws IOException, InterruptedException {
WebDriver driver = new ChromeDriver(); // First we need to define driver, like so.
// We will be scrapping from https://sinister.ly/memberlist
int Memebers = 503; // Because there 500 users per page.
int pageNumber = 1; // first page
driver.get("https://sinister.ly/memberlist.php?sort=lastvisit&order=descending&perpage=500&page=1");
for (int x = 3; x < Memebers; x++) { // Normal for loop.
// So, we can goto a site like so, I saw "perpage=20" so I just changed it so we don't have to go to as many pages (also faster)
/*Now we need to find a class or id, xpath, ect.
we will use xpath since there is multipliable "trow1" class's
I only know how to get xpath in chrome (since I only use chrome), So right click your text or whatever you want to scrape, right on the HTML code it highlights, then copy -> copy xpath
So if we click on the first user on the URL and get the xpath of it, it will be
//*[@id="content"]/table/tbody/tr[3]/td[2]/a/span
Now, This only is for that person and that person only, so lets click on the second memeber.
//*[@id="content"]/table/tbody/tr[4]/td[2]/a/span
There, we can see a change, it incremented by one.
Now, i'm going to scroll all the way to the bottom so I can see when too tell my loop to stop
//*[@id="content"]/table/tbody/tr[502]/td[2]/a/span
That is the last memeber on the page,so lets get to work writing this
*/
System.out.println(driver.findElement(By.xpath("//*[@id=\"content\"]/table/tbody/tr["+x+"]/td[2]/a/span")).getText()); // This is the xpath, you can see we put +x+ so we can get each user's name, also, getText will get the text of your WebElement
if (x == 502){ // This check we will use to get to the next page
x =3; // So we set back to the default three (which will be the first user)
pageNumber++; // we incremente pageNumber by 1 so we go to the next page
driver.get("https://sinister.ly/memberlist.php?sort=lastvisit&order=descending&perpage=500&page=" + pageNumber); // we get the next page to scrape from
}
}
}
}
Without the tutorial comments
Code:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import java.io.IOException;
public class main extends Thread {
public static int fuckoffloop = 0;
public static void main(String[] args) throws IOException, InterruptedException {
WebDriver driver = new ChromeDriver();
int Memebers = 503;
int pageNumber = 1;
driver.get("https://sinister.ly/memberlist.php?sort=lastvisit&order=descending&perpage=500&page=1");
for (int x = 3; x < Memebers; x++) {
System.out.println(driver.findElement(By.xpath("//*[@id=\"content\"]/table/tbody/tr[" + x + "]/td[2]/a/span")).getText());
System.out.println(x);
if (x == 502) {
x = 3;
pageNumber++;
driver.get("https://sinister.ly/memberlist.php?sort=lastvisit&order=descending&perpage=500&page=" + pageNumber);
}
}
}
}