package local.example.crawler;
/**
 * Common entry point for crawler implementations.
 */
public interface ICrawler {

    /**
     * Executes the crawler. What is crawled and how results are reported is
     * decided by the implementing class.
     */
    void run();
}
package local.example.crawler;
/**
 * Owns a {@link JsoupCrawler} instance and exposes a method to start it.
 */
public class CrawlerMaker {

    /** Crawler created once at construction time and started on demand. */
    private final ICrawler jsoupCrawler;

    /**
     * Creates the maker together with its jsoup-backed crawler.
     */
    public CrawlerMaker() {
        this.jsoupCrawler = new JsoupCrawler();
    }

    /**
     * Runs the jsoup-backed crawler by delegating to {@link ICrawler#run()}.
     */
    public void useJsoupCrawler() {
        this.jsoupCrawler.run();
    }
}
package local.example.crawler;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
/**
 * {@link ICrawler} implementation that fetches pages with jsoup and records,
 * for every URL, the length of the page body markup and the page title.
 *
 * <p>The URLs are read from the file named by the
 * {@code JsoupCrawler.urlListFile} configuration key, one URL per line. Each
 * result line is printed to standard output and appended to the file named by
 * the {@code JsoupCrawler.reportFile} configuration key.
 */
public class JsoupCrawler implements ICrawler {

    /** Path of the text file holding the URLs to crawl, one per line. */
    private final String urlListFile = Conf.getString("JsoupCrawler.urlListFile");

    /** Path of the report file that result lines are appended to. */
    private final String reportFile = Conf.getString("JsoupCrawler.reportFile");

    /**
     * Scans every URL returned by {@link #getUrlList()}, in file order.
     */
    @Override
    public void run() {
        for (String url : this.getUrlList()) {
            this.scan(url);
        }
    }

    /**
     * Reads the configured URL list file as UTF-8 text.
     *
     * @return the lines of the URL list file; an empty list (never
     *         {@code null}) when the file cannot be read
     */
    public List<String> getUrlList() {
        try {
            return Files.readLines(new File(this.urlListFile), Charsets.UTF_8);
        } catch (IOException e) {
            System.err.println("Cannot read URL list file '" + this.urlListFile + "'");
            e.printStackTrace();
            // An empty list lets callers iterate safely instead of hitting a
            // NullPointerException; fully qualified to avoid a new import.
            return new java.util.ArrayList<String>();
        }
    }

    /**
     * Fetches one page and reports its body length and title.
     *
     * <p>The report line has the form {@code url === size === title}. It is
     * printed to standard output and appended, followed by a newline, to the
     * report file. A {@code null} or blank URL is ignored, and a URL that
     * cannot be fetched is reported on standard error and skipped.
     *
     * @param url address of the page to fetch; surrounding whitespace is ignored
     */
    public void scan(String url) {
        // A blank line in the URL list cannot be a valid address; skip it.
        if (url == null || url.trim().isEmpty()) {
            return;
        }
        String target = url.trim();

        Document doc;
        try {
            doc = Jsoup.connect(target).get();
        } catch (IOException e) {
            System.err.println("Cannot fetch '" + target + "'");
            e.printStackTrace();
            // Without a document there is nothing to measure; move on so one
            // unreachable page does not abort the whole crawl.
            return;
        }

        int size = doc.body().toString().length();
        String title = doc.title();
        String entry = target + " === " + size + " === " + title;

        // println supplies the console line break; the file gets an explicit
        // newline so that each report entry sits on its own line.
        System.out.println(entry);
        try {
            Files.append(entry + "\n", new File(this.reportFile), Charsets.UTF_8);
        } catch (IOException e) {
            System.err.println("Cannot append to report file '" + this.reportFile + "'");
            e.printStackTrace();
        }
    }
}
package local.example.run;
import local.example.crawler.CrawlerMaker;
public class Run {
public static void main(String[] args) {
CrawlerMaker crawlerMaker = new CrawlerMaker();
crawlerMaker.useJsoupCrawler();
}
}