Como criar um rastreador da web em Java?

package local.example.crawler;

/**
 * A runnable web crawler.
 *
 * <p>Implementations fetch and process their configured set of URLs when
 * {@link #run()} is invoked. The single abstract method makes this a
 * functional interface, so implementations may also be supplied as lambdas.
 */
@FunctionalInterface
public interface ICrawler {

    /** Starts the crawl. */
    void run();
}
package local.example.crawler;

/**
 * Wires up and exposes the available crawler implementations.
 *
 * <p>Currently holds a single Jsoup-backed crawler behind the
 * {@link ICrawler} interface.
 */
public class CrawlerMaker {

    /** The Jsoup-backed crawler, created eagerly at construction time. */
    private final ICrawler jsoupCrawler = new JsoupCrawler();

    /** Runs the Jsoup-based crawler. */
    public void useJsoupCrawler() {
        this.jsoupCrawler.run();
    }
}
package local.example.crawler;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.google.common.base.Charsets;
import com.google.common.io.Files;

/**
 * {@link ICrawler} implementation backed by Jsoup.
 *
 * <p>Reads a list of URLs from a configured file (one URL per line, UTF-8),
 * fetches each page, and appends a one-line report
 * ({@code url === bodySize === title}) to a configured report file.
 */
public class JsoupCrawler implements ICrawler {

    /** Path of the UTF-8 file listing one URL per line. */
    private final String urlListFile = Conf.getString("JsoupCrawler.urlListFile");

    /** Path of the UTF-8 file the crawl report is appended to. */
    private final String reportFile = Conf.getString("JsoupCrawler.reportFile");

    /** Scans every URL listed in the configured URL file. */
    @Override
    public void run() {
        for (String url : this.getUrlList()) {
            this.scan(url);
        }
    }

    /**
     * Reads the configured URL list file.
     *
     * @return the URLs, one per line; an empty list if the file cannot be read.
     *         (The original returned {@code null} on failure, which made
     *         {@link #run()} throw a {@code NullPointerException}.)
     */
    public List<String> getUrlList() {
        try {
            return Files.readLines(new File(this.urlListFile), Charsets.UTF_8);
        } catch (IOException e) {
            // Best effort: report the problem and crawl nothing instead of crashing.
            e.printStackTrace();
            return Collections.emptyList();
        }
    }

    /**
     * Fetches a single URL and appends "url === bodySize === title" to the
     * report file, echoing the same line to stdout.
     *
     * @param url the page to fetch
     */
    public void scan(String url) {
        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Skip unreachable pages. The original left doc null here and then
            // dereferenced it, which always threw a NullPointerException.
            e.printStackTrace();
            return;
        }

        int size = doc.body().toString().length();
        String title = doc.title();
        // Bug fix: the original appended a literal "n" instead of a newline,
        // so the report file ended up as one single run-on line.
        String log = url + " === " + size + " === " + title + "\n";

        System.out.println(log);

        try {
            Files.append(log, new File(this.reportFile), Charsets.UTF_8);
        } catch (IOException e) {
            // The page was still scanned; only the report entry is lost.
            e.printStackTrace();
        }
    }
}
package local.example.run;

import local.example.crawler.CrawlerMaker;

public class Run {
public static void main(String[] args) {
CrawlerMaker crawlerMaker = new CrawlerMaker();
crawlerMaker
.useJsoupCrawler();
}
}