This is an open-source Java project. It integrates Apache Commons VFS and Jsoup, making it much easier to grab data.
'com.github.abola:crawler:0.9.1'
<dependency>
<groupId>com.github.abola</groupId>
<artifactId>crawler</artifactId>
<version>0.9.1</version>
</dependency>
<dependency org="com.github.abola" name="crawler" rev="0.9.1"/>
"com.github.abola", "crawler", "0.9.1"