This is an open-source Java project. It integrates Apache Commons VFS and Jsoup, which makes grabbing data much easier.
'com.github.abola:crawler:0.9.2'
<dependency>
<groupId>com.github.abola</groupId>
<artifactId>crawler</artifactId>
<version>0.9.2</version>
</dependency>
<dependency org="com.github.abola" name="crawler" rev="0.9.2"/>
"com.github.abola", "crawler", "0.9.2"