This is an open-source Java project. It integrates Apache Commons VFS and Jsoup, making it much easier to grab data.
'com.github.abola:crawler:1.0.0'
<dependency>
<groupId>com.github.abola</groupId>
<artifactId>crawler</artifactId>
<version>1.0.0</version>
</dependency>
<dependency org="com.github.abola" name="crawler" rev="1.0.0"/>
"com.github.abola", "crawler", "1.0.0"