This plugin crawls websites and extracts their content.
'org.codelibs:elasticsearch-river-web:1.1.2'
<dependency>
<groupId>org.codelibs</groupId>
<artifactId>elasticsearch-river-web</artifactId>
<version>1.1.2</version>
</dependency>
<dependency org="org.codelibs" name="elasticsearch-river-web" rev="1.1.2"/>
"org.codelibs", "elasticsearch-river-web", "1.1.2"