The Apache Tika™ toolkit detects and extracts metadata and structured text content from various documents using existing parser libraries.
'org.apache.tika:tika:1.6'
<!-- Maven POM dependency entry: org.apache.tika:tika, version 1.6 -->
<dependency>
  <groupId>org.apache.tika</groupId>
  <artifactId>tika</artifactId>
  <version>1.6</version>
</dependency>
<!-- Apache Ivy dependency entry: organisation org.apache.tika, module tika, revision 1.6 -->
<dependency org="org.apache.tika" name="tika" rev="1.6"/>
"org.apache.tika", "tika", "1.6"