The OpenIMAJ NLP Library contains a text pre-processing pipeline that takes raw, unstructured text and produces part-of-speech-tagged, stemmed text.
'org.openimaj:nlp:1.3'
<dependency> <groupId>org.openimaj</groupId> <artifactId>nlp</artifactId> <version>1.3</version> </dependency>
<dependency org="org.openimaj" name="nlp" rev="1.3"/>
"org.openimaj", "nlp", "1.3"