The OpenIMAJ NLP Library contains a text pre-processing pipeline that takes raw, unstructured text and produces part-of-speech-tagged, stemmed text.
'org.openimaj:nlp:1.3.1'
<dependency> <groupId>org.openimaj</groupId> <artifactId>nlp</artifactId> <version>1.3.1</version> </dependency>
<dependency org="org.openimaj" name="nlp" rev="1.3.1"/>
"org.openimaj", "nlp", "1.3.1"