public class RegexTextStripper
extends org.apache.pdfbox.util.PDFTextStripper
Constructor and Description |
---|
RegexTextStripper()
Initializes our hash map, in addition to calling the parent constructor.
|
Modifier and Type | Method and Description |
---|---|
void |
clearSuggestedRedactionsMap()
Clears out our map of suggested Redactions.
|
java.util.Map<java.lang.String,java.lang.String> |
getRedactionRegexAttrMap()
Returns the map of things to be redacted and their corresponding regex pattern.
|
java.util.Map<java.lang.String,java.util.List<RedactionSuggestion>> |
getSuggestedRedactionsMap()
Returns the map of possible redaction strings to suggested redaction objects.
|
void |
setRedactionRegexAttrMap(java.util.Map<java.lang.String,java.lang.String> redactionRegexAttrMap)
Sets the map of things to be redacted and their corresponding regex pattern.
|
protected void |
writeString(java.lang.String text,
java.util.List<org.apache.pdfbox.util.TextPosition> textPositions)
Called for each line of text stripped off of the PDF.
|
endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageSeparator, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getText, getWordSeparator, handleLineSeparation, inspectFontEncoding, isParagraphSeparation, matchListItemPattern, matchPattern, processPage, processPages, processTextPosition, resetEngine, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageSeparator, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageSeperator, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeText, writeText, writeWordSeparator
getColorSpaces, getCurrentPage, getFonts, getGraphicsStack, getGraphicsState, getGraphicsStates, getResources, getTextLineMatrix, getTextMatrix, getTotalCharCnt, getValidCharCnt, getXObjects, isForceParsing, processEncodedText, processOperator, processOperator, processStream, processSubStream, registerOperatorProcessor, setColorSpaces, setFonts, setForceParsing, setGraphicsStack, setGraphicsState, setGraphicsStates, setTextLineMatrix, setTextMatrix
public RegexTextStripper() throws java.io.IOException
java.io.IOException
- If there is an error loading the properties.
public java.util.Map<java.lang.String,java.lang.String> getRedactionRegexAttrMap()
public void setRedactionRegexAttrMap(java.util.Map<java.lang.String,java.lang.String> redactionRegexAttrMap)
public java.util.Map<java.lang.String,java.util.List<RedactionSuggestion>> getSuggestedRedactionsMap()
RedactionSuggestion objects.
public void clearSuggestedRedactionsMap()
protected void writeString(java.lang.String text, java.util.List<org.apache.pdfbox.util.TextPosition> textPositions) throws java.io.IOException
Overrides: writeString in class org.apache.pdfbox.util.PDFTextStripper
java.io.IOException