Filter example - binary to string conversion
This example shows a simple filter plugin that converts a binary format, such as a PDF, into a String containing HTML. Here we outline an example that is given a binary document and converts it to a String.
The example below shows a simple filter implementation and corresponding tests.
Example
In this example we simply convert all JSON documents from raw bytes to a String, assuming the charset is UTF-8. The example implements BytesDocumentFilter. We are required to implement canFilter(), which checks that the document type is JSON to determine whether the filter should run, and filterAsBytesDocument(), which contains the logic for the filter.
DocumentFilterBinaryToStringConversion.java
package com.example.pluginexamples;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.NoContentDocument;
import com.funnelback.filter.api.documents.BytesDocument;
import com.funnelback.filter.api.filters.PreFilterCheck;
import com.funnelback.filter.api.filters.BytesDocumentFilter;
import com.funnelback.filter.api.DocumentType;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Example filter that turns JSON documents held as raw bytes into String
 * documents, decoding the bytes as UTF-8.
 */
public class DocumentFilterBinaryToStringConversion implements BytesDocumentFilter {

    private static final Logger log = LogManager.getLogger(DocumentFilterBinaryToStringConversion.class);

    /**
     * Decides whether this filter should run on the given document.
     *
     * <p>Only documents whose type reports itself as JSON are attempted; every
     * other document is skipped so it passes through unchanged.
     *
     * @param document the document being considered (content is not available here)
     * @param context  the filter context supplied by the caller
     * @return {@link PreFilterCheck#ATTEMPT_FILTER} for JSON documents,
     *         {@link PreFilterCheck#SKIP_FILTER} otherwise
     */
    @Override
    public PreFilterCheck canFilter(NoContentDocument document, FilterContext context) {
        // Only filter JSON documents
        if (document.getDocumentType().isJSON()) {
            return PreFilterCheck.ATTEMPT_FILTER;
        }
        return PreFilterCheck.SKIP_FILTER;
    }

    /**
     * Converts the document's byte content into a String document.
     *
     * @param document the bytes document to convert
     * @param context  the filter context, used to obtain the document factory
     * @return a filter result wrapping a String document typed as
     *         {@code DocumentType.MIME_APPLICATION_JSON_TEXT}
     */
    @Override
    public FilterResult filterAsBytesDocument(BytesDocument document, FilterContext context) {
        log.trace("Get the document content as a byte array and convert it to a String assuming the bytes are UTF-8.");

        // Get the document content as a byte array and convert it to a String
        // assuming the bytes are UTF-8
        byte[] documentContentAsBytes = document.getCopyOfContents();
        String contentAsString = new String(documentContentAsBytes, UTF_8);

        // Build the String document from the original document with the decoded content.
        return FilterResult.of(context.getFilterDocumentFactory().toStringDocument(
                document,
                DocumentType.MIME_APPLICATION_JSON_TEXT,
                contentAsString));
    }
}
DocumentFilterBinaryToStringConversionTest.java
package com.example.pluginexamples;
import com.funnelback.filter.api.documents.StringDocument;
import org.junit.Assert;
import org.junit.Test;
import com.funnelback.filter.api.DocumentType;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.BytesDocument;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;
import java.util.Optional;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Tests for {@link DocumentFilterBinaryToStringConversion}: JSON bytes are
 * converted to a String, and non-JSON documents are skipped.
 */
public class DocumentFilterBinaryToStringConversionTest {

    /**
     * A JSON bytes document containing non-ASCII characters must come back as
     * a String document with identical content.
     */
    @Test
    public void testJSONIsConverted() throws Exception {
        // Accented characters make sure the UTF-8 decoding is really exercised.
        String expected = "{\"accents\": \"é à ê\"}";

        // Create the dummy JSON input document
        BytesDocument inputDoc = MockDocuments.mockByteDoc(
                "http://foo.com/",
                DocumentType.MIME_APPLICATION_JSON_TEXT,
                Optional.empty(),
                expected.getBytes(UTF_8));

        // Create and run the filter
        FilterResult filterResult = new DocumentFilterBinaryToStringConversion()
                .filter(inputDoc, MockFilterContext.getEmptyContext());

        // Fail with a clear message rather than an index error if nothing was returned.
        Assert.assertFalse(
                "Filter should have returned a document",
                filterResult.getFilteredDocuments().isEmpty());

        // Get the resulting filtered document from the filter result
        StringDocument filteredDocument = (StringDocument) filterResult
                .getFilteredDocuments().get(0);

        Assert.assertEquals(
                "Content was not correctly converted to a string",
                expected,
                filteredDocument.getContentAsString());
    }

    /**
     * A document that is not JSON (here HTML) must be skipped by the filter.
     */
    @Test
    public void testFilterOnlyRunsOnJsonDocuments() throws Exception {
        // Create a dummy HTML input document.
        BytesDocument inputDoc = MockDocuments.mockEmptyByteDoc()
                .cloneWithContent(
                        DocumentType.MIME_HTML_TEXT,
                        Optional.empty(),
                        "<html><p>Hello</p></html>".getBytes(UTF_8));

        // Create and run the filter.
        FilterResult filterResult = new DocumentFilterBinaryToStringConversion()
                .filter(inputDoc, MockFilterContext.getEmptyContext());

        Assert.assertTrue(
                "Filter should have been skipped as the document was not a JSON document",
                filterResult.isSkipped());
    }
}