Filter example - binary to string conversion

This example shows a simple filter plugin that converts a binary form, such as a PDF, to a String containing HTML. Here we outline an example that is given a binary document and converts it to a String.

The example below shows a simple filter implementation and corresponding tests.

Example

In this example we simply convert all JSON documents from raw bytes to a String, assuming the charset is UTF-8. This example implements the BytesDocumentFilter interface. We are required to implement canFilter(), which checks that the document type is JSON to determine whether the filter should run, as well as filterAsBytesDocument(), which contains the logic for the filter.

DocumentFilterBinaryToStringConversion.java
package com.example.pluginexamples;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.NoContentDocument;
import com.funnelback.filter.api.documents.BytesDocument;
import com.funnelback.filter.api.filters.PreFilterCheck;
import com.funnelback.filter.api.filters.BytesDocumentFilter;
import com.funnelback.filter.api.DocumentType;
import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Example filter that turns a JSON document held as raw bytes into a String
 * document, decoding the bytes as UTF-8.
 *
 * <p>Only JSON documents are attempted; every other document type is skipped
 * by {@link #canFilter(NoContentDocument, FilterContext)}.
 */
public class DocumentFilterBinaryToStringConversion implements BytesDocumentFilter {

    private static final Logger log = LogManager.getLogger(DocumentFilterBinaryToStringConversion.class);

    /**
     * Decides whether this filter should be attempted on the given document.
     *
     * @param document the document being considered, without its content
     * @param context  the filter context supplied by the filter framework
     * @return {@link PreFilterCheck#ATTEMPT_FILTER} when the document type is
     *         JSON, otherwise {@link PreFilterCheck#SKIP_FILTER}
     */
    @Override
    public PreFilterCheck canFilter(NoContentDocument document, FilterContext context) {
        // Restrict the conversion to JSON so that other binary formats are
        // left untouched rather than being blindly decoded as UTF-8 text.
        if (document.getDocumentType().isJSON()) {
            return PreFilterCheck.ATTEMPT_FILTER;
        }
        return PreFilterCheck.SKIP_FILTER;
    }

    /**
     * Converts the raw bytes of the document to a String document.
     *
     * @param document the document whose byte content is converted
     * @param context  the filter context; its document factory is used to
     *                 build the resulting String document
     * @return a filter result wrapping a String document of type
     *         {@code DocumentType.MIME_APPLICATION_JSON_TEXT} whose content is
     *         the original bytes decoded as UTF-8
     */
    @Override
    public FilterResult filterAsBytesDocument(BytesDocument document, FilterContext context) {
        log.trace("Get the document content as a byte array and convert it to a String assuming the bytes are UTF-8.");

        // Get the document content as a byte array and convert it to a String
        // assuming the bytes are UTF-8
        byte[] documentContentAsBytes = document.getCopyOfContents();
        String contentAsString = new String(documentContentAsBytes, UTF_8);

        return FilterResult.of(context.getFilterDocumentFactory().toStringDocument(document,
                DocumentType.MIME_APPLICATION_JSON_TEXT,
                contentAsString));
    }
}
DocumentFilterBinaryToStringConversionTest.java
package com.example.pluginexamples;

import com.funnelback.filter.api.documents.StringDocument;
import org.junit.Assert;
import org.junit.Test;

import com.funnelback.filter.api.DocumentType;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.BytesDocument;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;
import java.util.Optional;
import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Tests for {@link DocumentFilterBinaryToStringConversion}.
 */
public class DocumentFilterBinaryToStringConversionTest {

    /**
     * A JSON document supplied as UTF-8 bytes should come back as a String
     * document with identical content, including non-ASCII characters.
     */
    @Test
    public void testJSONIsConverted() throws Exception {
        String expected = "{\"accents\": \"é à ê\"}";

        // Create the dummy JSON input document, using the same JSON document
        // type constant the filter itself emits.
        BytesDocument inputDoc = MockDocuments.mockByteDoc(
                "http://foo.com/",
                DocumentType.MIME_APPLICATION_JSON_TEXT,
                Optional.empty(),
                expected.getBytes(UTF_8));

        // Create and run the filter
        FilterResult filterResult = new DocumentFilterBinaryToStringConversion()
                .filter(inputDoc, MockFilterContext.getEmptyContext());

        // Fail with a clear message rather than an index error if the filter
        // declined to run on the JSON document.
        Assert.assertFalse(
                "Filter should have run as the document was a JSON document",
                filterResult.isSkipped());

        // Get the resulting filtered document from the filter result
        StringDocument filteredDocument = (StringDocument) filterResult
                .getFilteredDocuments().get(0);

        Assert.assertEquals(
                "Content was not correctly converted to a string",
                expected,
                filteredDocument.getContentAsString());
    }

    /**
     * A non-JSON (HTML) document should cause the filter to be skipped.
     */
    @Test
    public void testFilterOnlyRunsOnJsonDocuments() throws Exception {
        // Create a dummy HTML input document.
        BytesDocument inputDoc = MockDocuments.mockEmptyByteDoc()
                .cloneWithContent(DocumentType.MIME_HTML_TEXT, Optional.empty(), "<html><p>Hello</p></html>".getBytes(UTF_8));

        // Create and run the filter.
        FilterResult filterResult = new DocumentFilterBinaryToStringConversion()
                .filter(inputDoc, MockFilterContext.getEmptyContext());

        Assert.assertTrue(
                "Filter should have been skipped as the document was not a JSON document",
                filterResult.isSkipped());
    }
}