Filter example - add metadata to any document

Filters can be used to add metadata to a document regardless of its document type.

In this example a general filter (Filter) is implemented because access to the document content is not required for this example filter. If the metadata is being extracted from the content, then either a StringDocumentFilter or a ByteDocumentFilter will need to be implemented. Using a general filter avoids unnecessary conversion of the document to String or bytes respectively, resulting in a more efficient filter.

If you are writing a filter that modifies both the content and the metadata, you can chain the method calls, e.g.:

StringDocument filteredDocument = document.cloneWithStringContent(document.getDocumentType(),
                lowerCasedContent).cloneWithMetadata(metadata);

Example

In this example the 'filter-time-stamp' metadata field is set to a human-readable date for all documents. The example implements the Filter interface, which only requires implementation of the filter() method. The filter can still be skipped by returning FilterResult.skipped().

This example also has a simple unit test, shown after the filter source, which can be executed with JUnit; see testing filters for details.

The example below adds metadata with the name 'filter-time-stamp' to the document. For the metadata to be available in the search index, it needs to be added to the metadata mappings.

DocumentFilterAddMetadata.java
package com.example.pluginexamples;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.filters.Filter;

import com.google.common.collect.ListMultimap;
import java.util.Date;

/**
 * Filter that stamps every document with the time at which it was filtered.
 *
 * <p>By implementing {@link Filter} rather than StringDocumentFilter or ByteDocumentFilter
 * we avoid any unnecessary conversion of the document to String or bytes respectively,
 * since the document content is never read here.
 */
public class DocumentFilterAddMetadata implements Filter {

    private static final Logger log = LogManager.getLogger(DocumentFilterAddMetadata.class);

    /** Name of the metadata field that receives the time stamp. */
    private static final String TIME_STAMP_KEY = "filter-time-stamp";

    /**
     * Adds the filtered time stamp to the given document.
     *
     * <p>Any values already stored under {@code filter-time-stamp} are replaced by a
     * single value holding the current time; all other metadata is carried over unchanged.
     *
     * @param document the document to stamp; it is not modified, a clone carrying the
     *                 updated metadata is returned instead
     * @param context  the filter context (not used by this filter)
     * @return a result wrapping the clone of {@code document} with the updated metadata
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) {
        // Work on a copy of the existing metadata so that everything already
        // set on the document is preserved.
        ListMultimap<String, String> metadata = document.getCopyOfMetadata();

        // Metadata values are a List, so putting a value would append to it:
        // drop any previous time stamp first to end up with exactly one.
        metadata.removeAll(TIME_STAMP_KEY);

        // Date#toString() gives the human-readable form this example wants.
        String date = new Date().toString();
        metadata.put(TIME_STAMP_KEY, date);

        // Parameterized logging: the message is only assembled when debug is enabled.
        log.debug("Adding date: '{}' to : '{}'", date, document.getURI());

        // Create a document with the new metadata
        FilterableDocument filteredDocument = document.cloneWithMetadata(metadata);

        return FilterResult.of(filteredDocument);
    }
}
DocumentFilterAddMetadataTest.java
package com.example.pluginexamples;

import org.junit.Assert;
import org.junit.Test;

import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;

import java.util.List;

/**
 * Unit test for {@code DocumentFilterAddMetadata}, run against mock documents.
 */
public class DocumentFilterAddMetadataTest {

    /**
     * Filters an empty mock document and checks that the result carries
     * exactly one, non-empty, {@code filter-time-stamp} metadata value.
     */
    @Test
    public void checkTimeStampIsAddedTest() {
        MockFilterContext mockContext = MockFilterContext.getEmptyContext();

        // Create the input document
        FilterableDocument inputDoc = MockDocuments.mockEmptyByteDoc();

        // Create and run the filter
        DocumentFilterAddMetadata underTest = new DocumentFilterAddMetadata();

        FilterResult res = underTest.filter(inputDoc, mockContext);

        // Fail with a readable message, rather than an IndexOutOfBoundsException
        // from get(0) below, if the filter did not return a single document.
        Assert.assertEquals(
                "Expected the filter to return exactly one document",
                1,
                res.getFilteredDocuments().size());

        // Get the filtered document
        FilterableDocument filteredDocument = res
                .getFilteredDocuments().get(0);

        // Get the timeStamps metadata
        // Metadata values are a list as they can be multi-valued
        List<String> timeStamps = filteredDocument
                .getCopyOfMetadata().get("filter-time-stamp");

        // Check the time stamp is set
        Assert.assertEquals(
                "Expected to see exactly one time stamp",
                1,
                timeStamps.size());

        // ...and that it actually holds a value
        Assert.assertFalse(
                "Expected the time stamp to be non-empty",
                timeStamps.get(0).isEmpty());
    }
}