Filter example - add metadata to any document
Filters can be used to add metadata to a document regardless of the document type.
In this example a general filter (Filter
) is implemented because access to the document content is not required for this example filter. If the metadata is being extracted from the content then either a StringDocumentFilter
or ByteDocumentFilter
will need to be implemented. Using a general filter avoids unnecessary conversion of the document to String or bytes respectively resulting in a more efficient filter.
If you are writing a filter that modifies the content and metadata you can chain the method calls. e.g.
|
Example
In this example the filter-time-stamp
is set to a human-readable date for all documents. This implements the Filter
interface which only requires implementation of the filter()
method. The filter can still be skipped by returning FilterResult.skipped()
.
This example also has a simple test method which can be executed by running the main method, see testing filters for details.
The above example adds metadata with the name 'filter-time-stamp' to the document. For the metadata to be available in the search index, it needs to be added to the metadata mappings.
DocumentFilterAddMetadata.java
package com.example.pluginexamples;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.filters.Filter;
import com.google.common.collect.ListMultimap;
import java.util.Date;
/**
 * Example filter that stamps every document with the time at which it was filtered.
 *
 * <p>The time stamp is stored under the metadata name {@code filter-time-stamp}.
 * By implementing {@link Filter} rather than StringDocumentFilter or ByteDocumentFilter
 * we avoid any unnecessary conversion of the document to String or bytes respectively.
 */
public class DocumentFilterAddMetadata implements Filter {

    private static final Logger log = LogManager.getLogger(DocumentFilterAddMetadata.class);

    /** Name of the metadata field that receives the time stamp. */
    private static final String TIME_STAMP_KEY = "filter-time-stamp";

    /**
     * Adds the filtered time stamp to the given document.
     *
     * <p>Any values already stored under {@code filter-time-stamp} are discarded, so the
     * resulting document carries exactly one value for that name. All other metadata
     * is carried over unchanged.
     *
     * @param document the document to stamp
     * @param context the filter context; not used by this filter
     * @return a result wrapping a clone of {@code document} with the updated metadata
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) {
        // Work on a copy of the existing metadata so that everything already
        // attached to the document is preserved.
        ListMultimap<String, String> metadata = document.getCopyOfMetadata();

        // Metadata values are a List, so drop any previous time stamp first;
        // otherwise the new value would be appended next to the old one.
        metadata.removeAll(TIME_STAMP_KEY);

        // Human-readable current time, in the java.util.Date#toString() format.
        String date = new Date().toString();
        metadata.put(TIME_STAMP_KEY, date);

        // Parameterized logging: the message is only assembled when debug is enabled.
        log.debug("Adding date: '{}' to : '{}'", date, document.getURI());

        // Create a document with the new metadata and hand it back as the filter result.
        return FilterResult.of(document.cloneWithMetadata(metadata));
    }
}
DocumentFilterAddMetadataTest.java
package com.example.pluginexamples;
import org.junit.Assert;
import org.junit.Test;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;
import java.util.List;
/**
 * Unit test for {@code DocumentFilterAddMetadata}.
 */
public class DocumentFilterAddMetadataTest {

    /**
     * Runs the filter over an empty mock document and verifies that the
     * resulting document carries a single "filter-time-stamp" value.
     */
    @Test
    public void checkTimeStampIsAddedTest() {
        // Arrange: an empty context and an empty input document.
        MockFilterContext context = MockFilterContext.getEmptyContext();
        FilterableDocument emptyDoc = MockDocuments.mockEmptyByteDoc();

        // Act: run the filter under test.
        FilterResult result = new DocumentFilterAddMetadata().filter(emptyDoc, context);

        // The first filtered document is the one we inspect.
        FilterableDocument stampedDoc = result.getFilteredDocuments().get(0);

        // Metadata is multi-valued, so the lookup yields a list of values.
        List<String> stampValues = stampedDoc.getCopyOfMetadata().get("filter-time-stamp");
        int stampCount = stampValues.size();

        // Assert: exactly one time stamp was recorded.
        Assert.assertEquals("Expected to see exactly one time stamp", 1, stampCount);
    }
}