Filter example - modify the document URL
This example shows a simple filter plugin that alters the URL of documents, changing the protocol of the URL from HTTP to HTTPS.
The example below shows a simple filter implementation and corresponding tests.
Although this example implements the generic `Filter` interface, the other filter types, `StringDocumentFilter` and `ByteDocumentFilter`, can also be used to change the document URL.
Example
In this example, any URL that uses the `http` protocol is updated to `https`. For any other URL the filter is skipped by returning `FilterResult.skipped()`.
DocumentFilterModifyUrl.java
package com.example.pluginexamples;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.NoContentDocument;
import com.funnelback.filter.api.filters.PreFilterCheck;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.filters.Filter;
import com.funnelback.filter.api.filters.FilterException;
import java.net.URI;
/**
 * Filter that rewrites a document's URL from {@code http://} to {@code https://}.
 *
 * <p>Documents whose URL does not begin with {@code http://} are not modified; for those the
 * filter returns {@link FilterResult#skipped()}.
 */
public class DocumentFilterModifyUrl implements Filter {

    private static final Logger log = LogManager.getLogger(DocumentFilterModifyUrl.class);

    /** Prefix identifying the URLs this filter rewrites. */
    private static final String HTTP_PREFIX = "http://";

    /** Prefix that replaces {@link #HTTP_PREFIX} in rewritten URLs. */
    private static final String HTTPS_PREFIX = "https://";

    /**
     * Replaces a leading {@code http://} in the document URL with {@code https://}.
     *
     * @param document the document whose URL is inspected
     * @param context the filter context (not used by this filter)
     * @return a result wrapping a clone of the document with the new URL, or
     *         {@link FilterResult#skipped()} when the URL does not begin with {@code http://}
     * @throws FilterException declared by the {@link Filter} contract; not thrown directly here
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) throws FilterException {
        // Get the document URI
        String uri = document.getURI().toASCIIString();

        // URI schemes are case-insensitive (RFC 3986, section 3.1), so "HTTP://" must be
        // matched as well as "http://".
        boolean isHttp = uri.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length());
        if (!isHttp) {
            // Not an http:// URL: mark this filter as being skipped.
            return FilterResult.skipped();
        }

        String newUrl = HTTPS_PREFIX + uri.substring(HTTP_PREFIX.length());
        // Parameterized message: the text is only assembled when debug logging is enabled.
        log.debug("Changing URL '{}' to '{}'", document.getURI(), newUrl);

        // Return the document with the modified URL
        FilterableDocument filteredDocument = document.cloneWithURI(URI.create(newUrl));
        return FilterResult.of(filteredDocument);
    }
}
DocumentFilterModifyUrlTest.java
package com.example.pluginexamples;
import com.funnelback.filter.api.documents.FilterableDocument;
import org.junit.Assert;
import org.junit.Test;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;
import java.net.URI;
/**
 * Tests for {@link DocumentFilterModifyUrl}.
 */
public class DocumentFilterModifyUrlTest {

    /** An http:// URL must come out of the filter as https://. */
    @Test
    public void checkHttpUrl() throws Exception {
        FilterResult result = runFilterOn("http://foo.com/");

        // The first (and only expected) filtered document carries the rewritten URL.
        FilterableDocument rewritten = result.getFilteredDocuments().get(0);
        String actualUrl = rewritten.getURI().toASCIIString();
        Assert.assertEquals("https://foo.com/", actualUrl);
    }

    /** A URL that is not http:// (here file://) must cause the filter to be skipped. */
    @Test
    public void checkNonHttpURL() throws Exception {
        FilterResult result = runFilterOn("file://foo.com/");

        boolean skipped = result.isSkipped();
        Assert.assertTrue("As the document url did not start with http:// the filter should have been skipped",
            skipped);
    }

    /**
     * Runs the filter under test against an empty mock string document with the given URL.
     *
     * @param url the URL to assign to the input document
     * @return the result produced by {@link DocumentFilterModifyUrl}
     */
    private static FilterResult runFilterOn(String url) throws Exception {
        FilterableDocument input = MockDocuments.mockEmptyStringDoc().cloneWithURI(new URI(url));
        DocumentFilterModifyUrl underTest = new DocumentFilterModifyUrl();
        return underTest.filter(input, MockFilterContext.getEmptyContext());
    }
}