Filter example - modify the document URL

This example shows a simple filter plugin that alters the URL of documents, changing the protocol of the URL from HTTP to HTTPS.

The example below shows a simple filter implementation and corresponding tests.

Although this example implements the generic Filter interface, the other filter types, StringDocumentFilter and ByteDocumentFilter, can also be used to change the document URL.

Example

In this example, any URL that uses the http protocol is rewritten to use https. For any other URL, the filter reports itself as skipped by returning FilterResult.skipped().

DocumentFilterModifyUrl.java
package com.example.pluginexamples;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.funnelback.filter.api.FilterContext;
import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.documents.NoContentDocument;
import com.funnelback.filter.api.filters.PreFilterCheck;
import com.funnelback.filter.api.documents.FilterableDocument;
import com.funnelback.filter.api.filters.Filter;
import com.funnelback.filter.api.filters.FilterException;

import java.net.URI;

/**
 * Filter that rewrites document URLs beginning with {@code http://} so that they
 * begin with {@code https://} instead.
 *
 * <p>Documents whose URL does not begin with {@code http://} are not touched; for
 * those the filter returns {@link FilterResult#skipped()}.
 */
public class DocumentFilterModifyUrl implements Filter {

    private static final Logger log = LogManager.getLogger(DocumentFilterModifyUrl.class);

    /** Prefix identifying the URLs this filter rewrites. */
    private static final String HTTP_PREFIX = "http://";

    /** Prefix that replaces {@link #HTTP_PREFIX} in rewritten URLs. */
    private static final String HTTPS_PREFIX = "https://";

    /**
     * Replaces a leading {@code http://} in the document URL with {@code https://}.
     *
     * @param document the document whose URL is inspected
     * @param context  the filter context; not used by this filter
     * @return a result holding a clone of the document with the rewritten URL, or
     *         {@link FilterResult#skipped()} when the URL does not begin with {@code http://}
     * @throws FilterException declared by the {@link Filter} interface; not thrown
     *         explicitly by this implementation
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) throws FilterException {
        //Get the document URI
        String uri = document.getURI().toASCIIString();

        //URI schemes are case-insensitive (RFC 3986, section 3.1), so compare the
        //prefix ignoring case; otherwise a URL such as HTTP://foo.com/ would be skipped.
        boolean isHttp = uri.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length());

        //If the document URL does not start with http:// mark this filter as being skipped.
        if (!isHttp) {
            return FilterResult.skipped();
        }

        //Keep everything after the scheme prefix exactly as it was.
        String newUrl = HTTPS_PREFIX + uri.substring(HTTP_PREFIX.length());

        //Parameterized logging avoids building the message when debug is disabled.
        log.debug("Changing URL '{}' to '{}'", document.getURI(), newUrl);

        FilterableDocument filteredDocument = document.cloneWithURI(URI.create(newUrl));

        //Return the document with the modified URL
        return FilterResult.of(filteredDocument);
    }
}
DocumentFilterModifyUrlTest.java
package com.example.pluginexamples;

import com.funnelback.filter.api.documents.FilterableDocument;
import org.junit.Assert;
import org.junit.Test;

import com.funnelback.filter.api.FilterResult;
import com.funnelback.filter.api.mock.MockDocuments;
import com.funnelback.filter.api.mock.MockFilterContext;

import java.net.URI;

/**
 * Tests for {@link DocumentFilterModifyUrl}: one case where the URL is rewritten
 * and one where the filter is skipped.
 */
public class DocumentFilterModifyUrlTest {

    /**
     * Runs the filter under test over the document produced by
     * {@code MockDocuments.mockEmptyStringDoc()}, re-addressed to the given URL.
     *
     * @param url the URL to give the input document
     * @return the result returned by the filter
     */
    private static FilterResult applyFilterTo(String url) throws Exception {
        FilterableDocument source = MockDocuments.mockEmptyStringDoc()
                .cloneWithURI(new URI(url));

        return new DocumentFilterModifyUrl().filter(source, MockFilterContext.getEmptyContext());
    }

    @Test
    public void checkHttpUrl() throws Exception {
        //Input document has an http:// URL, so it is expected to be rewritten
        FilterResult outcome = applyFilterTo("http://foo.com/");

        FilterableDocument rewritten = outcome.getFilteredDocuments().get(0);
        String actualUrl = rewritten.getURI().toASCIIString();

        Assert.assertEquals("https://foo.com/", actualUrl);
    }

    @Test
    public void checkNonHttpURL() throws Exception {
        //Input document has a file:// URL, which the filter should not rewrite
        FilterResult outcome = applyFilterTo("file://foo.com/");

        Assert.assertTrue("As the document url did not start with http:// the filter should have been skipped",
                outcome.isSkipped());
    }
}