Java Code Examples for org.carrot2.core.Controller#init()

The following examples show how to use org.carrot2.core.Controller#init(). Each example lists its source file, the project it comes from, and its license.
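Before looking at the individual examples, here is a minimal sketch of the typical Controller life cycle, assuming the Carrot2 3.x API used throughout this page; the documents list is a placeholder for data prepared by the caller.

// Minimal Controller life cycle sketch (assumes the Carrot2 3.x API used below;
// "documents" is a placeholder List<Document> prepared by the caller).
final Controller controller = ControllerFactory.createSimple();
controller.init(); // or controller.init(initAttributes) to set global defaults

final Map<String, Object> attributes = new HashMap<String, Object>();
CommonAttributesDescriptor.attributeBuilder(attributes)
    .documents(documents)
    .query("data mining");

final ProcessingResult result = controller.process(attributes,
    LingoClusteringAlgorithm.class);
System.out.println(result.getClusters().size() + " clusters");

controller.dispose(); // release pooled components when the controller is no longer needed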
Example 1
Source File: UsingCustomLanguageModel.java    From scava with Eclipse Public License 2.0
public static void main(String [] args)
{
    @SuppressWarnings("unchecked")
    final Controller controller = ControllerFactory
        .createCachingPooling(IDocumentSource.class);

    // We will pass our custom language model element factory classes as
    // initialization-time attributes. This is preferable to passing them as
    // processing-time attributes because the instances created at initialization
    // time will be reused for all further requests.
    Map<String, Object> attrs = Maps.newHashMap();
    BasicPreprocessingPipelineDescriptor.attributeBuilder(attrs)
        .stemmerFactory(CustomStemmerFactory.class)
        .tokenizerFactory(CustomTokenizerFactory.class)
        .lexicalDataFactory(CustomLexicalDataFactory.class);
    controller.init(attrs);

    // Cluster some data with Lingo and STC. Notice how the cluster quality degrades
    // when the stop word list is empty (especially for STC).
    clusterAndDisplayClusters(controller, LingoClusteringAlgorithm.class);
    clusterAndDisplayClusters(controller, STCClusteringAlgorithm.class);
}
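The clusterAndDisplayClusters helper called above is not shown in this snippet. A plausible sketch, assembled only from calls that appear elsewhere on this page (the method name matches the call site, but the body and the choice of SampleDocumentData.DOCUMENTS_DATA_MINING are assumptions, not the original implementation):

// Hypothetical helper: clusters a bundled sample document set with the given
// algorithm and prints the resulting clusters. The original source may differ.
private static void clusterAndDisplayClusters(Controller controller,
    Class<? extends IClusteringAlgorithm> clusteringAlgorithm)
{
    final Map<String, Object> processingAttributes = Maps.newHashMap();
    CommonAttributesDescriptor.attributeBuilder(processingAttributes)
        .documents(SampleDocumentData.DOCUMENTS_DATA_MINING)
        .query("data mining");

    final ProcessingResult result = controller.process(processingAttributes,
        clusteringAlgorithm);
    ConsoleFormatter.displayClusters(result.getClusters());
}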
 
Example 2
Source File: UsingCustomLexicalResources.java    From scava with Eclipse Public License 2.0
public static void main(String [] args)
{
    @SuppressWarnings("unchecked")
    final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);

    // We will pass our custom resource locator at initialization time. There are
    // several implementations of the IResourceLocator interface; here we use
    // an explicit filesystem folder in the current working directory.
    File resourcesDir = new File("resources");
    ResourceLookup resourceLookup = new ResourceLookup(new DirLocator(resourcesDir));

    Map<String, Object> attrs = Maps.newHashMap();

    // Note that we tell the linguistic component to merge all lexical resources;
    // this is the default setting and it usually helps with multi-lingual content.
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(attrs)
        .mergeResources(true);
    LexicalDataLoaderDescriptor.attributeBuilder(attrs)
        .resourceLookup(resourceLookup);

    controller.init(attrs);

    // Cluster some data with Lingo and STC.
    clusterAndDisplayClusters(controller, LingoClusteringAlgorithm.class);
    clusterAndDisplayClusters(controller, STCClusteringAlgorithm.class);
}
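The resources directory above is expected to hold Carrot2 lexical resource files. A quick sanity check that the locator actually resolves them might look like the following; the stopwords.en file name follows Carrot2's default naming convention and is an assumption about your local setup.

// Sanity check (assumption: lexical files follow Carrot2's default naming,
// e.g. stopwords.en / stoplabels.en in the "resources" folder).
IResource stopwords = resourceLookup.getFirst("stopwords.en");
System.out.println(stopwords != null
    ? "Found stop words: " + stopwords
    : "stopwords.en not found in " + resourcesDir.getAbsolutePath());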
 
Example 3
Source File: UsingCachingController.java    From scava with Eclipse Public License 2.0
@SuppressWarnings(
{
    "unused", "unchecked"
})
public static void main(String [] args)
{
    // [[[start:using-caching-controller]]]
    /*
     * Create the caching controller. You need only one caching controller instance
     * per application life cycle. This controller instance will cache the results
     * fetched from any document source and also clusters generated by the Lingo
     * algorithm.
     */
    final Controller controller = ControllerFactory.createCachingPooling(
        IDocumentSource.class, LingoClusteringAlgorithm.class);

    /*
     * Before using the caching controller, you must initialize it. On initialization,
     * you can set default values for some attributes. In this example, we'll set the
     * default results number to 50 and the API key.
     */
    final Map<String, Object> globalAttributes = new HashMap<String, Object>();
    CommonAttributesDescriptor
        .attributeBuilder(globalAttributes)
            .results(50);
    Bing3WebDocumentSourceDescriptor
        .attributeBuilder(globalAttributes)
            .appid(BingKeyAccess.getKey()); // use your own ID here
    controller.init(globalAttributes);

    /*
     * The controller is now ready to perform queries. To show that the documents from
     * the document input are cached, we will perform the same query twice and measure
     * the time for each query.
     */
    ProcessingResult result;
    long start, duration;

    final Map<String, Object> attributes;
    attributes = new HashMap<String, Object>();
    CommonAttributesDescriptor.attributeBuilder(attributes).query("data mining");

    start = System.currentTimeMillis();
    result = controller.process(attributes, Bing3WebDocumentSource.class,
        LingoClusteringAlgorithm.class);
    duration = System.currentTimeMillis() - start;
    System.out.println(duration + " ms (empty cache)");

    start = System.currentTimeMillis();
    result = controller.process(attributes, Bing3WebDocumentSource.class,
        LingoClusteringAlgorithm.class);
    duration = System.currentTimeMillis() - start;
    System.out.println(duration + " ms (documents and clusters from cache)");
    // [[[end:using-caching-controller]]]
}
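As the comments above note, attributes passed to init() act as global defaults, while attributes passed to process() apply to a single request and take precedence. A minimal sketch of such an override, reusing the controller from this example (the value 20 is an arbitrary illustration, not from the original source):

// Per-request attributes override the defaults set in init(); this request
// asks for 20 results instead of the default 50 configured above.
final Map<String, Object> overrideAttributes = new HashMap<String, Object>();
CommonAttributesDescriptor.attributeBuilder(overrideAttributes)
    .query("data mining")
    .results(20);

final ProcessingResult overridden = controller.process(overrideAttributes,
    Bing3WebDocumentSource.class, LingoClusteringAlgorithm.class);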
 
Example 4
Source File: UsingComponentSuites.java    From scava with Eclipse Public License 2.0
public static void main(String [] args) throws Exception
{
    @SuppressWarnings("unchecked")
    final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);

    // Initialization-time attributes that will apply to all components.
    final Map<String, Object> initAttributes = Maps.newHashMap();

    // Prepare resource lookup facade. We will use the suites directory 
    // and class path resources.
    final ResourceLookup resourceLookup = new ResourceLookup(
        new DirLocator(new File("suites")),
        new ContextClassLoaderLocator());

    // We know we'll be using Bing so set up its access key.
    // use your own ID here!
    Bing3WebDocumentSourceDescriptor
        .attributeBuilder(initAttributes)
            .appid(BingKeyAccess.getKey());
    
    // We'll read the component suite definition from an XML stream.
    // IResource is an abstraction layer over resources in Carrot2.
    IResource suiteXml = resourceLookup.getFirst("suite-examples.xml");

    // Deserialize the component suite definition.
    final ProcessingComponentSuite suite = 
        ProcessingComponentSuite.deserialize(suiteXml, resourceLookup);

    // Initialize the controller with the suite. All components from the suite
    // will be available for processing within this controller.
    controller.init(initAttributes, suite.getComponentConfigurations());

    // From the suite definition, you can get the document sources and clustering
    // algorithm descriptors.
    final List<DocumentSourceDescriptor> sources = suite.getSources();
    final List<String> sourceIds = Lists.transform(sources,
        ProcessingComponentDescriptor.ProcessingComponentDescriptorToId.INSTANCE);
    System.out.println("Found " + sourceIds.size() + " document sources: "
        + sourceIds);

    final List<ProcessingComponentDescriptor> algorithms = suite.getAlgorithms();
    final List<String> algorithmIds = Lists.transform(algorithms,
        ProcessingComponentDescriptor.ProcessingComponentDescriptorToId.INSTANCE);
    System.out.println("Found " + algorithmIds.size() + " clutering algorithms: "
        + algorithmIds + "\n\n");

    // Run not more than two algorithms on not more than two sources
    for (int s = 0; s < Math.min(sourceIds.size(), 2); s++)
    {
        for (int a = 0; a < Math.min(algorithmIds.size(), 2); a++)
        {
            // You can retrieve some metadata about the components, such as
            // human-readable label, from their descriptors.
            System.out.println("Querying " + sources.get(s).getLabel()
                + ", clustering with " + algorithms.get(a).getLabel());

            // As usual, we pass attributes for processing
            final Map<String, Object> attributes = Maps.newHashMap();
            CommonAttributesDescriptor.attributeBuilder(attributes)
                .query("data mining");

            // Pass component ids to the controller to perform processing
            final ProcessingResult result = controller.process(attributes,
                sourceIds.get(s), algorithmIds.get(a));
            ConsoleFormatter.displayClusters(result.getClusters());
            System.out.println();
        }
    }
}
 
Example 5
Source File: LoadingAttributeValuesFromXml.java    From scava with Eclipse Public License 2.0
public static void main(String [] args) throws Exception
{
    InputStream xmlStream = null;
    try
    {
        xmlStream = LoadingAttributeValuesFromXml.class
            .getResourceAsStream("algorithm-lingo-attributes.xml");

        // Load attribute value sets from the XML stream
        final AttributeValueSets attributeValueSets = AttributeValueSets
            .deserialize(xmlStream);

        // Get the desired set of attribute values for use with further processing
        final Map<String, Object> defaultAttributes = attributeValueSets
            .getDefaultAttributeValueSet().getAttributeValues();

        final Map<String, Object> fasterClusteringAttributes = attributeValueSets
            .getAttributeValueSet("faster-clustering").getAttributeValues();

        // Perform processing using the attribute values
        final Controller controller = ControllerFactory.createSimple();

        // Initialize the controller with one attribute set
        controller.init(fasterClusteringAttributes);

        // Perform clustering using the attribute set provided at initialization time
        Map<String, Object> requestAttributes = Maps.newHashMap(); 
        CommonAttributesDescriptor.attributeBuilder(requestAttributes)
            .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");
        ProcessingResult results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayClusters(results.getClusters());

        // Perform clustering using some other attribute set, in this case the
        // one that is the default in the XML file.
        requestAttributes =
            CommonAttributesDescriptor.attributeBuilder(Maps.newHashMap(defaultAttributes))
                .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
                .query("data mining").map;

        results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayClusters(results.getClusters());
    }
    finally
    {
        CloseableUtils.close(xmlStream);
    }
}