Java Code Examples for cc.mallet.topics.ParallelTopicModel#setRandomSeed()

The following examples show how to use cc.mallet.topics.ParallelTopicModel#setRandomSeed(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MalletCalculator.java    From TagRec with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Trains an LDA topic model on {@code this.instances} and stores the
 * per-document topics in {@code this.docList} and, optionally, the
 * per-topic terms in {@code this.topicList}.
 *
 * @param topicCreation when {@code true}, {@code this.topicList} is set to
 *        {@code null}; when {@code false}, it is filled from
 *        {@code getMaxTermsByTopics}
 */
public void predictValuesProbs(boolean topicCreation) {
	ParallelTopicModel lda = new ParallelTopicModel(this.numTopics, ALPHA * this.numTopics, BETA); // TODO
	// The seed must be set before the instances are added: MALLET draws the
	// initial random topic assignments inside addInstances(), so seeding
	// afterwards leaves the initialisation (and thus the result) unseeded.
	lda.setRandomSeed(43);
	lda.addInstances(this.instances);
	lda.setNumThreads(1);
	lda.setNumIterations(NUM_ITERATIONS);
	try {
		lda.estimate();
	} catch (Exception e) {
		// Best effort: the failure is reported and the lists below are still
		// read from the model in whatever state estimation left it.
		e.printStackTrace();
	}
	this.docList = getMaxTopicsByDocs(lda, this.numTopics);
	System.out.println("Fetched Doc-List");
	this.topicList = !topicCreation ? getMaxTermsByTopics(lda, MAX_TERMS) : null;
	System.out.println("Fetched Topic-List");
}
 
Example 2
Source File: MalletCalculatorTweet.java    From TagRec with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Trains an LDA topic model on {@code this.instances} and stores the
 * per-document topics in {@code this.docList} and, optionally, the
 * per-topic terms in {@code this.topicList}.
 *
 * @param topicCreation when {@code true}, {@code this.topicList} is set to
 *        {@code null}; when {@code false}, it is filled from
 *        {@code getMaxTermsByTopics}
 */
public void predictValuesProbs(boolean topicCreation) {

    ParallelTopicModel lda = new ParallelTopicModel(this.numTopics, ALPHA * this.numTopics, BETA); // TODO
    // The seed must be set before the instances are added: MALLET draws the
    // initial random topic assignments inside addInstances(), so seeding
    // afterwards leaves the initialisation (and thus the result) unseeded.
    lda.setRandomSeed(43);
    lda.addInstances(this.instances);
    lda.setNumThreads(1);
    lda.setNumIterations(NUM_ITERATIONS);
    try {
        lda.estimate();
    } catch (Exception e) {
        // Best effort: the failure is reported and the lists below are still
        // read from the model in whatever state estimation left it.
        e.printStackTrace();
    }
    this.docList = getMaxTopicsByDocs(lda, this.numTopics);
    System.out.println("Fetched Doc-List");
    this.topicList = !topicCreation ? getMaxTermsByTopics(lda, MAX_TERMS) : null;
    System.out.println("Fetched Topic-List");
}
 
Example 3
Source File: EngineMBTopicsLDA.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Infers a topic distribution for every instance annotation and stores it,
 * together with the most probable topic, as features on that annotation.
 * <p>
 * Features written (each name prefixed with {@code featurePrefix}):
 * {@code TopicDist} (list of per-topic probabilities), {@code BestTopic}
 * (index of the largest probability) and {@code BestTopicProb} (that
 * probability).
 *
 * @param instanceAS annotations to process, visited in document order
 * @param tokenAS token annotations passed on when building each instance
 * @param tokenFeature token feature name passed on when building each instance
 * @param featurePrefix prefix for the written feature names; {@code null} is
 *        treated as the empty string
 * @param parms option string parsed for {@code iters} (default 10),
 *        {@code burnin} (default 10), {@code thinning} (default 0) and
 *        {@code seed} (default 0)
 */
public void applyTopicModel(AnnotationSet instanceAS, AnnotationSet tokenAS,
        String tokenFeature, String featurePrefix, String parms) {
  CorpusRepresentationMalletLDA data = (CorpusRepresentationMalletLDA)corpusRepresentation;
  data.stopGrowth();

  Parms parmdef = new Parms(parms,
              "i:iters:i",
              "B:burnin:i",
              "T:thinning:i",
              "s:seed:i"
  );
  int numIterations = (int) parmdef.getValueOrElse("iters", 10);
  int burnIn = (int) parmdef.getValueOrElse("burnin", 10);
  // NOTE(review): thinning defaults to 0 -- confirm the inferencer tolerates
  // that value when iters is configured larger than burnin.
  int thinning = (int) parmdef.getValueOrElse("thinning", 0);
  int seed = (int) parmdef.getValueOrElse("seed", 0);

  ParallelTopicModel tm = (ParallelTopicModel)model;
  TopicInferencer ti = tm.getInferencer();
  tm.setRandomSeed(seed);
  // The inferencer samples with its own random generator, so seeding only
  // the model has no effect on getSampledDistribution(); seed it as well so
  // the "seed" option actually makes inference reproducible.
  ti.setRandomSeed(seed);

  // The prefix does not change between annotations, so resolve it once.
  String prefix = (featurePrefix == null) ? "" : featurePrefix;

  for(Annotation instAnn : instanceAS.inDocumentOrder()) {
    Instance inst = data.getInstanceFor(gate.Utils.start(instAnn), gate.Utils.end(instAnn), tokenAS, tokenFeature);
    // NOTE: see http://mallet.cs.umass.edu/api/cc/mallet/topics/TopicInferencer.html#getSampledDistribution(cc.mallet.types.Instance,%20int,%20int,%20int)
    double[] tdist = ti.getSampledDistribution(inst, numIterations, thinning, burnIn);

    // Box the distribution into a list and track the first largest entry.
    List<Double> tdistlist = new ArrayList<>(tdist.length);
    int bestTopic = -1;
    double bestProb = -999.99;
    for(int topic = 0; topic < tdist.length; topic++) {
      double val = tdist[topic];
      tdistlist.add(val);
      if(val > bestProb) {
        bestTopic = topic;
        bestProb = val;
      }
    }

    instAnn.getFeatures().put(prefix+"TopicDist", tdistlist);
    // Also add a feature that gives the index and word list of the most likely topic
    instAnn.getFeatures().put(prefix+"BestTopic", bestTopic);
    instAnn.getFeatures().put(prefix+"BestTopicProb", bestProb);
    // TODO: to add the topic words we have to pre-calculate the top k words for each topic
    // and assign the list for topic k here!
  }
}