org.apache.solr.client.solrj.io.Tuple#put

Source File: SetValueEvaluator.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Produces a clone of the supplied tuple with one field set to the supplied value.
 *
 * @param values {@code values[0]} is the tuple to clone, {@code values[1]} the field name and
 *               {@code values[2]} the value to store; double quotes are stripped from the
 *               field name and from String values before they are stored
 * @return the cloned tuple carrying the new field
 * @throws IOException if the first parameter is not a Tuple
 */
@Override
public Object doWork(Object... values) throws IOException {
  // Guard clause: anything other than a Tuple in the first slot is rejected.
  if(!(values[0] instanceof Tuple)) {
    throw new IOException("The setValue function expects a Tuple as the first parameter");
  }

  Tuple source = (Tuple)values[0];
  String rawFieldName = (String)values[1];
  Object fieldValue = values[2];

  // String values lose any embedded double quotes.
  if(fieldValue instanceof String) {
    fieldValue = ((String)fieldValue).replace("\"", "");
  }
  String fieldName = rawFieldName.replace("\"", "");

  // The put is applied to the clone, which is what gets returned.
  Tuple copy = source.clone();
  copy.put(fieldName, fieldValue);
  return copy;
}

Source File: CloudSolrStream.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Takes the first TupleWrapper from {@code tuples} and returns its tuple, re-adding the
 * wrapper when it reports that it has another tuple. When no wrapper is left an EOF tuple is
 * returned. With {@code trace} enabled the returned tuple is tagged with the collection name.
 *
 * @return the next tuple, or an EOF tuple when {@code tuples} is empty
 * @throws IOException if advancing the wrapper fails
 */
protected Tuple _read() throws IOException {
  TupleWrapper head = tuples.pollFirst();

  // Nothing left to read: emit the end-of-stream marker.
  if(head == null) {
    Tuple eof = Tuple.EOF();
    if(trace) {
      eof.put("_COLLECTION_", this.collection);
    }
    return eof;
  }

  Tuple current = head.getTuple();
  if(trace) {
    current.put("_COLLECTION_", this.collection);
  }

  // Put the wrapper back only if it was able to advance.
  if(head.next()) {
    tuples.add(head);
  }
  return current;
}

Source File: DeepRandomStream.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Removes the first TupleWrapper from {@code tuples} and returns its tuple, appending the
 * wrapper to the end of the collection when it reports that it has another tuple. When
 * {@code tuples} is empty an EOF tuple is returned. With {@code trace} enabled the returned
 * tuple is tagged with the collection name.
 *
 * @return the next tuple, or an EOF tuple when {@code tuples} is empty
 * @throws IOException if advancing the wrapper fails
 */
protected Tuple _read() throws IOException {
  // Nothing left to read: emit the end-of-stream marker.
  if(tuples.size() <= 0) {
    Tuple eof = Tuple.EOF();
    if(trace) {
      eof.put("_COLLECTION_", this.collection);
    }
    return eof;
  }

  TupleWrapper head = tuples.removeFirst();
  Tuple current = head.getTuple();
  if(trace) {
    current.put("_COLLECTION_", this.collection);
  }

  // Wrappers that advanced successfully go to the back of the line.
  if(head.next()) {
    tuples.addLast(head);
  }
  return current;
}

Source File: JDBCStream.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads the next row of {@code resultSet} into a tuple, one field per value selector.
 *
 * @return a tuple holding the selected column values, or a tuple with the EOF marker set
 *         when the result set has no further rows
 * @throws IOException wrapping any SQLException raised while reading
 */
public Tuple read() throws IOException {

  try {
    Tuple record = new Tuple();

    // No more rows: flag the tuple as end-of-stream and stop.
    if (!resultSet.next()) {
      record.put(StreamParams.EOF, true);
      return record;
    }

    // Copy each selected column of the current row into the tuple.
    for (ResultSetValueSelector selector : valueSelectors) {
      String column = selector.getColumnName();
      record.put(column, selector.selectValue(resultSet));
    }
    return record;
  } catch (SQLException e) {
    throw new IOException(String.format(Locale.ROOT, "Failed to read next record with error '%s'", e.getMessage()), e);
  }
}

Source File: ReplaceWithValueOperation.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Overwrites {@code fieldName} on the tuple with {@code replacement}; a null replacement
 * removes the field instead.
 *
 * @param tuple the tuple to modify in place
 */
private void replace(Tuple tuple){
  if(replacement != null){
    tuple.put(fieldName, replacement);
    return;
  }

  // A null replacement means the field is dropped.
  tuple.remove(fieldName);
}

Source File: StatsStream.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Copies metric values from a response NamedList into the tuple.
 *
 * Metrics whose identifier starts with {@code count(} are read from the {@code count} entry
 * and stored under {@code count(*)}. Every other metric is read from {@code facet_N}, where N
 * counts the non-count metrics seen so far; missing entries are skipped.
 *
 * @param t        the tuple to populate
 * @param nl       the response entries; nothing happens when this is null
 * @param _metrics the metrics to look up, in order
 */
private void fillTuple(Tuple t,
                       @SuppressWarnings({"rawtypes"})NamedList nl,
                       Metric[] _metrics) {

  if(nl == null) {
    return;
  }

  int facetIndex = 0;
  for(Metric metric : _metrics) {
    String identifier = metric.getIdentifier();

    if(identifier.startsWith("count(")) {
      long count = ((Number)nl.get("count")).longValue();
      t.put("count(*)", count);
      continue;
    }

    // Non-count metrics consume the next facet_N slot whether or not a value is present.
    Object facetValue = nl.get("facet_" + facetIndex);
    ++facetIndex;
    if(facetValue == null) {
      continue;
    }

    if(!(facetValue instanceof Number)) {
      t.put(identifier, facetValue);
      continue;
    }

    double numeric = ((Number)facetValue).doubleValue();
    if (metric.outputLong) {
      t.put(identifier, Math.round(numeric));
    } else {
      t.put(identifier, numeric);
    }
  }
}

Source File: FrequencyTableEvaluator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a frequency table from a single value or list of numeric values.
 *
 * @param values exactly one argument: a List of Numbers, or a single value that is treated as
 *               a one-element list
 * @return one tuple per distinct value with the fields {@code value}, {@code count},
 *         {@code cumFreq}, {@code cumPct} and {@code pct}; null when any argument is null
 * @throws IOException if the number of arguments is not one
 */
@Override
public Object doWork(Object... values) throws IOException {
  // Any null argument short-circuits to a null result.
  for(Object item : values) {
    if(null == item) {
      return null;
    }
  }

  if(values.length != 1){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least one value but found %d",toExpression(constructingFactory),containedEvaluators.size()));
  }

  List<?> sourceValues;
  if(values[0] instanceof List<?>) {
    sourceValues = (List<?>)values[0];
  } else {
    sourceValues = Arrays.asList(values[0]);
  }

  // Values are tallied by their long representation.
  Frequency frequency = new Frequency();
  for(Object sourceValue : sourceValues) {
    frequency.addValue(((Number)sourceValue).longValue());
  }

  List<Tuple> rows = new ArrayList<>();

  @SuppressWarnings({"rawtypes"})
  Iterator distinctValues = frequency.valuesIterator();

  while(distinctValues.hasNext()){
    Long distinct = (Long)distinctValues.next();
    Tuple row = new Tuple();
    row.put("value", distinct.longValue());
    row.put("count", frequency.getCount(distinct));
    row.put("cumFreq", frequency.getCumFreq(distinct));
    row.put("cumPct", frequency.getCumPct(distinct));
    row.put("pct", frequency.getPct(distinct));
    rows.add(row);
  }
  return rows;
}

Source File: ChiSquareDataSetEvaluator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs a chi-square data-sets comparison on two lists of numbers.
 *
 * @param value1 the first sample, a List of Numbers (each converted with {@code longValue()})
 * @param value2 the second sample, a List of Numbers (each converted with {@code longValue()})
 * @return a tuple holding the statistic under {@code chisquare-statistic} and the p-value
 *         under {@code StreamParams.P_VALUE}
 * @throws IOException declared by the overridden method
 */
@Override
public Object doWork(Object value1, Object value2) throws IOException {

  @SuppressWarnings({"unchecked"})
  List<Number> first = (List<Number>) value1;
  @SuppressWarnings({"unchecked"})
  List<Number> second = (List<Number>) value2;

  // The test works on long counts, so every element is narrowed with longValue().
  long[] sampleA = first.stream().mapToLong(Number::longValue).toArray();
  long[] sampleB = second.stream().mapToLong(Number::longValue).toArray();

  ChiSquareTest test = new ChiSquareTest();
  double statistic = test.chiSquareDataSetsComparison(sampleA, sampleB);
  double pValue = test.chiSquareTestDataSetsComparison(sampleA, sampleB);

  Tuple result = new Tuple();
  result.put("chisquare-statistic", statistic);
  result.put(StreamParams.P_VALUE, pValue);
  return result;
}

Source File: GTestDataSetEvaluator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs a G-test data-sets comparison on two lists of numbers.
 *
 * @param value1 the first sample, a List of Numbers (each converted with {@code longValue()})
 * @param value2 the second sample, a List of Numbers (each converted with {@code longValue()})
 * @return a tuple holding the statistic under {@code G-statistic} and the p-value under
 *         {@code StreamParams.P_VALUE}
 * @throws IOException declared by the overridden method
 */
@Override
public Object doWork(Object value1, Object value2) throws IOException {

  @SuppressWarnings({"unchecked"})
  List<Number> first = (List<Number>) value1;
  @SuppressWarnings({"unchecked"})
  List<Number> second = (List<Number>) value2;

  // Both arrays are sized up front, then filled in list order.
  long[] sampleA = new long[first.size()];
  long[] sampleB = new long[second.size()];

  int position = 0;
  while(position < sampleA.length) {
    sampleA[position] = first.get(position).longValue();
    position++;
  }

  position = 0;
  while(position < sampleB.length) {
    sampleB[position] = second.get(position).longValue();
    position++;
  }

  GTest test = new GTest();
  double statistic = test.gDataSetsComparison(sampleA, sampleB);
  double pValue = test.gTestDataSetsComparison(sampleA, sampleB);

  Tuple result = new Tuple();
  result.put("G-statistic", statistic);
  result.put(StreamParams.P_VALUE, pValue);
  return result;
}

Source File: TopicStream.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the next tuple and records its version as the checkpoint for its slice.
 *
 * An EOF tuple is returned with a {@code sleepMillis} field: 0 when at least one tuple was
 * read in this run, 1000 otherwise. For every other tuple the counters are advanced,
 * checkpoints are persisted every {@code checkpointEvery} tuples (when that is greater than
 * -1), and the {@code _SLICE_} and {@code _CORE_} fields are removed before returning.
 *
 * @return the next tuple, or the EOF tuple carrying {@code sleepMillis}
 * @throws IOException if reading or persisting checkpoints fails
 */
public Tuple read() throws IOException {
  Tuple tuple = _read();

  if(tuple.EOF) {
    int sleepMillis = runCount > 0 ? 0 : 1000;
    tuple.put("sleepMillis", sleepMillis);
    return tuple;
  }

  ++count;
  ++runCount;

  // Persist before recording this tuple's own checkpoint.
  boolean checkpointDue = checkpointEvery > -1 && (count % checkpointEvery) == 0;
  if(checkpointDue) {
    persistCheckpoints();
  }

  long version = tuple.getLong(VERSION_FIELD);
  String slice = tuple.getString("_SLICE_");
  checkpoints.put(slice, version);

  // Drop the bookkeeping fields before the tuple is handed out.
  tuple.remove("_SLICE_");
  tuple.remove("_CORE_");

  return tuple;
}

Source File: SearchStream.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Converts the next document from {@code documentIterator} into a tuple.
 *
 * @return a tuple holding every field of the next document, or an EOF tuple when the
 *         iterator is exhausted
 * @throws IOException declared for callers; not thrown by the visible code
 */
public Tuple read() throws IOException {
  if(!documentIterator.hasNext()) {
    return Tuple.EOF();
  }

  SolrDocument document = documentIterator.next();
  Tuple result = new Tuple();

  // Every document field is copied over under its own name.
  for(Entry<String, Object> field : document.entrySet()) {
    result.put(field.getKey(), field.getValue());
  }
  return result;
}

Source File: DescribeEvaluator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Computes descriptive statistics over a list of numbers.
 *
 * @param value a List whose elements are Numbers; each is read with {@code doubleValue()}
 * @return a tuple with the fields {@code max}, {@code mean}, {@code min}, {@code stdev},
 *         {@code sum}, {@code N}, {@code var}, {@code kurtosis}, {@code skewness},
 *         {@code popVar}, {@code geometricMean} and {@code sumsq}
 * @throws IOException if {@code value} is null or is not a List
 */
@Override
public Object doWork(Object value) throws IOException {

  if(!(value instanceof List<?>)){
    // instanceof is false for null, so getClass() must not be called unguarded here;
    // otherwise a null argument surfaces as a NullPointerException instead of this error.
    String found = null == value ? "null" : value.getClass().getSimpleName();
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a numeric list but found %s", toExpression(constructingFactory), found));
  }

  // Each element is cast to Number and accumulated as a double.
  DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
  ((List<?>)value).stream().mapToDouble(innerValue -> ((Number)innerValue).doubleValue()).forEach(descriptiveStatistics::addValue);

  Tuple tuple = new Tuple();
  tuple.put("max", descriptiveStatistics.getMax());
  tuple.put("mean", descriptiveStatistics.getMean());
  tuple.put("min", descriptiveStatistics.getMin());
  tuple.put("stdev", descriptiveStatistics.getStandardDeviation());
  tuple.put("sum", descriptiveStatistics.getSum());
  tuple.put("N", descriptiveStatistics.getN());
  tuple.put("var", descriptiveStatistics.getVariance());
  tuple.put("kurtosis", descriptiveStatistics.getKurtosis());
  tuple.put("skewness", descriptiveStatistics.getSkewness());
  tuple.put("popVar", descriptiveStatistics.getPopulationVariance());
  tuple.put("geometricMean", descriptiveStatistics.getGeometricMean());
  tuple.put("sumsq", descriptiveStatistics.getSumsq());

  return tuple;
}

Source File: Node.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Renders this node as a tuple.
 *
 * The tuple always carries {@code node}, {@code collection}, {@code field} and {@code level}.
 * When ancestors are present they are added under {@code ancestors}; each ancestor string is
 * split on {@code ^} into a collection index and an id, and the id is prefixed with the
 * collection name when the traversal spans multiple collections. Each metric, when present,
 * is added under its identifier.
 *
 * @param collection the collection name to record
 * @param field      the field name to record
 * @param level      the level to record
 * @param traversal  supplies the multi-collection flag and the collection list
 * @return the populated tuple
 */
public Tuple toTuple(String collection, String field, int level, Traversal traversal) {
  Tuple nodeTuple = new Tuple();

  nodeTuple.put("node", id);
  nodeTuple.put("collection", collection);
  nodeTuple.put("field", field);
  nodeTuple.put("level", level);

  boolean multiCollection = traversal.isMultiCollection();
  List<String> collections = traversal.getCollections();

  if(ancestors != null) {
    List<String> ancestorLabels = new ArrayList<>();
    for(String ancestor : ancestors) {
      String[] parts = ancestor.split("\\^");

      // parts[0] is the collection index, parts[1] the ancestor id.
      String label = multiCollection
          ? collections.get(Integer.parseInt(parts[0])) + "/" + parts[1]
          : parts[1];
      ancestorLabels.add(label);
    }

    nodeTuple.put("ancestors", ancestorLabels);
  }

  if(metrics != null) {
    for(Metric metric : metrics) {
      nodeTuple.put(metric.getIdentifier(), metric.getValue());
    }
  }

  return nodeTuple;
}

Source File: SelectStream.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Reads the next tuple from the wrapped stream and projects it onto the selected fields.
 *
 * Two working tuples are built: one holding every original field (the input to the
 * evaluators) and one holding only the selected fields under their output names (the tuple
 * that is returned). Operations are applied to both; each evaluator result that is not null
 * is written to both under the evaluator's output name.
 *
 * @return the projected tuple, or the unmodified EOF tuple from the wrapped stream
 * @throws IOException if the wrapped stream or an evaluator fails
 */
public Tuple read() throws IOException {
  Tuple original = stream.read();
  
  // EOF tuples pass through untouched.
  if(original.EOF){
    return original;
  }

  // create a copy with the limited set of fields
  Tuple workingToReturn = new Tuple();
  Tuple workingForEvaluators = new Tuple();

  //Clear the TupleContext before running the evaluators.
  //The TupleContext allows evaluators to cache values within the scope of a single tuple.
  //For example a LocalDateTime could be parsed by one evaluator and used by other evaluators within the scope of the tuple.
  //This avoids the need to create multiple LocalDateTime instances for the same tuple to satisfy a select expression.

  streamContext.getTupleContext().clear();

  // Evaluators see every original field; the returned tuple only gets the selected ones,
  // stored under the name that selectedFields maps the original field name to.
  for(Object fieldName : original.getFields().keySet()){
    workingForEvaluators.put(fieldName, original.get(fieldName));
    if(selectedFields.containsKey(fieldName)){
      workingToReturn.put(selectedFields.get(fieldName), original.get(fieldName));
    }
  }
  
  // apply all operations, first to the tuple being returned and then to the evaluator input
  for(StreamOperation operation : operations){
    operation.operate(workingToReturn);
    operation.operate(workingForEvaluators);
  }
  
  // Apply all evaluators. Each result that is not null is also written back to the evaluator
  // input, so evaluators later in the iteration order can read it.
  for(Map.Entry<StreamEvaluator, String> selectedEvaluator : selectedEvaluators.entrySet()) {
    Object o = selectedEvaluator.getKey().evaluate(workingForEvaluators);
    if(o != null) {
      workingForEvaluators.put(selectedEvaluator.getValue(), o);
      workingToReturn.put(selectedEvaluator.getValue(), o);
    }
  }
  
  return workingToReturn;
}

Source File: FacetStream.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Recursively walks nested facet buckets and appends one tuple per innermost bucket.
 *
 * At each level the bucket value is stored on a clone of {@code currentTuple} under the
 * level's bucket name. If more bucket levels remain the method recurses into the bucket;
 * otherwise the metric values are read from the bucket and the tuple is added to
 * {@code tuples}.
 *
 * @param level        index into {@code _buckets} of the level being processed
 * @param tuples       output list that receives the finished tuples
 * @param currentTuple tuple carrying the bucket values of the enclosing levels
 * @param facets       the facet entries that contain this level's bucket list
 * @param _buckets     the bucket definitions, one per level
 * @param _metrics     the metrics to read from each innermost bucket
 */
private void fillTuples(int level,
                        List<Tuple> tuples,
                        Tuple currentTuple,
                        @SuppressWarnings({"rawtypes"}) NamedList facets,
                        Bucket[] _buckets,
                        Metric[] _metrics) {

  String bucketName = _buckets[level].toString();
  @SuppressWarnings({"rawtypes"})
  NamedList nl = (NamedList)facets.get(bucketName);
  // No entry for this bucket name: nothing to add at this level.
  if(nl == null) {
    return;
  }
  @SuppressWarnings({"rawtypes"})
  List allBuckets = (List)nl.get("buckets");
  for(int b=0; b<allBuckets.size(); b++) {
    @SuppressWarnings({"rawtypes"})
    NamedList bucket = (NamedList)allBuckets.get(b);
    Object val = bucket.get("val");
    if (val instanceof Integer) {
      val=((Integer)val).longValue();  // calcite currently expects Long values here
    }
    // Work on a clone so sibling buckets do not see each other's values.
    Tuple t = currentTuple.clone();
    t.put(bucketName, val);
    int nextLevel = level+1;
    if(nextLevel<_buckets.length) {
      // More bucket levels remain: descend into this bucket's nested facets.
      fillTuples(nextLevel,
                 tuples,
                 t.clone(),
                 bucket,
                 _buckets,
                 _metrics);
    } else {
      // Innermost level: m counts the non-count metrics and selects the facet_N entry.
      int m = 0;
      for(Metric metric : _metrics) {
        String identifier = metric.getIdentifier();
        if(!identifier.startsWith("count(")) {
          // NOTE(review): d is dereferenced without a null check — confirm every
          // non-count metric always has a facet_N entry in the bucket.
          Number d = ((Number)bucket.get("facet_"+m));
          if(metric.outputLong) {
            // Integral values are kept exact; anything else is rounded to the nearest long.
            if (d instanceof Long || d instanceof Integer) {
              t.put(identifier, d.longValue());
            } else {
              t.put(identifier, Math.round(d.doubleValue()));
            }
          } else {
            t.put(identifier, d.doubleValue());
          }
          ++m;
        } else {
          // Count metrics are read from the bucket's "count" entry and stored as count(*).
          long l = ((Number)bucket.get("count")).longValue();
          t.put("count(*)", l);
        }
      }
      tuples.add(t);
    }
  }
}

Source File: HistogramEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds a histogram from a single value or list of numeric values.
 *
 * @param values {@code values[0]} is a List of Numbers, or a single value treated as a
 *               one-element list; the optional {@code values[1]} is the number of bins
 *               (10 when omitted)
 * @return one tuple per bin with the fields {@code max}, {@code mean}, {@code min},
 *         {@code stdev}, {@code sum}, {@code N}, {@code var}, {@code cumProb} and
 *         {@code prob}; null when any argument is null
 * @throws IOException if no value is supplied or the second parameter is not a Number
 */
@Override
public Object doWork(Object... values) throws IOException {
  // Any null argument short-circuits to a null result.
  for(Object item : values) {
    if(null == item) {
      return null;
    }
  }

  if(values.length < 1){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least one value but found %d",toExpression(constructingFactory),containedEvaluators.size()));
  }

  List<?> sourceValues;
  if(values[0] instanceof List<?>) {
    sourceValues = (List<?>)values[0];
  } else {
    sourceValues = Arrays.asList(values[0]);
  }

  // The bin count defaults to 10 unless a numeric second parameter overrides it.
  int bins = 10;
  if(values.length >= 2){
    if(!(values[1] instanceof Number)){
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - if second parameter is provided then it must be a valid number but found %s instead",toExpression(constructingFactory), values[1].getClass().getSimpleName()));
    }
    bins = ((Number)values[1]).intValue();
  }

  double[] samples = sourceValues.stream().mapToDouble(value -> ((Number)value).doubleValue()).toArray();
  EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
  distribution.load(samples);

  List<Tuple> histogramBins = new ArrayList<>();
  for(SummaryStatistics binSummary : distribution.getBinStats()) {
    Tuple bin = new Tuple();
    bin.put("max", binSummary.getMax());
    bin.put("mean", binSummary.getMean());
    bin.put("min", binSummary.getMin());
    bin.put("stdev", binSummary.getStandardDeviation());
    bin.put("sum", binSummary.getSum());
    bin.put("N", binSummary.getN());
    bin.put("var", binSummary.getVariance());
    bin.put("cumProb", distribution.cumulativeProbability(binSummary.getMean()));
    bin.put("prob", distribution.probability(binSummary.getMin(), binSummary.getMax()));
    histogramBins.add(bin);
  }

  return histogramBins;
}

Source File: TextLogitStream.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Sends one {@code tlogit} request to the core at {@code baseUrl} and packages the reply.
 *
 * The request is non-distributed and sent with POST. It carries the feature, terms, idfs,
 * every entry of {@code paramsMap}, the current weights (when present) and the iteration,
 * outcome, positive label, threshold and learning rate.
 *
 * @return a tuple with the {@code error}, {@code weights} and {@code evaluation} entries of
 *         the response's {@code logit} section
 * @throws Exception if the request fails
 */
public Tuple call() throws Exception {
  ModifiableSolrParams requestParams = new ModifiableSolrParams();
  HttpSolrClient shardClient = cache.getHttpSolrClient(baseUrl);

  requestParams.add(DISTRIB, "false");
  requestParams.add("fq","{!tlogit}");
  requestParams.add("feature", feature);
  requestParams.add("terms", TextLogitStream.toString(terms));
  requestParams.add("idfs", TextLogitStream.toString(idfs));

  // Caller-supplied parameters are appended as-is.
  for(Entry<String, String> extra : paramsMap.entrySet()) {
    requestParams.add(extra.getKey(), extra.getValue());
  }

  // Weights are only sent once they exist.
  if(weights != null) {
    requestParams.add("weights", TextLogitStream.toString(weights));
  }

  requestParams.add("iteration", Integer.toString(iteration));
  requestParams.add("outcome", outcome);
  requestParams.add("positiveLabel", Integer.toString(positiveLabel));
  requestParams.add("threshold", Double.toString(threshold));
  requestParams.add("alpha", Double.toString(learningRate));

  QueryRequest request = new QueryRequest(requestParams, SolrRequest.METHOD.POST);
  QueryResponse response = request.process(shardClient);

  @SuppressWarnings({"rawtypes"})
  NamedList logit = (NamedList)response.getResponse().get("logit");

  @SuppressWarnings({"unchecked"})
  List<Double> shardWeights = (List<Double>)logit.get("weights");
  double shardError = (double)logit.get("error");

  Tuple result = new Tuple();
  result.put("error", shardError);
  result.put("weights", shardWeights);
  result.put("evaluation", logit.get("evaluation"));

  return result;
}

Source File: ClassifyStream.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Reads the next document tuple and annotates it with a classification score.
 *
 * On the first call the model tuple is read from {@code modelStream} and its terms, idfs and
 * weights are cached. Each document's text is then analyzed, a tf-idf vector is built over
 * the model terms (with a leading constant 1.0), and the weighted sum is stored as
 * {@code probability_d} (after the sigmoid) and {@code score_d} (scaled by
 * {@code 1/sqrt(termCount)}).
 *
 * @return the document tuple with {@code probability_d} and {@code score_d} added, or the EOF
 *         tuple from the document stream
 * @throws IOException if no model tuple is available or reading/analysis fails
 */
@Override
public Tuple read() throws IOException {
  if (modelTuple == null) {

    // Validate before caching: storing an EOF tuple in the field would make later calls skip
    // this block and fail on the uninitialized term index instead of reporting the real cause.
    Tuple model = modelStream.read();
    if (model == null || model.EOF) {
      throw new IOException("Model tuple not found for classify stream!");
    }
    modelTuple = model;

    termToIndex = new HashMap<>();

    List<String> terms = modelTuple.getStrings("terms_ss");

    for (int i = 0; i < terms.size(); i++) {
      termToIndex.put(terms.get(i), i);
    }

    idfs = modelTuple.getDoubles("idfs_ds");
    modelWeights = modelTuple.getDoubles("weights_ds");
  }

  Tuple docTuple = docStream.read();
  if (docTuple.EOF) return docTuple;

  String text = docTuple.getString(field);

  double[] tfs = new double[termToIndex.size()];

  int termCount = 0;
  // try-with-resources guarantees close() even when reset() or incrementToken() throws,
  // so the token stream is never left open on a failed read.
  try (TokenStream tokenStream = analyzer.tokenStream(analyzerField, text)) {
    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset();

    while (tokenStream.incrementToken()) {
      termCount++;
      String term = termAtt.toString();
      if (termToIndex.containsKey(term)) {
        tfs[termToIndex.get(term)]++;
      }
    }

    tokenStream.end();
  }

  // One extra slot for the leading constant term.
  List<Double> tfidfs = new ArrayList<>(termToIndex.size() + 1);
  tfidfs.add(1.0);
  for (int i = 0; i < tfs.length; i++) {
    // Raw counts are dampened to 1 + ln(tf); absent terms stay at 0.
    if (tfs[i] != 0) {
      tfs[i] = 1 + Math.log(tfs[i]);
    }
    tfidfs.add(this.idfs.get(i) * tfs[i]);
  }

  double total = 0.0;
  for (int i = 0; i < tfidfs.size(); i++) {
    total += tfidfs.get(i) * modelWeights.get(i);
  }

  // NOTE(review): with termCount == 0 the scale factor is infinite — confirm whether empty
  // text needs special handling.
  double score = total * ((float) (1.0 / Math.sqrt(termCount)));
  double positiveProb = sigmoid(total);

  docTuple.put("probability_d", positiveProb);
  docTuple.put("score_d",  score);

  return docTuple;
}

Source File: ReplaceWithFieldOperation.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Copies the value of {@code replacementFieldName} into {@code originalFieldName} on the
 * given tuple.
 *
 * @param tuple the tuple to modify in place
 */
private void replace(Tuple tuple){
  Object replacementValue = tuple.get(replacementFieldName);
  tuple.put(originalFieldName, replacementValue);
}

Source File: Facet2DStream.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Flattens a two-dimensional facet response into tuples, one per (x bucket, y bucket) pair.
 *
 * Each output tuple is a clone of {@code currentTuple} carrying the x bucket value under the
 * x bucket name, the y bucket value under the y bucket name, and the metric value (or
 * {@code count(*)} for count metrics).
 *
 * @param level        not referenced in the method body
 * @param tuples       output list that receives the finished tuples
 * @param currentTuple template tuple that is cloned for every x bucket
 * @param facets       the facet entries; the x dimension is read from its {@code "x"} entry
 * @param x            the bucket whose string form names the x field
 * @param y            the bucket whose string form names the y field
 * @param metric       the single metric to read from each y bucket
 */
private void fillTuples(int level, List<Tuple> tuples, Tuple currentTuple,
                        @SuppressWarnings({"rawtypes"})NamedList facets, Bucket x, Bucket y, Metric metric) {
  String bucketXName = x.toString();
  String bucketYName = y.toString();

  @SuppressWarnings({"rawtypes"})
  NamedList allXBuckets = (NamedList) facets.get("x");
  // NOTE(review): this loop runs once per entry of the "x" NamedList, but b is never used and
  // the same "buckets" list is fetched each time — if "x" ever holds more than one entry the
  // tuples would be emitted repeatedly. Confirm this is intended.
  for (int b = 0; b < allXBuckets.size(); b++) {
    @SuppressWarnings({"rawtypes"})
    List buckets = (List) allXBuckets.get("buckets");
    for(int s=0; s<buckets.size(); s++) {

      @SuppressWarnings({"rawtypes"})
      NamedList bucket = (NamedList)buckets.get(s);
      Object val = bucket.get("val");
      // Integer bucket values are widened to Long.
      if (val instanceof Integer) {
        val = ((Integer) val).longValue();
      }
      Tuple tx = currentTuple.clone();
      tx.put(bucketXName, val);

      // The y dimension is nested inside each x bucket under "y".
      @SuppressWarnings({"rawtypes"})
      NamedList allYBuckets = (NamedList) bucket.get("y");
      @SuppressWarnings({"rawtypes"})
      List ybuckets = (List)allYBuckets.get("buckets");

      for (int d = 0; d < ybuckets.size(); d++) {
        @SuppressWarnings({"rawtypes"})
        NamedList bucketY = (NamedList) ybuckets.get(d);
        Object valY = bucketY.get("val");
        // Integer bucket values are widened to Long.
        if (valY instanceof Integer) {
          valY = ((Integer) valY).longValue();
        }
        // Clone the x-level tuple so each y bucket gets its own output tuple.
        Tuple yt = tx.clone();
        yt.put(bucketYName, valY);

        // m is incremented below but never read in this method.
        int m = 0;
        String identifier = metric.getIdentifier();
        if (!identifier.startsWith("count(")) {
          // Non-count metrics are read from the y bucket's "agg" entry.
          Number d1 = (Number) bucketY.get("agg");
          if (metric.outputLong) {
            // Integral values are kept exact; anything else is rounded to the nearest long.
            if (d1 instanceof Long || d1 instanceof Integer) {
              yt.put(identifier, d1.longValue());
            } else {
              yt.put(identifier, Math.round(d1.doubleValue()));
            }
          } else {
            yt.put(identifier, d1.doubleValue());
          }
          ++m;
        } else {
          // Count metrics are read from the y bucket's "count" entry and stored as count(*).
          long l = ((Number)bucketY.get("count")).longValue();
          yt.put("count(*)", l);
        }
        tuples.add(yt);
      }
    }
  }
}

Java Code Examples for org.apache.solr.client.solrj.io.Tuple#put()