biz.k11i.xgboost.util.FVec Java Examples
The following examples show how to use
biz.k11i.xgboost.util.FVec.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Example.java From xgboost-predictor-java with Apache License 2.0 | 6 votes |
/**
 * Reads the bundled agaricus test file and converts every line into a labelled feature vector.
 *
 * <p>Each line is split on single spaces: the first token is parsed as the integer label and every
 * following token is split on {@code ':'} into an integer feature index and a float feature value.
 *
 * @return one (label, feature vector) entry per line, in file order
 * @throws IOException if the resource file cannot be read
 */
static List<SimpleEntry<Integer, FVec>> loadData() throws IOException {
    final File source = new File(TestHelper.getResourcePath("data/agaricus.txt.0.test"));
    final List<String> lines = Files.readAllLines(source.toPath(), StandardCharsets.UTF_8);

    final List<SimpleEntry<Integer, FVec>> entries = new ArrayList<>();
    for (String line : lines) {
        final String[] tokens = line.split(" ");

        // Tokens after the label are "index:value" pairs.
        final Map<Integer, Float> features = new HashMap<>();
        for (int t = 1; t < tokens.length; t++) {
            final String[] indexAndValue = tokens[t].split(":");
            features.put(Integer.parseInt(indexAndValue[0]), Float.parseFloat(indexAndValue[1]));
        }

        final int label = Integer.parseInt(tokens[0]);
        entries.add(new SimpleEntry<>(label, FVec.Transformer.fromMap(features)));
    }
    return entries;
}
Example #2
Source File: XGBoostOnlinePredictUDTF.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a sparse feature vector from a list whose elements render as {@code "index:value"}.
 *
 * <p>{@code null} elements are skipped. Each remaining element is converted with
 * {@code toString()}; the text before the first {@code ':'} is parsed as the integer feature index
 * and the text after it as the double feature value.
 *
 * @param featureList list of features, possibly containing {@code null} elements
 * @return feature vector created from the parsed index/value map
 * @throws UDFArgumentException if an element has no {@code ':'} after its first character, or if
 *         the index or value cannot be parsed as a number
 */
@Nonnull
private static FVec parseSparseFeatures(@Nonnull final List<?> featureList)
        throws UDFArgumentException {
    final Map<Integer, Double> indexToValue = new HashMap<>((int) (featureList.size() * 1.5));
    for (Object feature : featureList) {
        if (feature != null) {
            final String text = feature.toString();
            final int colon = text.indexOf(':');
            // A colon at position 0 (empty index) is rejected as well as a missing colon.
            if (colon < 1) {
                throw new UDFArgumentException("Invalid feature format: " + text);
            }
            try {
                final int index = Integer.parseInt(text.substring(0, colon));
                final double value = Double.parseDouble(text.substring(colon + 1));
                indexToValue.put(index, value);
            } catch (NumberFormatException e) {
                throw new UDFArgumentException("Failed to parse a feature value: " + text);
            }
        }
    }
    return FVec.Transformer.fromMap(indexToValue);
}
Example #3
Source File: XGBoostOnlinePredictUDTF.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a dense feature vector from a list-typed argument.
 *
 * <p>The list length and elements are read through {@code featureListOI}; a {@code null} element
 * becomes {@link Double#NaN}, any other element is converted to a double via {@code featureElemOI}.
 *
 * @param argObj the list object holding the feature values
 * @return feature vector created from the resulting double array
 * @throws UDFArgumentException declared by the signature
 */
@Nonnull
private FVec parseDenseFeatures(@Nonnull Object argObj) throws UDFArgumentException {
    final int size = featureListOI.getListLength(argObj);
    final double[] dense = new double[size];
    for (int idx = 0; idx < size; idx++) {
        final Object element = featureListOI.getListElement(argObj, idx);
        dense[idx] = (element == null)
                ? Double.NaN
                : PrimitiveObjectInspectorUtils.getDouble(element, featureElemOI);
    }
    return FVec.Transformer.fromArray(dense, false);
}
Example #4
Source File: TestBase.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Loads the dataset as feature vectors, one per parsed row.
 *
 * <p>Every row handed over by {@code parse} is converted with
 * {@code XGBoostUtils.parseRowAsFVec}, starting at column 1 (column 0 is not used as a feature).
 *
 * @return the feature vectors in the order the rows were delivered
 * @throws Exception if parsing fails
 */
@Override
public List<FVec> loadDatasetAsListOfFVec() throws Exception {
    final List<FVec> rows = new ArrayList<>();
    parse(new RowProcessor() {
        @Override
        public void handleRow(String[] columns) throws Exception {
            rows.add(XGBoostUtils.parseRowAsFVec(columns, 1, columns.length));
        }
    });
    return rows;
}
Example #5
Source File: TestBase.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Loads the dataset as dense 34-element feature vectors and returns the configured slice of it.
 *
 * <p>Columns 0..32 of each row are parsed as floats. Column 33 is parsed as a float unless it is
 * the literal {@code "?"}, in which case {@code 0} is stored.
 *
 * @return the result of {@code slice} applied to all loaded vectors with {@code sliceIndex}
 * @throws Exception if parsing fails
 */
@Override
public List<FVec> loadDatasetAsListOfFVec() throws Exception {
    final List<FVec> rows = new ArrayList<>();
    parse(new RowProcessor() {
        @Override
        public void handleRow(String[] columns) throws Exception {
            final int lastIndex = 33;
            final float[] values = new float[lastIndex + 1];
            for (int col = 0; col < lastIndex; col++) {
                values[col] = Float.parseFloat(columns[col]);
            }
            // "?" in the last column is stored as zero.
            if (columns[lastIndex].equals("?")) {
                values[lastIndex] = 0.f;
            } else {
                values[lastIndex] = Float.parseFloat(columns[lastIndex]);
            }
            rows.add(FVec.Transformer.fromArray(values, false));
        }
    });
    return slice(rows, sliceIndex, FVec.class);
}
Example #6
Source File: XGBoostOnlinePredictUDTF.java From incubator-hivemall with Apache License 2.0 | 6 votes |
@Override public void process(Object[] args) throws HiveException { if (mapToModel == null) { this.mapToModel = new HashMap<String, Predictor>(); } if (args[1] == null) {// features is null return; } String modelId = PrimitiveObjectInspectorUtils.getString(nonNullArgument(args, 2), modelIdOI); Predictor model = mapToModel.get(modelId); if (model == null) { Text arg3 = modelOI.getPrimitiveWritableObject(nonNullArgument(args, 3)); model = XGBoostUtils.loadPredictor(arg3); mapToModel.put(modelId, model); } Writable rowId = HiveUtils.copyToWritable(nonNullArgument(args, 0), rowIdOI); FVec features = denseFeatures ? parseDenseFeatures(args[1]) : parseSparseFeatures(featureListOI.getList(args[1])); predictAndForward(model, rowId, features); }
Example #7
Source File: XGBoostUtils.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Parses the columns {@code row[start]} .. {@code row[end - 1]} as sparse {@code "index:value"}
 * features and builds a feature vector from them.
 *
 * <p>{@code null} columns are skipped. For every other column the text before the first
 * {@code ':'} is parsed as the integer feature index and the text after it as the float value.
 *
 * @param row the split row
 * @param start index of the first column to parse (inclusive)
 * @param end index after the last column to parse (exclusive)
 * @return feature vector created from the parsed index/value map
 * @throws IllegalArgumentException if a column has no {@code ':'} after its first character, or
 *         if the index or value cannot be parsed as a number (the original
 *         {@link NumberFormatException} is attached as the cause)
 */
@Nonnull
public static FVec parseRowAsFVec(@Nonnull final String[] row, final int start, final int end) {
    final Map<Integer, Float> map = new HashMap<>((int) (row.length * 1.5));
    for (int i = start; i < end; i++) {
        final String str = row[i];
        if (str == null) {
            continue;
        }
        final int pos = str.indexOf(':');
        // A colon at position 0 (empty index) is rejected as well as a missing colon.
        if (pos < 1) {
            throw new IllegalArgumentException("Invalid feature format: " + str);
        }
        final int index;
        final float value;
        try {
            index = Integer.parseInt(str.substring(0, pos));
            value = Float.parseFloat(str.substring(pos + 1));
        } catch (NumberFormatException e) {
            // Keep the parse failure as the cause so the offending token can be diagnosed.
            throw new IllegalArgumentException("Failed to parse a feature value: " + str, e);
        }
        map.put(index, value);
    }
    return FVec.Transformer.fromMap(map);
}
Example #8
Source File: Example.java From xgboost-predictor-java with Apache License 2.0 | 6 votes |
/**
 * Predicts the probability of every sample using {@link Predictor#predict(FVec)} and prints the
 * mean logarithmic loss over all samples.
 *
 * <p>The first predicted value of each sample is clamped to {@code [1e-15, 1 - 1e-15]} before
 * taking logarithms so that {@code log(0)} never occurs.
 *
 * @param predictor Predictor
 * @param data test data as (actual label, feature vector) entries
 */
static void predictAndLogLoss(Predictor predictor, List<SimpleEntry<Integer, FVec>> data) {
    double sum = 0;

    for (SimpleEntry<Integer, FVec> pair : data) {
        double[] predicted = predictor.predict(pair.getValue());

        double predValue = Math.min(Math.max(predicted[0], 1e-15), 1 - 1e-15);
        int actual = pair.getKey();
        // Accumulate the per-sample log-likelihood; plain assignment here would keep only the
        // last sample's term and make the reported average meaningless.
        sum += actual * Math.log(predValue) + (1 - actual) * Math.log(1 - predValue);
    }

    double logLoss = -sum / data.size();

    System.out.println("Logloss: " + logLoss);
}
Example #9
Source File: XGBoostEvidenceFilterUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(groups = "sv") protected void testLocalXGBoostClassifierSpark() { final Predictor localPredictor = XGBoostEvidenceFilter.loadPredictor(localClassifierModelFile); // get spark ctx final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext(); // parallelize classifierAccuracyData to RDD JavaRDD<FVec> testFeaturesRdd = ctx.parallelize(Arrays.asList(classifierAccuracyData.features)); // predict in parallel JavaDoubleRDD predictedProbabilityRdd = testFeaturesRdd.mapToDouble(f -> localPredictor.predictSingle(f, false, 0)); // pull back to local array final double[] predictedProbabilitySpark = predictedProbabilityRdd.collect() .stream().mapToDouble(Double::doubleValue).toArray(); // check probabilities from spark are identical to serial assertArrayEquals(predictedProbabilitySpark, predictedProbabilitySerial, 0.0, "Probabilities predicted in spark context differ from serial" ); }
Example #10
Source File: Example.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Entry point of the example: loads the test data and a binary-logistic model, then prints the
 * log loss and the predicted leaf indexes.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the test data or the model cannot be read
 */
public static void main(String[] args) throws IOException {
    final List<SimpleEntry<Integer, FVec>> testData = loadData();

    final Predictor model = new Predictor(
            TestHelper.getResourceAsStream("model/gbtree/v47/binary-logistic.model"));

    predictAndLogLoss(model, testData);
    predictLeafIndex(model, testData);
}
Example #11
Source File: Example.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Predicts the leaf indexes for every sample and prints them, one line per sample, prefixed
 * with the sample's zero-based position.
 *
 * @param predictor Predictor
 * @param data test data
 */
static void predictLeafIndex(Predictor predictor, List<SimpleEntry<Integer, FVec>> data) {
    int row = 0;
    for (SimpleEntry<Integer, FVec> entry : data) {
        final String leaves = Arrays.toString(predictor.predictLeaf(entry.getValue()));
        System.out.printf("leafIndexes[%d]: %s%s", row, leaves, System.lineSeparator());
        row++;
    }
}
Example #12
Source File: GBTree.java From Myna with Apache License 2.0 | 5 votes |
/**
 * Sums the leaf values of the trees belonging to one output group.
 *
 * @param feat feature vector
 * @param bst_group index of the output group whose trees are evaluated
 * @param root_index starting root index passed to every tree
 * @param ntree_limit maximum number of trees to use; {@code 0} means all trees of the group
 * @return the sum of the leaf values of the evaluated trees
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    RegTree[] trees = _groupTrees[bst_group];
    // Clamp the limit to the number of available trees; an unclamped limit larger than
    // trees.length would run past the end of the array.
    int treeleft = ntree_limit == 0 ? trees.length : Math.min(ntree_limit, trees.length);

    double psum = 0;
    for (int i = 0; i < treeleft; i++) {
        psum += trees[i].getLeafValue(feat, root_index);
    }

    return psum;
}
Example #13
Source File: GBTree.java From Myna with Apache License 2.0 | 5 votes |
/**
 * Computes one raw prediction per output group.
 *
 * @param feat feature vector
 * @param ntree_limit tree limit forwarded to {@code pred}
 * @return array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] scores = new double[groupCount];
    for (int group = 0; group < groupCount; group++) {
        scores[group] = pred(feat, group, 0, ntree_limit);
    }
    return scores;
}
Example #14
Source File: GBTree.java From Myna with Apache License 2.0 | 5 votes |
/**
 * Computes the prediction of a model that has exactly one output group.
 *
 * @param feat feature vector
 * @param ntree_limit tree limit forwarded to {@code pred}
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model has more (or fewer) than one output group
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    if (mparam.num_output_group == 1) {
        return pred(feat, 0, 0, ntree_limit);
    }
    throw new IllegalStateException(
            "Can't invoke predictSingle() because this model outputs multiple values: "
                    + mparam.num_output_group);
}
Example #15
Source File: PredictionTestBase.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Creates the {@code "leaf_ntree"} prediction task, which predicts leaf indexes with the given
 * tree limit and converts them to a double array.
 *
 * @param ntree_limit tree limit passed to {@code Predictor.predictLeaf}
 * @return the prediction task
 */
public static PredictionTask predictLeafWithNTree(final int ntree_limit) {
    final PredictionTask task = new PredictionTask("leaf_ntree") {
        @Override
        double[] predict(Predictor model, FVec features) {
            return toDoubleArray(model.predictLeaf(features, ntree_limit));
        }
    };
    return task;
}
Example #16
Source File: PredictionTestBase.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Creates the {@code "leaf"} prediction task, which predicts leaf indexes and converts them to
 * a double array.
 *
 * @return the prediction task
 */
public static PredictionTask predictLeaf() {
    final PredictionTask task = new PredictionTask("leaf") {
        @Override
        double[] predict(Predictor model, FVec features) {
            return toDoubleArray(model.predictLeaf(features));
        }
    };
    return task;
}
Example #17
Source File: GBTree.java From Myna with Apache License 2.0 | 5 votes |
/**
 * Collects the index of the leaf that each tree reaches for the given feature vector.
 *
 * @param feat feature vector
 * @param root_index starting root index passed to every tree
 * @param ntree_limit maximum number of trees to use; {@code 0} means all trees
 * @return one leaf index per evaluated tree, in tree order
 */
int[] predPath(FVec feat, int root_index, int ntree_limit) {
    // Clamp the limit to the number of available trees; an unclamped limit larger than
    // trees.length would run past the end of the array.
    int treeleft = ntree_limit == 0 ? trees.length : Math.min(ntree_limit, trees.length);

    int[] leafIndex = new int[treeleft];
    for (int i = 0; i < treeleft; i++) {
        leafIndex[i] = trees[i].getLeafIndex(feat, root_index);
    }
    return leafIndex;
}
Example #18
Source File: XGBoostOnlinePredictUDTF.java From incubator-hivemall with Apache License 2.0 | 5 votes |
private void predictAndForward(@Nonnull final Predictor model, @Nonnull final Writable rowId, @Nonnull final FVec features) throws HiveException { double[] predicted = model.predict(features); // predicted[0] has // - probability ("binary:logistic") // - class label ("multi:softmax") forwardPredicted(rowId, predicted); }
Example #19
Source File: GBTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Collects the index of the leaf that each tree reaches for the given feature vector.
 *
 * @param feat feature vector
 * @param root_index starting root index passed to every tree
 * @param ntree_limit maximum number of trees to use, capped at the number of trees;
 *        {@code 0} means all trees
 * @return one leaf index per evaluated tree, in tree order
 */
int[] predPath(FVec feat, int root_index, int ntree_limit) {
    final int treeCount;
    if (ntree_limit == 0) {
        treeCount = trees.length;
    } else {
        treeCount = Math.min(ntree_limit, trees.length);
    }

    final int[] leaves = new int[treeCount];
    for (int t = 0; t < treeCount; t++) {
        leaves[t] = trees[t].getLeafIndex(feat, root_index);
    }
    return leaves;
}
Example #20
Source File: Predictor.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes the booster's predictions and adds {@code mparam.base_score} to each of them.
 *
 * @param feat feature vector
 * @param ntree_limit tree limit forwarded to the booster
 * @return the array returned by the booster, shifted in place by the base score
 */
double[] predictRaw(FVec feat, int ntree_limit) {
    final double[] scores = gbm.predict(feat, ntree_limit);
    final int count = scores.length;
    for (int k = 0; k < count; k++) {
        scores[k] += mparam.base_score;
    }
    return scores;
}
Example #21
Source File: RegTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
int next(FVec feat) { double fvalue = feat.fvalue(_splitIndex); if (fvalue != fvalue) { // is NaN? return _defaultNext; } return (fvalue < split_cond) ? cleft_ : cright_; }
Example #22
Source File: GBLinear.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes one linear prediction per output group.
 *
 * @param feat feature vector
 * @param ntree_limit not used by this method
 * @return array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] scores = new double[groupCount];
    int group = 0;
    while (group < groupCount) {
        scores[group] = pred(feat, group);
        group++;
    }
    return scores;
}
Example #23
Source File: GBLinear.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes the linear prediction of a model that has exactly one output group.
 *
 * @param feat feature vector
 * @param ntree_limit not used by this method
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model does not have exactly one output group
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    final boolean singleOutput = mparam.num_output_group == 1;
    if (!singleOutput) {
        throw new IllegalStateException(
                "Can't invoke predictSingle() because this model outputs multiple values: "
                        + mparam.num_output_group);
    }
    return pred(feat, 0);
}
Example #24
Source File: GBLinear.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes the linear score of one output group: the group's bias plus the weighted sum of all
 * non-NaN feature values.
 *
 * @param feat feature vector
 * @param gid output group index
 * @return the linear score of the group
 */
double pred(FVec feat, int gid) {
    double total = bias(gid);
    for (int fid = 0; fid < mparam.num_feature; ++fid) {
        final double value = feat.fvalue(fid);
        // NaN feature values contribute nothing to the sum.
        if (Double.isNaN(value)) {
            continue;
        }
        total += value * weight(fid, gid);
    }
    return total;
}
Example #25
Source File: RegTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Walks the tree from the given root down to a leaf and returns that leaf's value.
 *
 * @param feat feature vector deciding which child is taken at every inner node
 * @param root_id starting root index
 * @return leaf value
 */
public double getLeafValue(FVec feat, int root_id) {
    Node current = nodes[root_id];
    while (true) {
        if (current._isLeaf) {
            return current.leaf_value;
        }
        current = nodes[current.next(feat)];
    }
}
Example #26
Source File: Dart.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Sums the leaf values of one output group's trees, each scaled by the matching
 * {@code weightDrop} entry.
 *
 * @param feat feature vector
 * @param bst_group index of the output group whose trees are evaluated
 * @param root_index starting root index passed to every tree
 * @param ntree_limit maximum number of trees to use, capped at the number of trees in the
 *        group; {@code 0} means all trees of the group
 * @return the weighted sum of the leaf values of the evaluated trees
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    final RegTree[] groupTrees = _groupTrees[bst_group];

    final int treeCount;
    if (ntree_limit == 0) {
        treeCount = groupTrees.length;
    } else {
        treeCount = Math.min(ntree_limit, groupTrees.length);
    }

    double total = 0;
    for (int t = 0; t < treeCount; t++) {
        total += weightDrop[t] * groupTrees[t].getLeafValue(feat, root_index);
    }
    return total;
}
Example #27
Source File: GBTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes one raw prediction per output group.
 *
 * @param feat feature vector
 * @param ntree_limit tree limit forwarded to {@code pred}
 * @return array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] scores = new double[groupCount];
    int group = 0;
    while (group < groupCount) {
        scores[group] = pred(feat, group, 0, ntree_limit);
        group++;
    }
    return scores;
}
Example #28
Source File: GBTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Computes the prediction of a model that has exactly one output group.
 *
 * @param feat feature vector
 * @param ntree_limit tree limit forwarded to {@code pred}
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model does not have exactly one output group
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    final boolean singleOutput = mparam.num_output_group == 1;
    if (!singleOutput) {
        throw new IllegalStateException(
                "Can't invoke predictSingle() because this model outputs multiple values: "
                        + mparam.num_output_group);
    }
    return pred(feat, 0, 0, ntree_limit);
}
Example #29
Source File: GBTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Sums the leaf values of the trees belonging to one output group.
 *
 * @param feat feature vector
 * @param bst_group index of the output group whose trees are evaluated
 * @param root_index starting root index passed to every tree
 * @param ntree_limit maximum number of trees to use, capped at the number of trees in the
 *        group; {@code 0} means all trees of the group
 * @return the sum of the leaf values of the evaluated trees
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    final RegTree[] groupTrees = _groupTrees[bst_group];

    int treeCount = groupTrees.length;
    if (ntree_limit != 0) {
        treeCount = Math.min(ntree_limit, groupTrees.length);
    }

    double total = 0;
    for (int t = 0; t < treeCount; t++) {
        total += groupTrees[t].getLeafValue(feat, root_index);
    }
    return total;
}
Example #30
Source File: RegTree.java From xgboost-predictor-java with Apache License 2.0 | 5 votes |
/**
 * Walks the tree from the given root down to a leaf and returns the index of that leaf in
 * {@code nodes}.
 *
 * @param feat feature vector deciding which child is taken at every inner node
 * @param root_id starting root index
 * @return leaf index
 */
public int getLeafIndex(FVec feat, int root_id) {
    int current = root_id;
    Node node = nodes[current];
    while (!node._isLeaf) {
        current = node.next(feat);
        node = nodes[current];
    }
    return current;
}