ml.dmlc.xgboost4j.java.XGBoost Java Exaples

Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0

6 votes

@Override
public void buildClassifier(Instances data) throws Exception {
    //instead of (on a high level) calling build classifier on the same thing 10 times, 
    //with each subsequent call overwriting the training done in the last, 
    //we'll instead build each classifier in the models[] once, storing the traind model for each cv fold
    //when we move to the next num iterations, instead of building from scratch
    //we'll continue iterating from the stored models, which we can do since the 
    //cv folds will be identical.
    // so for a given para set, this build classifier will essentially be called 10 times,
    //once for each cv fold 

    modelIndex++; //going to use this model for this fold
    TunedXGBoost model = models[modelIndex];

    if (numIterations == 0) {
        //first of the 'numiterations' paras, i.e first build of each model. just build normally
        // - including the initialisation of all the meta info
        model.buildClassifier(data);
    } else {
        //continuing on from an already build model with less iterations
        //dont call normal build classifier, since that'll reinitialise 
        //a bunch of stuff, including the booster itself. instead just 
        //continue with a modified call to the trainer function
        model.booster = XGBoost.train(model.trainDMat, model.params, newNumIterations - numIterations, model.watches, null, null, null, 0, model.booster);
    }
}

Source File: MLXGBoost.java From RecSys2018 with Apache License 2.0

6 votes

public static Async<Booster> asyncModel(final String modelFile,
		final int nthread) {
	// load xgboost model
	final Async<Booster> modelAsync = new Async<Booster>(() -> {
		try {
			Booster bst = XGBoost.loadModel(modelFile);
			if (nthread > 0) {
				bst.setParam("nthread", nthread);
			}
			return bst;
		} catch (XGBoostError e) {
			e.printStackTrace();
			return null;
		}
	}, Booster::dispose);
	return modelAsync;
}

Source File: MaxEdgeScoreDependencyParser.java From SmoothNLP with GNU General Public License v3.0

5 votes

public static Booster loadXgbModel(String modelAddr) {

        try{
            InputStream modelIS = SmoothNLP.IOAdaptor.open(modelAddr);
            Booster booster = XGBoost.loadModel(modelIS);
            return booster;
        }catch(Exception e){
            // add proper warnings later
            System.out.println(e);
            return null;
        }
    }

Source File: DependencyGraghEdgeCostTrain.java From SmoothNLP with GNU General Public License v3.0

5 votes

public static void trainXgbModel(String trainFile, String devFile, String modelAddr, int nround, int negSampleRate, int earlyStop, int nthreads) throws IOException{
    final DMatrix trainMatrix = readCoNLL2DMatrix(trainFile,negSampleRate);
    final DMatrix devMatrix = readCoNLL2DMatrix(devFile,negSampleRate);
    try{
        Map<String, Object> params = new HashMap<String, Object>() {
            {
                put("nthread", nthreads);
                put("max_depth", 16);
                put("silent", 0);
                put("objective", "binary:logistic");
                put("colsample_bytree",0.95);
                put("colsample_bylevel",0.95);
                put("eta",0.2);
                put("subsample",0.95);
                put("lambda",0.2);

                put("min_child_weight",5);
                put("scale_pos_weight",negSampleRate);

                // other parameters
                // "objective" -> "multi:softmax", "num_class" -> "6"

                put("eval_metric", "logloss");
                put("tree_method","approx");
            }
        };
        Map<String, DMatrix> watches = new HashMap<String, DMatrix>() {
            {
                put("train", trainMatrix);
                put("dev",devMatrix);
            }
        };
        Booster booster = XGBoost.train(trainMatrix, params, nround, watches, null, null,null,earlyStop);
        OutputStream outstream = SmoothNLP.IOAdaptor.create(modelAddr);
        booster.saveModel(outstream);
    }catch(XGBoostError e){
        System.out.println(e);
    }
}

Source File: UtilFns.java From SmoothNLP with GNU General Public License v3.0

5 votes

public static Booster loadXgbModel(String modelAddr) {

        try{
            InputStream modelIS = SmoothNLP.IOAdaptor.open(modelAddr);
            Booster booster = XGBoost.loadModel(modelIS);
            return booster;
        }catch(Exception e){
            // add proper warnings later
            System.out.println(e);
            return null;
        }
    }

Source File: XGBoostModel.java From zoltar with Apache License 2.0

5 votes

/**
 * Note: Please use Models from zoltar-models module.
 *
 * <p>Returns a XGBoost model given a URI to the serialized model file.
 */
public static XGBoostModel create(final Model.Id id, final URI modelUri) throws IOException {
  try {
    GompLoader.start();
    final InputStream is = Files.newInputStream(FileSystemExtras.path(modelUri));
    return new AutoValue_XGBoostModel(id, XGBoost.loadModel(is));
  } catch (final XGBoostError xgBoostError) {
    throw new IOException(xgBoostError);
  }
}

Source File: MLXGBoost.java From RecSys2018 with Apache License 2.0

5 votes

public static MLXGBoostFeature[] analyzeFeatures(final String modelFile,
		final String featureFile) throws Exception {

	Booster model = XGBoost.loadModel(modelFile);

	List<String> temp = new LinkedList<String>();
	try (BufferedReader reader = new BufferedReader(
			new FileReader(featureFile))) {
		String line;
		while ((line = reader.readLine()) != null) {
			temp.add(line);
		}
	}

	// get feature importance scores
	String[] featureNames = new String[temp.size()];
	temp.toArray(featureNames);
	int[] importances = MLXGBoost.getFeatureImportance(model, featureNames);

	// sort features by their importance
	MLXGBoostFeature[] sortedFeatures = new MLXGBoostFeature[featureNames.length];
	for (int i = 0; i < featureNames.length; i++) {
		sortedFeatures[i] = new MLXGBoostFeature(featureNames[i],
				importances[i]);
	}
	Arrays.sort(sortedFeatures, new MLXGBoostFeature.ScoreComparator(true));

	return sortedFeatures;
}

Source File: XGBoostUtils.java From incubator-hivemall with Apache License 2.0

5 votes

@Nonnull
public static Booster deserializeBooster(@Nonnull final Text model) throws HiveException {
    try {
        byte[] b = IOUtils.fromCompressedText(model.getBytes(), model.getLength());
        return XGBoost.loadModel(new FastByteArrayInputStream(b));
    } catch (Throwable e) {
        throw new HiveException("Failed to deserialize a booster", e);
    }
}

Source File: XGBoostMethod.java From samantha with MIT License

5 votes

public void learn(PredictiveModel model, LearningData learningData, LearningData validData) {
    try {
        DMatrix dtrain = new DMatrix(new XGBoostIterator(learningData), null);
        Map<String, DMatrix> watches = new HashMap<>();
        if (validData != null) {
            watches.put("Validation", new DMatrix(new XGBoostIterator(validData), null));
        }
        Booster booster = XGBoost.train(dtrain, params, round, watches, null, null);
        XGBoostModel boostModel = (XGBoostModel) model;
        boostModel.setXGBooster(booster);
    } catch (XGBoostError e) {
        throw new BadRequestException(e);
    }
}

Source File: DependencyGraphRelationshipTagTrain.java From SmoothNLP with GNU General Public License v3.0

4 votes

public static void trainXgbModel(String trainFile, String devFile, String modelAddr, int nround, int earlyStop,int nthreads ) throws IOException{
    final DMatrix trainMatrix = readCoNLL2DMatrix(trainFile);
    final DMatrix devMatrix = readCoNLL2DMatrix(devFile);
    try{
        Map<String, Object> params = new HashMap<String, Object>() {
            {
                put("nthread", nthreads);
                put("max_depth", 12);
                put("silent", 0);
                put("objective", "multi:softprob");
                put("colsample_bytree",0.90);
                put("colsample_bylevel",0.90);
                put("eta",0.2);
                put("subsample",0.95);
                put("lambda",1.0);

                // tree methods for regulation
                put("min_child_weight",5);
                put("max_leaves",128);

                // other parameters
                // "objective" -> "multi:softmax", "num_class" -> "6"

                put("eval_metric", "merror");
                put("tree_method","approx");
                put("num_class",tag2float.size());

                put("min_child_weight",5);
            }
        };
        Map<String, DMatrix> watches = new HashMap<String, DMatrix>() {
            {
                put("train", trainMatrix);
                put("dev",devMatrix);
            }
        };
        Booster booster = XGBoost.train(trainMatrix, params, nround, watches, null, null,null,earlyStop);
        OutputStream outstream = SmoothNLP.IOAdaptor.create(modelAddr);
        booster.saveModel(outstream);



    }catch(XGBoostError e){
        System.out.println(e);
    }
}

Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0

4 votes

/**
     * Does the 'actual' initialising and building of the model, as opposed to experimental code
     * setup etc
     * @throws Exception 
     */    
    public void buildActualClassifer() throws Exception {
        if(tuneParameters)
            tuneHyperparameters();

        String objective = "multi:softprob"; 
//        String objective = numClasses == 2 ? "binary:logistic" : "multi:softprob";

        trainDMat = wekaInstancesToDMatrix(trainInsts);
        params = new HashMap<String, Object>();
        //todo: this is a mega hack to enforce 1 thread only on cluster (else bad juju).
        //fix some how at some point. 
        if (runSingleThreaded || System.getProperty("os.name").toLowerCase().contains("linux"))
            params.put("nthread", 1);
        // else == num processors by default

        //fixed params
        params.put("silent", 1);
        params.put("objective", objective);
        if(objective.contains("multi"))
            params.put("num_class", numClasses); //required with multiclass problems
        params.put("seed", seed);
        params.put("subsample", rowSubsampling);
        params.put("colsample_bytree", colSubsampling);

        //tunable params (numiterations passed directly to XGBoost.train(...)
        params.put("learning_rate", learningRate);
        params.put("max_depth", maxTreeDepth);
        params.put("min_child_weight", minChildWeight);

        watches = new HashMap<String, DMatrix>();
//        if (getDebugPrinting() || getDebug())
//        watches.put("train", trainDMat);

//        int earlyStopping = (int) Math.ceil(numIterations / 10.0); 
        //e.g numIts == 25    =>   stop after 3 increases in err 
        //    numIts == 250   =>   stop after 25 increases in err

//        booster = XGBoost.train(trainDMat, params, numIterations, watches, null, null, null, earlyStopping);
        booster = XGBoost.train(trainDMat, params, numIterations, watches, null, null);

    }

ml.dmlc.xgboost4j.java.XGBoost Java Examples