Java Code Examples for org.apache.spark.SparkContext#stop()
The following examples show how to use org.apache.spark.SparkContext#stop().
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
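Before the examples, a minimal sketch of the usual pattern: create a SparkContext, run the job inside a try block, and call stop() in a finally block so the context is released even if the job throws. The class name, application name, and local master below are placeholders for illustration, not taken from any of the examples on this page.

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class SparkContextStopSketch {
  public static void main(String[] args) {
    // Placeholder app name and local master; in the examples below the
    // master is typically supplied externally (e.g. via spark-submit).
    SparkConf conf = new SparkConf()
        .setAppName("SparkContextStopSketch")
        .setMaster("local[*]");
    SparkContext sc = new SparkContext(conf);
    try {
      // ... run Spark jobs with sc ...
    } finally {
      // Always stop the context so executors and the web UI shut down cleanly.
      sc.stop();
    }
  }
}

Examples 1, 2, and 4 below call stop() at the end of the method body; Example 5 uses the try/finally form shown here.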
Example 1
Source File: JavaLogisticRegressionWithLBFGSExample.java From SparkDemo with MIT License
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("JavaLogisticRegressionWithLBFGSExample");
  SparkContext sc = new SparkContext(conf);
  // $example on$
  String path = "data/mllib/sample_libsvm_data.txt";
  JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();

  // Split initial RDD into two... [60% training data, 40% testing data].
  JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[] {0.6, 0.4}, 11L);
  JavaRDD<LabeledPoint> training = splits[0].cache();
  JavaRDD<LabeledPoint> test = splits[1];

  // Run training algorithm to build the model.
  final LogisticRegressionModel model = new LogisticRegressionWithLBFGS()
    .setNumClasses(10)
    .run(training.rdd());

  // Compute raw scores on the test set.
  JavaRDD<Tuple2<Object, Object>> predictionAndLabels = test.map(
    new Function<LabeledPoint, Tuple2<Object, Object>>() {
      public Tuple2<Object, Object> call(LabeledPoint p) {
        Double prediction = model.predict(p.features());
        return new Tuple2<Object, Object>(prediction, p.label());
      }
    }
  );

  // Get evaluation metrics.
  MulticlassMetrics metrics = new MulticlassMetrics(predictionAndLabels.rdd());
  double accuracy = metrics.accuracy();
  System.out.println("Accuracy = " + accuracy);

  // Save and load model
  model.save(sc, "target/tmp/javaLogisticRegressionWithLBFGSModel");
  LogisticRegressionModel sameModel = LogisticRegressionModel.load(sc,
    "target/tmp/javaLogisticRegressionWithLBFGSModel");
  // $example off$

  sc.stop();
}
Example 2
Source File: CassandraDependenciesJob.java From zipkin-dependencies with Apache License 2.0
public void run() {
  long microsLower = day * 1000;
  long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

  log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", dateStamp, microsLower,
      microsUpper);

  SparkContext sc = new SparkContext(conf);

  List<DependencyLink> links = javaFunctions(sc)
      .cassandraTable(keyspace, "traces")
      // Group rows by trace ID, then turn each trace's rows into dependency links.
      .spanBy(r -> r.getLong("trace_id"), Long.class)
      .flatMapValues(new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper))
      .values()
      // Merge links with the same parent/child pair by summing call and error counts.
      .mapToPair(l -> Tuple2.apply(Tuple2.apply(l.parent(), l.child()), l))
      .reduceByKey((l, r) -> DependencyLink.newBuilder()
          .parent(l.parent())
          .child(l.child())
          .callCount(l.callCount() + r.callCount())
          .errorCount(l.errorCount() + r.errorCount())
          .build())
      .values()
      .collect();
  // Stop the context before writing the collected links back to Cassandra.
  sc.stop();

  saveToCassandra(links);
}
Example 3
Source File: ExplorerSparkContextTest.java From Explorer with Apache License 2.0
@After
public void tearDown() {
  try {
    SparkContext context = sparkContex.getConnector();
    if (context != null) {
      context.stop();
    }
  } catch (SparkEndPointException e) {
    // left empty deliberately
  }
}
Example 4
Source File: JavaSVMWithSGDExample.java From SparkDemo with MIT License
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("JavaSVMWithSGDExample");
  SparkContext sc = new SparkContext(conf);
  // $example on$
  String path = "data/mllib/sample_libsvm_data.txt";
  JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();

  // Split initial RDD into two... [60% training data, 40% testing data].
  JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L);
  training.cache();
  JavaRDD<LabeledPoint> test = data.subtract(training);

  // Run training algorithm to build the model.
  int numIterations = 100;
  final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);

  // Clear the default threshold.
  model.clearThreshold();

  // Compute raw scores on the test set.
  JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(
    new Function<LabeledPoint, Tuple2<Object, Object>>() {
      public Tuple2<Object, Object> call(LabeledPoint p) {
        Double score = model.predict(p.features());
        return new Tuple2<Object, Object>(score, p.label());
      }
    }
  );

  // Get evaluation metrics.
  BinaryClassificationMetrics metrics =
    new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
  double auROC = metrics.areaUnderROC();
  System.out.println("Area under ROC = " + auROC);

  // Save and load model
  model.save(sc, "target/tmp/javaSVMWithSGDModel");
  SVMModel sameModel = SVMModel.load(sc, "target/tmp/javaSVMWithSGDModel");
  // $example off$

  sc.stop();
}
Example 5
Source File: CassandraDependenciesJob.java From zipkin-dependencies with Apache License 2.0
public void run() {
  long microsLower = day * 1000;
  long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

  log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", dateStamp, microsLower,
      microsUpper);

  SparkContext sc = new SparkContext(conf);
  try {
    JavaRDD<DependencyLink> links = flatMapToLinksByTraceId(
        javaFunctions(sc).cassandraTable(keyspace, "span"), microsUpper, microsLower, inTest
    ).values()
        .mapToPair(l -> Tuple2.apply(Tuple2.apply(l.parent(), l.child()), l))
        .reduceByKey((l, r) -> DependencyLink.newBuilder()
            .parent(l.parent())
            .child(l.child())
            .callCount(l.callCount() + r.callCount())
            .errorCount(l.errorCount() + r.errorCount())
            .build())
        .values();
    if (links.isEmpty()) {
      log.info("No dependency links could be processed from spans in table {}/span", keyspace);
      return;
    }
    log.info("Saving dependency links for {} to {}.dependency", dateStamp, keyspace);
    CassandraConnector.apply(conf).withSessionDo(new AbstractFunction1<Session, Void>() {
      @Override public Void apply(Session session) {
        PreparedStatement prepared =
            session.prepare(QueryBuilder.insertInto(keyspace, "dependency")
                .value("day", QueryBuilder.bindMarker("day"))
                .value("parent", QueryBuilder.bindMarker("parent"))
                .value("child", QueryBuilder.bindMarker("child"))
                .value("calls", QueryBuilder.bindMarker("calls"))
                .value("errors", QueryBuilder.bindMarker("errors")));
        for (DependencyLink link : links.collect()) {
          BoundStatement bound = prepared.bind()
              .setDate("day", LocalDate.fromMillisSinceEpoch(day))
              .setString("parent", link.parent())
              .setString("child", link.child())
              .setLong("calls", link.callCount());
          if (link.errorCount() > 0L) {
            bound.setLong("errors", link.errorCount());
          }
          session.execute(bound);
        }
        return null;
      }
    });
  } finally {
    sc.stop();
  }
}