Java Code Examples for org.apache.spark.api.java.JavaSparkContext#setLogLevel()
The following examples show how to use org.apache.spark.api.java.JavaSparkContext#setLogLevel(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
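Before the project examples, here is a minimal, self-contained sketch of the call itself; the application name and local master URL are placeholders chosen for this illustration, not taken from any of the projects below.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetLogLevelExample {
    public static void main(String[] args) {
        // Placeholder app name and local master, used only for this sketch.
        SparkConf conf = new SparkConf().setAppName("SetLogLevelExample").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        // Override the log level configured via log4j for this context.
        // Valid levels include ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN.
        jsc.setLogLevel("WARN");

        jsc.close();
    }
}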
Example 1
Source File: Accumulator.java From sparkResearch with Apache License 2.0 | 6 votes |
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder()
            .master("local[4]").appName("AttackFind").getOrCreate();
    // Initialize the sparkContext
    JavaSparkContext javaSparkContext = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
    // Log output level
    javaSparkContext.setLogLevel("ERROR");
    // Create the RDD
    JavaRDD<String> rdd = javaSparkContext.parallelize(Arrays.asList(JavaBean.origin_id, JavaBean.asset_name)).cache();
    AttackAccumulator attackAccumulator = new AttackAccumulator();
    // Register the accumulator
    javaSparkContext.sc().register(attackAccumulator, "attack_count");
    // Generate a random number as the value
    JavaPairRDD<String, String> javaPairRDD = rdd.mapToPair((PairFunction<String, String, String>) s -> {
        Integer random = new Random().nextInt(10);
        return new Tuple2<>(s, s + ":" + random);
    });
    javaPairRDD.foreach((VoidFunction<Tuple2<String, String>>) tuple2 -> {
        attackAccumulator.add(tuple2._2);
    });
    System.out.println(attackAccumulator.value());
}
Example 2
Source File: SparkTempViewProvider.java From hudi with Apache License 2.0 | 5 votes |
public SparkTempViewProvider(String appName) {
    try {
        SparkConf sparkConf = new SparkConf().setAppName(appName)
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer").setMaster("local[8]");
        jsc = new JavaSparkContext(sparkConf);
        jsc.setLogLevel("ERROR");
        sqlContext = new SQLContext(jsc);
    } catch (Throwable ex) {
        // log full stack trace and rethrow. Without this its difficult to debug failures, if any
        LOG.error("unable to initialize spark context ", ex);
        throw new HoodieException(ex);
    }
}
Example 3
Source File: HoodieClientTestHarness.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with the given application name.
 *
 * @param appName The specified application name.
 */
protected void initSparkContexts(String appName) {
    // Initialize a local spark env
    jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName));
    jsc.setLogLevel("ERROR");
    hadoopConf = jsc.hadoopConfiguration();

    // SQLContext stuff
    sqlContext = new SQLContext(jsc);
}
Example 4
Source File: ConvertACNVResults.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");

    final CNLOHCaller cnlohCaller = new CNLOHCaller();
    cnlohCaller.setRhoThreshold(rhoThreshold);

    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(new File(segmentsFile));
    String sampleName = determineSampleName(new File(segmentsFile));

    // Create the outputdir
    try {
        FileUtils.forceMkdir(outputDir);
    } catch (final IOException ioe) {
        throw new UserException("Cannot create " + outputDir + ". Does a file (not directory) exist with the same name? Do you have access to create?", ioe);
    }

    final Genome genome = new Genome(targetCoveragesFile, snpCountsFile);

    // Make the calls
    logger.info("Making the balanced-segment (and CNLoH) calls...");
    final List<AllelicCalls> calls = cnlohCaller.makeCalls(segments, numIterations, ctx);

    // Write updated ACNV file with calls
    logger.info("Writing updated ACNV file with calls ...");
    final File finalACNVModeledSegmentsFile = new File(outputDir, getSegmentsBaseFilename() + "." + BALANCED_SEG_FILE_TAG + ".seg");
    SegmentUtils.writeCnLoHACNVModeledSegmentFile(finalACNVModeledSegmentsFile, calls, genome);

    // write file for GATK CNV formatted seg file
    logger.info("Writing file with same output format as GATK CNV...");
    final File finalModeledSegmentsFileAsGatkCNV = new File(outputDir, getSegmentsBaseFilename() + "." + GATK_SEG_FILE_TAG + ".seg");
    SegmentUtils.writeModeledSegmentFile(finalModeledSegmentsFileAsGatkCNV,
            ACNVModeledSegmentConversionUtils.convertACNVModeledSegmentsToModeledSegments(segments, genome), sampleName);

    // write file for ACS-compatible output to help Broad CGA
    logger.info("Writing file with same output format as Broad CGA Allelic CapSeg ...");
    final File finalACSModeledSegmentsFile = new File(outputDir, getSegmentsBaseFilename() + "." + CGA_ACS_SEG_FILE_TAG + ".seg");
    ACSModeledSegmentUtils.writeACNVModeledSegmentFileAsAllelicCapSegFile(finalACSModeledSegmentsFile, calls, genome);

    // write files for TITAN-compatible output to help Broad CGA
    logger.info("Writing het file with input format for TITAN ...");
    final File finalTitanHetFile = new File(outputDir, getSegmentsBaseFilename() + "." + TITAN_HET_FILE_TAG + ".tsv");
    TitanFileConverter.convertHetPulldownToTitanHetFile(snpCountsFile, finalTitanHetFile);
    logger.info("Writing tangent-normalized target CR estimates with input format for TITAN ...");
    final File finalTitanTNFile = new File(outputDir, getSegmentsBaseFilename() + "." + TITAN_TN_FILE_TAG + ".tsv");
    TitanFileConverter.convertCRToTitanCovFile(targetCoveragesFile, finalTitanTNFile);

    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: CNLoH and splits called for sample " + sampleName + ".");
}
Example 5
Source File: AllelicCNV.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    validateArguments();

    if (!new HDF5Library().load(null)) {  //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " +
                "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }

    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");

    //the string after the final slash in the output prefix (which may be an absolute file path) will be used as the sample name
    final String sampleName = outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1);

    logger.info("Starting workflow for " + sampleName + "...");

    //make Genome from input target coverages and SNP counts
    logger.info("Loading input files...");
    final Genome genome = new Genome(tangentNormalizedCoverageFile, snpCountsFile);

    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN =
            allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;

    //load target-coverage segments from input file
    final List<ModeledSegment> targetSegmentsWithCalls = SegmentUtils.readModeledSegmentsFromSegmentFile(targetSegmentsFile);
    logger.info("Number of input target-coverage segments: " + targetSegmentsWithCalls.size());

    //merge copy-neutral and uncalled segments (unless disabled) and fix up target-segment start breakpoints
    logger.info("Preparing target-coverage segments...");
    final List<SimpleInterval> targetSegments = prepareTargetSegments(genome, targetSegmentsWithCalls);

    //segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
    //(final segment file is output as a side effect)
    logger.info("Performing SNP segmentation...");
    final List<SimpleInterval> snpSegments = performSNPSegmentationStep(sampleName, genome);

    //combine SNP and target-coverage segments
    logger.info("Combining SNP and target-coverage segments...");
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, genome);
    logger.info("Number of segments after segment union: " + unionedSegments.size());
    final File unionedSegmentsFile = new File(outputPrefix + "-" + UNION_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
    SegmentUtils.writeSegmentFileWithNumTargetsAndNumSNPs(unionedSegmentsFile, unionedSegments, genome);
    logSegmentFileWrittenMessage(unionedSegmentsFile);

    //small-segment merging (note that X and Y are always small segments and dropped, since GATK CNV drops them)
    logger.info("Merging small segments...");
    final SegmentedGenome segmentedGenomeWithSmallSegments = new SegmentedGenome(unionedSegments, genome);
    final SegmentedGenome segmentedGenome = segmentedGenomeWithSmallSegments.mergeSmallSegments(smallSegmentTargetNumberThreshold);
    logger.info("Number of segments after small-segment merging: " + segmentedGenome.getSegments().size());

    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN,
            numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);

    //write initial segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, INITIAL_FIT_FILE_TAG);

    //similar-segment merging (segment files are output for each merge iteration)
    logger.info("Merging similar segments...");
    performSimilarSegmentMergingStep(modeller);

    //write final segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, FINAL_FIT_FILE_TAG);

    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: Allelic CNV run complete for sample " + sampleName + ".");
}
Example 6
Source File: SparkCommandLineProgram.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private final void setSparkVerbosity(final JavaSparkContext ctx) {
    final String sparkVerbosity = sparkArgs.getSparkVerbosity(VERBOSITY);
    logger.info("Spark verbosity set to " + sparkVerbosity + " (see --" + SparkCommandLineArgumentCollection.SPARK_VERBOSITY_LONG_NAME + " argument)");
    ctx.setLogLevel(sparkVerbosity);
}