org.apache.spark.SparkException Java Examples
The following examples show how to use
org.apache.spark.SparkException.
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
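Most of the examples below share one pattern: an exception thrown inside a Spark task reaches the driver wrapped in org.apache.spark.SparkException, and because Scala does not declare checked exceptions in the bytecode, Java callers cannot catch SparkException directly around a call that does not declare it, so they catch a broad Exception, test it with instanceof, and unwrap getCause(). The snippet below is a minimal, hypothetical sketch of that pattern; the class name and the deliberately failing job are illustrative only and are not taken from any of the projects listed.

import java.util.Arrays;
import org.apache.spark.SparkException;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkExceptionSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "spark-exception-sketch");
        try {
            // The failure thrown inside the map task surfaces on the driver as a SparkException.
            sc.parallelize(Arrays.asList(1, 2, 3))
              .map(i -> { throw new IllegalStateException("boom"); })
              .collect();
        } catch (Exception e) {
            // collect() does not declare SparkException, so the usual idiom is a broad catch,
            // an instanceof check, and inspection of getCause() for the original task failure.
            if (e instanceof SparkException) {
                System.err.println("Spark job failed, original cause: " + e.getCause());
            } else {
                System.err.println("Unexpected failure: " + e);
            }
        } finally {
            sc.stop();
        }
    }
}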
Example #1
Source File: BatchProfilerIntegrationTest.java From metron with Apache License 2.0
@Test
public void testBatchProfilerWithInvalidProfile() {
    profilerProperties.put(TELEMETRY_INPUT_READER.getKey(), JSON.toString());
    profilerProperties.put(TELEMETRY_INPUT_PATH.getKey(), "src/test/resources/telemetry.json");

    // the batch profiler should error out, if there is a bug in *any* of the profiles
    BatchProfiler profiler = new BatchProfiler();
    assertThrows(SparkException.class,
            () -> profiler.run(spark, profilerProperties, getGlobals(), readerProperties, fromJSON(invalidProfileJson)));
}
Example #2
Source File: SMOutputFormatTest.java From spliceengine with GNU Affero General Public License v3.0
@Test
public void writeExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<RowLocation, ExecRow> dataset = new SparkPairDataSet<>(
            SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new ToRowLocationFunction()));
    JavaPairRDD<RowLocation, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(true));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");

    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());

    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }

    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
Example #3
Source File: SMOutputFormatTest.java From spliceengine with GNU Affero General Public License v3.0
@Test
public void readExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<ExecRow, ExecRow> dataset = new SparkPairDataSet<>(
            SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new FailFunction()));
    JavaPairRDD<ExecRow, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(false));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");

    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());

    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }

    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
Example #4
Source File: MarkDuplicatesSparkUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testReadsMissingReadGroups() {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname,
            true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
    samRecordSetBuilder.addFrag("READ", 0, 10000, false);

    JavaRDD<GATKRead> reads = ctx.parallelize(Lists.newArrayList(samRecordSetBuilder.getRecords()), 2)
            .map(SAMRecordToGATKReadAdapter::new);
    reads = reads.map(r -> { r.setReadGroup(null); return r; });
    SAMFileHeader header = samRecordSetBuilder.getHeader();

    try {
        MarkDuplicatesSparkUtils.transformToDuplicateNames(header,
                MarkDuplicatesScoringStrategy.SUM_OF_BASE_QUALITIES, null, reads, 2, false).collect();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
Example #5
Source File: QuicksqlServerMeta.java From Quicksql with MIT License
public QueryResult getSparkQueryResult(Entry<List<Attribute>, List<GenericRowWithSchema>> sparkData) throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    if (CollectionUtils.isEmpty(sparkData.getKey())) {
        throw new SparkException("collect data error");
    }
    List<Attribute> attributes = sparkData.getKey();
    List<GenericRowWithSchema> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        data.add(column.values());
    });
    for (int index = 0; index < sparkData.getKey().size(); index++) {
        Attribute attribute = sparkData.getKey().get(index);
        ScalarType columnType = getColumnType(attribute.dataType());
        meta.add(new ColumnMetaData(index, false, true, false, false,
                attribute.nullable() ? 1 : 0, true, -1, attribute.name(), attribute.name(), null,
                -1, -1, null, null, columnType, true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}
Example #6
Source File: SqoopSparkClientFactory.java From sqoop-on-spark with Apache License 2.0
public static LocalSqoopSparkClient createSqoopSparkClient(SqoopConf sqoopConf)
        throws IOException, SparkException {
    Map<String, String> sparkConf = prepareSparkConfMapFromSqoopConfig(sqoopConf);
    // Submit spark job through local spark context while spark master is local mode,
    // otherwise submit spark job through remote spark context.
    String master = sparkConf.get("spark.master");
    if (master.equals("local") || master.startsWith("local[")) {
        // With local spark context, all user sessions share the same spark context.
        return LocalSqoopSparkClient.getInstance(generateSparkConf(sparkConf));
    } else {
        LOG.info("Using yarn submitter");
        // TODO: hook up yarn submitter
        return null;
    }
}
Example #7
Source File: SparkPipelineResult.java From beam with Apache License 2.0
private static RuntimeException beamExceptionFrom(final Throwable e) {
    // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler
    // won't let you catch something that is not declared, so we can't catch
    // SparkException directly, instead we do an instanceof check.
    if (e instanceof SparkException) {
        if (e.getCause() != null && e.getCause() instanceof UserCodeException) {
            UserCodeException userException = (UserCodeException) e.getCause();
            return new Pipeline.PipelineExecutionException(userException.getCause());
        } else if (e.getCause() != null) {
            return new Pipeline.PipelineExecutionException(e.getCause());
        }
    }
    return runtimeExceptionFrom(e);
}
Example #8
Source File: SparkStructuredStreamingPipelineResult.java From beam with Apache License 2.0
private static RuntimeException beamExceptionFrom(final Throwable e) {
    // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler
    // won't let you catch something that is not declared, so we can't catch
    // SparkException directly, instead we do an instanceof check.
    if (e instanceof SparkException) {
        if (e.getCause() != null && e.getCause() instanceof UserCodeException) {
            UserCodeException userException = (UserCodeException) e.getCause();
            return new Pipeline.PipelineExecutionException(userException.getCause());
        } else if (e.getCause() != null) {
            return new Pipeline.PipelineExecutionException(e.getCause());
        }
    }
    return runtimeExceptionFrom(e);
}
Example #9
Source File: HaplotypeCallerSparkIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Test that in VCF mode we're >= 99% concordant with GATK3.5 results
 * THIS TEST explodes with an exception because Allele-Specific annotations are not supported in vcf mode yet.
 * It's included to parallel the matching (also exploding) test for the non-spark HaplotypeCaller
 * {@link org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerIntegrationTest#testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations()}
 */
@Test(expectedExceptions = SparkException.class) // this should be a UserException, but spark exceptions are not unwrapped yet
public void testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations() throws Exception {
    Utils.resetRandomGenerator();
    final File output = createTempFile("testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations", ".vcf");

    // Created by running
    // java -jar ~/bin/GenomeAnalysisTK-3.5.0/GenomeAnalysisTK.jar -T HaplotypeCaller \
    //     -I ./src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam \
    //     -R src/test/resources/large/human_g1k_v37.20.21.fasta -L 20:10000000-10100000 \
    //     --out as.gatk3.5.noDownsample.vcf -G StandardHC -G Standard -G AS_Standard \
    //     --disableDithering --no_cmdline_in_header -dt NONE --maxReadsInRegionPerSample 100000000 --minReadsPerAlignmentStart 100000
    final File gatk3Output = new File(TEST_FILES_DIR + "expected.testVCFMode.gatk3.5.alleleSpecific.vcf");

    final String[] args = {
            "-I", NA12878_20_21_WGS_bam,
            "-R", b37_2bit_reference_20_21,
            "-L", "20:10000000-10100000",
            "-O", output.getAbsolutePath(),
            "-G", "StandardAnnotation",
            "-G", "AS_StandardAnnotation",
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "-stand_call_conf", "30.0"
    };
    runCommandLine(args);

    final double concordance = HaplotypeCallerIntegrationTest.calculateConcordance(output, gatk3Output);
    Assert.assertTrue(concordance >= 0.99, "Concordance with GATK 3.5 in AS VCF mode is < 99% (" + concordance + ")");
}
Example #10
Source File: RDDConverterUtilsExtTest.java From systemds with Apache License 2.0
@Test(expected = SparkException.class)
public void testStringDataFrameToVectorDataFrameNonNumbers() {
    List<String> list = new ArrayList<>();
    list.add("[cheeseburger,fries]");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new StringToRow());
    SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();

    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);

    Dataset<Row> inDF = sparkSession.createDataFrame(javaRddRow, schema);
    Dataset<Row> outDF = RDDConverterUtilsExt.stringDataFrameToVectorDataFrame(sparkSession, inDF);

    // trigger evaluation to throw exception
    outDF.collectAsList();
}
Example #11
Source File: AbstractMarkDuplicatesCommandLineProgramTest.java From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testNonExistantReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 485253, false, false, true, true,
            "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "NotADuplicateGroup");

    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.HeaderMissingReadGroup);
    }
}
Example #12
Source File: AbstractMarkDuplicatesCommandLineProgramTest.java From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testNoReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 485253, false, false, true, true,
            "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, null);

    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
Example #13
Source File: TestFileSystemInput.java From envelope with Apache License 2.0
@Test (expected = SparkException.class)
public void readInputFormatMismatchTranslator() throws Exception {
    Map<String, Object> paramMap = new HashMap<>();
    paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
    paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
    paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, KeyValueTextInputFormat.class.getCanonicalName());
    paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
    config = ConfigFactory.parseMap(paramMap);

    FileSystemInput formatInput = new FileSystemInput();
    assertNoValidationFailures(formatInput, config);
    formatInput.configure(config);

    formatInput.read().show();
}
Example #14
Source File: StreamingService.java From cxf with Apache License 2.0
private void processStream(AsyncResponse async, List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
                .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
        SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
        jssc.addStreamingListener(sparkListener);

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new OutputFunction(streamOut));
        jssc.start();

        executor.execute(new SparkJob(async, sparkListener));
    } catch (Exception ex) {
        // the compiler does not allow to catch SparkException directly
        if (ex instanceof SparkException) {
            async.cancel(60);
        } else {
            async.resume(new WebApplicationException(ex));
        }
    }
}
Example #15
Source File: HExceptionFactory.java From spliceengine with GNU Affero General Public License v3.0
private IOException parseSparkException(SparkException e) {
    String errMessage = e.getMessage();
    if (errMessage == null)
        return new IOException("Unknown Spark exception");
    else if (SPARK_CANCELLATION_PATTERN.matcher(errMessage).find())
        return new OperationCancelledException();
    else
        return new IOException(e);
}
Example #16
Source File: QuicksqlServerMeta.java From Quicksql with MIT License
public QueryResult getFlinkQueryResult(Entry<TableSchema, List<Row>> sparkData) throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    TableSchema tableSchema = sparkData.getKey();
    if (tableSchema == null || tableSchema.getFieldDataTypes().length != tableSchema.getFieldNames().length) {
        throw new SparkException("collect data error");
    }
    org.apache.flink.table.types.DataType[] fieldDataTypes = tableSchema.getFieldDataTypes();
    String[] fieldNames = tableSchema.getFieldNames();
    List<Row> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        Object[] objects = new Object[column.getArity()];
        for (int i = 0; i < column.getArity(); i++) {
            objects[i] = column.getField(i);
        }
        data.add(Arrays.asList(objects));
    });
    for (int index = 0; index < fieldNames.length; index++) {
        ScalarType columnType = getColumnType2(fieldDataTypes[index]);
        meta.add(new ColumnMetaData(index, false, true, false, false, 1, true, -1,
                fieldNames[index], fieldNames[index], null, -1, -1, null, null, columnType,
                true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}