Java Code Examples for org.apache.spark.sql.Encoders#bean()
The following examples show how to use
org.apache.spark.sql.Encoders#bean().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataSetApplication.java From sparkResearch with Apache License 2.0 | 5 votes |
public static void main(String[] args) { SparkSession sparkSession = SparkSession.builder().master("local") .appName("Java Spark SQL") .getOrCreate(); Person person = new Person("spark",10); Encoder<Person> encoder = Encoders.bean(Person.class); Dataset<Person> dataset = sparkSession.createDataset(Collections.singletonList(person),encoder); dataset.show(); //最终输出 {name:spark;age:10} /*常见类型的编码器*/ Encoder<Integer> integerEncoder = Encoders.INT(); Dataset<Integer> integerDataset = sparkSession.createDataset(Arrays.asList(1,2),integerEncoder); Dataset<Integer> result = integerDataset.map(new MapFunction<Integer, Integer>() { @Override public Integer call(Integer value) { return value+1; } },integerEncoder); result.collect(); //最终输出 [2,3] /*通过提供一个类,可以将数据流转换为数据集。基于名称的映射*/ String url = "/usr/local/text.json"; Dataset<Person> personDataset = sparkSession.read().json(url).as(encoder); personDataset.show(); //最终输出 name:... age:,,,, }
Example 2
Source File: JavaUserDefinedTypedAggregation.java From incubator-nemo with Apache License 2.0 | 5 votes |
/** * Main function. * * @param args arguments. */ public static void main(final String[] args) { SparkSession spark = SparkSession .builder() .appName("Java Spark SQL user-defined Datasets aggregation example") .getOrCreate(); Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class); String path = args[0]; Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder); ds.show(); // +-------+------+ // | name|salary| // +-------+------+ // |Michael| 3000| // | Andy| 4500| // | Justin| 3500| // | Berta| 4000| // +-------+------+ MyAverage myAverage = new MyAverage(); // Convert the function to a `TypedColumn` and give it a name TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary"); Dataset<Double> result = ds.select(averageSalary); result.show(); // +--------------+ // |average_salary| // +--------------+ // | 3750.0| // +--------------+ spark.stop(); }
Example 3
Source File: JavaBean.java From learning-spark-with-java with MIT License | 5 votes |
public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("Dataset-JavaBean") .master("local[4]") .getOrCreate(); // // The Java API requires you to explicitly instantiate an encoder for // any JavaBean you want to use for schema inference // Encoder<Number> numberEncoder = Encoders.bean(Number.class); // // Create a container of the JavaBean instances // List<Number> data = Arrays.asList( new Number(1, "one", "un"), new Number(2, "two", "deux"), new Number(3, "three", "trois")); // // Use the encoder and the container of JavaBean instances to create a // Dataset // Dataset<Number> ds = spark.createDataset(data, numberEncoder); System.out.println("*** here is the schema inferred from the bean"); ds.printSchema(); System.out.println("*** here is the data"); ds.show(); // Use the convenient bean-inferred column names to query System.out.println("*** filter by one column and fetch others"); ds.where(col("i").gt(2)).select(col("english"), col("french")).show(); spark.stop(); }
Example 4
Source File: JavaUserDefinedTypedAggregation.java From nemo with Apache License 2.0 | 5 votes |
/** * Main function. * @param args arguments. */ public static void main(final String[] args) { SparkSession spark = SparkSession .builder() .appName("Java Spark SQL user-defined Datasets aggregation example") .getOrCreate(); Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class); String path = args[0]; Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder); ds.show(); // +-------+------+ // | name|salary| // +-------+------+ // |Michael| 3000| // | Andy| 4500| // | Justin| 3500| // | Berta| 4000| // +-------+------+ MyAverage myAverage = new MyAverage(); // Convert the function to a `TypedColumn` and give it a name TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary"); Dataset<Double> result = ds.select(averageSalary); result.show(); // +--------------+ // |average_salary| // +--------------+ // | 3750.0| // +--------------+ spark.stop(); }
Example 5
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Streams three records to the "test-nonexisting" resource with ES_INDEX_AUTO_CREATE set to
 * "no". The test is declared to expect an {@code EsHadoopIllegalArgumentException}; the final
 * assertion checks that the target does not exist.
 */
@Test(expected = EsHadoopIllegalArgumentException.class)
public void test0FailOnIndexCreationDisabled() throws Exception {
    final String index = wrapIndex(resource("test-nonexisting", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("Spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("Hadoop");

    final RecordBean yarnDoc = new RecordBean();
    yarnDoc.setId(3);
    yarnDoc.setName("YARN");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .withInput(yarnDoc)
            .expectingToThrow(EsHadoopIllegalArgumentException.class)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(index))
                    .option(ES_INDEX_AUTO_CREATE, "no")
                    .format("es"),
            index
    );

    final boolean indexExists = RestUtils.exists(index);
    assertTrue(!indexExists);
}
Example 6
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Streams three records to the "test-write" resource and checks that the target exists and
 * that its search output contains each record's name.
 */
@Test
public void test1BasicWrite() throws Exception {
    final String index = wrapIndex(resource("test-write", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("Spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("Hadoop");

    final RecordBean yarnDoc = new RecordBean();
    yarnDoc.setId(3);
    yarnDoc.setName("YARN");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .withInput(yarnDoc)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(index))
                    .format("es"),
            index
    );

    assertTrue(RestUtils.exists(index));

    // The search is issued once per assertion, exactly as many times as there are names.
    final String searchUrl = index + "/_search?";
    assertThat(RestUtils.get(searchUrl), containsString("Spark"));
    assertThat(RestUtils.get(searchUrl), containsString("Hadoop"));
    assertThat(RestUtils.get(searchUrl), containsString("YARN"));
}
Example 7
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Streams three records with "es.mapping.id" set to "id", then checks that three documents
 * are readable from the target, that documents "/1", "/2" and "/3" exist under the doc path,
 * and that the search output contains "Spark".
 */
@Test
public void test1WriteWithMappingId() throws Exception {
    final String index = wrapIndex(resource("test-write-id", "data"));
    final String documents = wrapIndex(docPath("test-write-id", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("Spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("Hadoop");

    final RecordBean yarnDoc = new RecordBean();
    yarnDoc.setId(3);
    yarnDoc.setName("YARN");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .withInput(yarnDoc)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(index))
                    .option("es.mapping.id", "id")
                    .format("es"),
            index
    );

    final JavaSparkContext javaContext = new JavaSparkContext(spark.sparkContext());
    final long storedCount = JavaEsSpark.esRDD(javaContext, index).count();
    assertEquals(3, storedCount);

    assertTrue(RestUtils.exists(documents + "/1"));
    assertTrue(RestUtils.exists(documents + "/2"));
    assertTrue(RestUtils.exists(documents + "/3"));

    final String searchOutput = RestUtils.get(index + "/_search?");
    assertThat(searchOutput, containsString("Spark"));
}
Example 8
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Streams three records with ES_MAPPING_EXCLUDE set to "name" and checks that the target
 * exists while its search output contains none of the record names.
 */
@Test
public void test1WriteWithMappingExclude() throws Exception {
    final String index = wrapIndex(resource("test-mapping-exclude", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("Spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("Hadoop");

    final RecordBean yarnDoc = new RecordBean();
    yarnDoc.setId(3);
    yarnDoc.setName("YARN");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .withInput(yarnDoc)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(index))
                    .option(ES_MAPPING_EXCLUDE, "name")
                    .format("es"),
            index
    );

    assertTrue(RestUtils.exists(index));

    // One search request per assertion, as in the other write tests.
    final String searchUrl = index + "/_search?";
    assertThat(RestUtils.get(searchUrl), not(containsString("Spark")));
    assertThat(RestUtils.get(searchUrl), not(containsString("Hadoop")));
    assertThat(RestUtils.get(searchUrl), not(containsString("YARN")));
}
Example 9
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Streams two records to a resource whose name contains the "{name}" pattern, then checks
 * that the "test-write-tech-spark" and "test-write-tech-hadoop" resources both exist and
 * that each one's search output contains the matching name field.
 */
@Test
public void test1MultiIndexWrite() throws Exception {
    final String pattern = wrapIndex(resource("test-write-tech-{name}", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("hadoop");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(pattern))
                    .format("es"),
            pattern
    );

    final boolean sparkIndexExists = RestUtils.exists(wrapIndex(resource("test-write-tech-spark", "data")));
    assertTrue(sparkIndexExists);
    final boolean hadoopIndexExists = RestUtils.exists(wrapIndex(resource("test-write-tech-hadoop", "data")));
    assertTrue(hadoopIndexExists);

    final String sparkHits = RestUtils.get(wrapIndex(resource("test-write-tech-spark", "data") + "/_search?"));
    assertThat(sparkHits, containsString("\"name\":\"spark\""));
    final String hadoopHits = RestUtils.get(wrapIndex(resource("test-write-tech-hadoop", "data") + "/_search?"));
    assertThat(hadoopHits, containsString("\"name\":\"hadoop\""));
}
Example 10
Source File: TypeSafeUDAF.java From Apache-Spark-2x-for-Java-Developers with MIT License | 4 votes |
/**
 * Supplies the encoder for {@code Average}, built with {@code Encoders.bean}.
 *
 * @return a bean encoder for {@code Average}
 */
public Encoder<Average> bufferEncoder() {
    final Encoder<Average> averageEncoder = Encoders.bean(Average.class);
    return averageEncoder;
}
Example 11
Source File: JavaSparkSQLExample.java From SparkDemo with MIT License | 4 votes |
private static void runDatasetCreationExample(SparkSession spark) { // $example on:create_ds$ // Create an instance of a Bean class Person person = new Person(); person.setName("Andy"); person.setAge(32); // Encoders are created for Java beans Encoder<Person> personEncoder = Encoders.bean(Person.class); Dataset<Person> javaBeanDS = spark.createDataset( Collections.singletonList(person), personEncoder ); javaBeanDS.show(); // +---+----+ // |age|name| // +---+----+ // | 32|Andy| // +---+----+ // Encoders for most common types are provided in class Encoders Encoder<Integer> integerEncoder = Encoders.INT(); Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder); Dataset<Integer> transformedDS = primitiveDS.map(new MapFunction<Integer, Integer>() { @Override public Integer call(Integer value) throws Exception { return value + 1; } }, integerEncoder); transformedDS.collect(); // Returns [2, 3, 4] // DataFrames can be converted to a Dataset by providing a class. Mapping based on name String path = Constant.LOCAL_FILE_PREX +"/data/resources/people.json"; Dataset<Person> peopleDS = spark.read().json(path).as(personEncoder); peopleDS.show(); // +----+-------+ // | age| name| // +----+-------+ // |null|Michael| // | 30| Andy| // | 19| Justin| // +----+-------+ // $example off:create_ds$ }
Example 12
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
/**
 * Registers an ingest pipeline that sets the "pipeTEST" field, streams three records through
 * it, and checks that the target exists and its search output contains "pipeTEST":true.
 * The test is guarded by an assumption on Elasticsearch 5.x or later.
 */
@Test
public void test2WriteToIngestPipeline() throws Exception {
    EsAssume.versionOnOrAfter(EsMajorVersion.V_5_X, "Ingest Supported in 5.x and above only");

    final String pipelineId = prefix + "-pipeline";
    final String pipelineBody = "{\"description\":\"Test Pipeline\",\"processors\":[{\"set\":{\"field\":\"pipeTEST\",\"value\":true,\"override\":true}}]}";
    RestUtils.put("/_ingest/pipeline/" + pipelineId, StringUtils.toUTF(pipelineBody));

    final String index = wrapIndex(resource("test-write-ingest", "data"));
    final JavaStreamingQueryTestHarness<RecordBean> harness =
            new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

    final RecordBean sparkDoc = new RecordBean();
    sparkDoc.setId(1);
    sparkDoc.setName("Spark");

    final RecordBean hadoopDoc = new RecordBean();
    hadoopDoc.setId(2);
    hadoopDoc.setName("Hadoop");

    final RecordBean yarnDoc = new RecordBean();
    yarnDoc.setId(3);
    yarnDoc.setName("YARN");

    final Dataset<RecordBean> input = harness
            .withInput(sparkDoc)
            .withInput(hadoopDoc)
            .withInput(yarnDoc)
            .stream();

    harness.run(
            input.writeStream()
                    .option("checkpointLocation", checkpoint(index))
                    .option(ES_INGEST_PIPELINE, pipelineId)
                    .option(ES_NODES_INGEST_ONLY, "true")
                    .format("es"),
            index
    );

    assertTrue(RestUtils.exists(index));
    final String searchOutput = RestUtils.get(index+"/_search?");
    assertThat(searchOutput, containsString("\"pipeTEST\":true"));
}
Example 13
Source File: AbstractJavaEsSparkStructuredStreamingTest.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Test @Ignore("Serialization issues in DataFrameValueWriter when trying to serialize an object for use in parameters") public void test3WriteWithUpsertScript() throws Exception { // BWC String keyword = "keyword"; String lang = "painless"; if (version.onOrBefore(EsMajorVersion.V_2_X)) { keyword = "string"; lang = "groovy"; } // Init String mapping = "{\"data\":{\"properties\":{\"id\":{\"type\":\""+keyword+"\"},\"note\":{\"type\":\""+keyword+"\"},\"address\":{\"type\":\"nested\",\"properties\":{\"id\":{\"type\":\""+keyword+"\"},\"zipcode\":{\"type\":\""+keyword+"\"}}}}}}"; String index = wrapIndex("test-script-upsert"); String type = "data"; String target = resource(index, type); String docPath = docPath(index, type); RestUtils.touch(index); RestUtils.putMapping(index, type, mapping.getBytes()); RestUtils.postData(docPath+"/1", "{\"id\":\"1\",\"note\":\"First\",\"address\":[]}".getBytes()); RestUtils.postData(docPath+"/2", "{\"id\":\"2\",\"note\":\"First\",\"address\":[]}".getBytes()); // Common configurations Map<String, String> updateProperties = new HashMap<>(); updateProperties.put("es.write.operation", "upsert"); updateProperties.put("es.mapping.id", "id"); updateProperties.put("es.update.script.lang", lang); // Run 1 ContactBean doc1; { AddressBean address = new AddressBean(); address.setId("1"); address.setZipcode("12345"); doc1 = new ContactBean(); doc1.setId("1"); doc1.setAddress(address); } String script1; if (version.onOrAfter(EsMajorVersion.V_5_X)) { script1 = "ctx._source.address.add(params.new_address)"; } else { script1 = "ctx._source.address+=new_address"; } JavaStreamingQueryTestHarness<ContactBean> test1 = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(ContactBean.class)); test1 .withInput(doc1) .run( test1.stream() .writeStream() .option("checkpointLocation", checkpoint(target)) .options(updateProperties) .option("es.update.script.params", "new_address:address") .option("es.update.script", script1) .format("es"), target ); // Run 2 
ContactBean doc2; { doc2 = new ContactBean(); doc2.setId("2"); doc2.setNote("Second"); } String script2; if (version.onOrAfter(EsMajorVersion.V_5_X)) { script2 = "ctx._source.note = params.new_note"; } else { script2 = "ctx._source.note=new_note"; } JavaStreamingQueryTestHarness<ContactBean> test2 = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(ContactBean.class)); test2 .withInput(doc2) .run( test2.stream() .writeStream() .option("checkpointLocation", checkpoint(target)) .options(updateProperties) .option("es.update.script.params", "new_note:note") .option("es.update.script", script2) .format("es"), target ); // Validate assertTrue(RestUtils.exists(docPath + "/1")); assertThat(RestUtils.get(docPath + "/1"), both(containsString("\"zipcode\":\"12345\"")).and(containsString("\"note\":\"First\""))); assertTrue(RestUtils.exists(docPath + "/2")); assertThat(RestUtils.get(docPath + "/2"), both(not(containsString("\"zipcode\":\"12345\""))).and(containsString("\"note\":\"Second\""))); }
Example 14
Source File: JavaUserDefinedTypedAggregation.java From incubator-nemo with Apache License 2.0 | 2 votes |
/**
 * Specifies the Encoder for the intermediate value type, built with {@code Encoders.bean}
 * from {@code Average}.
 *
 * @return buffer encoder.
 */
public Encoder<Average> bufferEncoder() {
    final Encoder<Average> averageEncoder = Encoders.bean(Average.class);
    return averageEncoder;
}
Example 15
Source File: JavaUserDefinedTypedAggregation.java From nemo with Apache License 2.0 | 2 votes |
/**
 * Provides the bean encoder for {@code Average}, the intermediate value type.
 *
 * @return buffer encoder.
 */
public Encoder<Average> bufferEncoder() {
    final Encoder<Average> bufferBeanEncoder = Encoders.bean(Average.class);
    return bufferBeanEncoder;
}