Java Code Examples for org.apache.beam.sdk.options.PipelineOptions#setJobName()
The following examples show how to use
org.apache.beam.sdk.options.PipelineOptions#setJobName().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JobNameConfiguration.java From dbeam with Apache License 2.0 | 6 votes |
/**
 * Sets the application name to "JdbcAvroJob" and, if no explicit job name is present,
 * generates one from the given parts.
 *
 * <p>A name is generated only when the current job name is {@code null} or {@code "auto"}.
 * It has the form {@code dbeam-<parts>-<hex>}: null or empty parts are dropped, the remaining
 * ones are normalized and joined with {@code '-'}, and a random hexadecimal suffix is appended.
 * A failure while setting the application name is logged as a warning and does not abort.
 *
 * @param options pipeline options that are updated in place
 * @param parts fragments to embed in the generated job name
 */
public static void configureJobName(final PipelineOptions options, final String... parts) {
  try {
    options.as(ApplicationNameOptions.class).setAppName("JdbcAvroJob");
  } catch (Exception e) {
    LOGGER.warn("Unable to configure ApplicationName", e);
  }

  // An explicitly chosen job name is left untouched.
  final String currentName = options.getJobName();
  if (currentName != null && !"auto".equals(currentName)) {
    return;
  }

  final String randomSuffix = Integer.toHexString(ThreadLocalRandom.current().nextInt());
  final String joinedParts =
      Arrays.stream(parts)
          .filter(part -> !Strings.isNullOrEmpty(part))
          .map(JobNameConfiguration::normalizeString)
          .collect(Collectors.joining("-"));
  options.setJobName(String.format("dbeam-%s-%s", joinedParts, randomSuffix));
}
Example 2
Source File: PipelineOptionsTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Supplies the {@link PipelineOptions} instances used as parameters of this test class.
 *
 * <p>The returned options are: an empty instance, one holding a plain {@code Object} in its
 * "unserializable" property, one with a custom field set, one configured through setters and
 * one configured by parsing command-line style arguments.
 *
 * @return the option instances to run the parameterized tests against
 */
@Parameters(name = "{index}: {0}")
public static Iterable<? extends PipelineOptions> options() {
  PipelineOptionsFactory.register(TestUnserializableOptions.class);
  PipelineOptionsFactory.register(TestDefaultOptions.class);
  PipelineOptionsFactory.register(TestOptions.class);

  PipelineOptions emptyOptions = PipelineOptionsFactory.create();

  TestUnserializableOptions withNonSerializable =
      PipelineOptionsFactory.as(TestUnserializableOptions.class);
  withNonSerializable.setUnserializable(new Object());

  TestOptions withCustomField = PipelineOptionsFactory.as(TestOptions.class);
  withCustomField.setExample(99);

  PipelineOptions withSettings = PipelineOptionsFactory.create();
  withSettings.as(ApplicationNameOptions.class).setAppName("my_app");
  withSettings.setJobName("my_job");

  // Each flag must be its own argument: a single space-separated string would be parsed as
  // one --jobName flag whose value is "my_job --appName=my_app".
  PipelineOptions withParsedSettings =
      PipelineOptionsFactory.fromArgs("--jobName=my_job", "--appName=my_app").create();

  return ImmutableList.of(
      emptyOptions, withNonSerializable, withCustomField, withSettings, withParsedSettings);
}
Example 3
Source File: WordCountTimeOut1Sec.java From incubator-nemo with Apache License 2.0 | 6 votes |
/**
 * Runs the word-count pipeline, waits at most one second for it and then cancels it.
 *
 * @param args arguments: {@code args[0]} is the input file path and {@code args[1]} the
 *     output file path.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];
  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WordCountTimeOut1Sec");

  final Pipeline p = generateWordCountPipeline(options, inputFilePath, outputFilePath);
  final PipelineResult pr = p.run();

  // The state returned after the one-second wait is not needed; only the timeout matters.
  pr.waitUntilFinish(org.joda.time.Duration.standardSeconds(1));
  try {
    pr.cancel();
  } catch (final IOException e) {
    // Cancelling is best effort, but keep the cause so the failure can be diagnosed.
    LOG.info("IOException while cancelling job", e);
  }
}
Example 4
Source File: MapReduce.java From nemo with Apache License 2.0 | 5 votes |
/**
 * Entry point of the MapReduce Beam program.
 *
 * <p>Each input line is split on runs of spaces; the first two tokens form the key
 * ({@code token0#token1}) and the third token is parsed as a long value. Values are summed per
 * key and written out as {@code key: sum}.
 *
 * @param args arguments: {@code args[0]} is the input file path and {@code args[1]} the
 *     output file path.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];

  final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class);
  options.setRunner(NemoPipelineRunner.class);
  options.setJobName("MapReduce");
  final Pipeline p = Pipeline.create(options);

  // Turns "a b 3" into ("a#b", 3).
  final SimpleFunction<String, KV<String, Long>> parseLine =
      new SimpleFunction<String, KV<String, Long>>() {
        @Override
        public KV<String, Long> apply(final String line) {
          final String[] tokens = line.split(" +");
          return KV.of(tokens[0] + "#" + tokens[1], Long.valueOf(tokens[2]));
        }
      };

  // Renders a summed entry as "key: value".
  final SimpleFunction<KV<String, Long>, String> formatEntry =
      new SimpleFunction<KV<String, Long>, String>() {
        @Override
        public String apply(final KV<String, Long> entry) {
          return entry.getKey() + ": " + entry.getValue();
        }
      };

  final PCollection<KV<String, Long>> summed = GenericSourceSink.read(p, inputFilePath)
      .apply(MapElements.<String, KV<String, Long>>via(parseLine))
      .apply(GroupByKey.<String, Long>create())
      .apply(Combine.<String, Long, Long>groupedValues(Sum.ofLongs()));
  final PCollection<String> result =
      summed.apply(MapElements.<KV<String, Long>, String>via(formatEntry));

  GenericSourceSink.write(result, outputFilePath);
  p.run();
}
Example 5
Source File: JobNameConfigurationTest.java From dbeam with Apache License 2.0 | 5 votes |
/**
 * With a null job name and a null table part, the generated name must contain only the
 * prefix, the normalized database part and the random suffix.
 */
@Test
public void shouldConfigureJobNameWithEmptyTableName() {
  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setJobName(null);

  JobNameConfiguration.configureJobName(options, "some_db", null);

  final String appName = options.as(ApplicationNameOptions.class).getAppName();
  Assert.assertEquals("JdbcAvroJob", appName);

  final String jobName = options.getJobName();
  assertThat(jobName, startsWith("dbeam-somedb-"));
  // Exactly three dash-separated segments: "dbeam", the database part and the suffix.
  Assert.assertEquals(3, jobName.split("-").length);
}
Example 6
Source File: JobNameConfigurationTest.java From dbeam with Apache License 2.0 | 5 votes |
/** A job name of "auto" must be replaced by a generated name built from both parts. */
@Test
public void shouldConfigureJobNameWhenJobNameIsAuto() {
  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setJobName("auto");

  JobNameConfiguration.configureJobName(options, "some_db", "some_table");

  final String appName = options.as(ApplicationNameOptions.class).getAppName();
  Assert.assertEquals("JdbcAvroJob", appName);
  assertThat(options.getJobName(), startsWith("dbeam-somedb-sometable-"));
}
Example 7
Source File: JobNameConfigurationTest.java From dbeam with Apache License 2.0 | 5 votes |
/** A null job name must be replaced by a generated name built from both parts. */
@Test
public void shouldConfigureJobName() {
  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setJobName(null);

  JobNameConfiguration.configureJobName(options, "some_db", "some_table");

  final String appName = options.as(ApplicationNameOptions.class).getAppName();
  Assert.assertEquals("JdbcAvroJob", appName);
  assertThat(options.getJobName(), startsWith("dbeam-somedb-sometable-"));
}
Example 8
Source File: WindowedBroadcast.java From incubator-nemo with Apache License 2.0 | 5 votes |
/**
 * Entry point of the windowed-broadcast Beam program.
 *
 * <p>Elements are placed into two-second sliding windows that advance every second. The
 * window contents are also exposed as a list side input, and every element is checked for
 * membership in that list before a line describing it is written, one file per window.
 *
 * @param args arguments: {@code args[0]} is the output file path.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];

  final Window<Long> windowFn = Window.<Long>into(
      SlidingWindows.of(Duration.standardSeconds(2)).every(Duration.standardSeconds(1)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedBroadcast");
  final Pipeline p = Pipeline.create(options);

  final PCollection<Long> windowedElements = getSource(p).apply(windowFn);
  final PCollectionView<List<Long>> windowedView = windowedElements.apply(View.asList());

  // Fails the pipeline if an element is missing from the side input of its own window.
  final DoFn<Long, String> membershipCheck = new DoFn<Long, String>() {
    @ProcessElement
    public void processElement(final ProcessContext c) {
      final Long current = c.element();
      final List<Long> windowContents = c.sideInput(windowedView);
      System.out.println(current + " / " + windowContents);
      if (windowContents.contains(current)) {
        c.output(current + " is in " + windowContents);
      } else {
        throw new RuntimeException(current + " not in " + windowContents.toString());
      }
    }
  };

  windowedElements
      .apply(ParDo.of(membershipCheck).withSideInputs(windowedView))
      .apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
Example 9
Source File: WindowedWordCount.java From incubator-nemo with Apache License 2.0 | 5 votes |
/**
 * Entry point of the windowed word-count Beam program.
 *
 * <p>Counts are summed per key inside either five-second fixed windows (window type
 * {@code "fixed"}) or ten-second sliding windows advancing every five seconds (any other
 * value), then written as {@code key: value} lines, one file per window.
 *
 * @param args arguments: {@code args[0]} is the output file path and {@code args[1]} the
 *     window type; the whole array is also handed to {@code getSource}.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];
  final String windowType = args[1];

  final Window<KV<String, Long>> windowFn = windowType.equals("fixed")
      ? Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardSeconds(5)))
      : Window.<KV<String, Long>>into(
          SlidingWindows.of(Duration.standardSeconds(10)).every(Duration.standardSeconds(5)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedWordCount");
  final Pipeline p = Pipeline.create(options);

  // Renders a summed entry as "key: value".
  final SimpleFunction<KV<String, Long>, String> formatEntry =
      new SimpleFunction<KV<String, Long>, String>() {
        @Override
        public String apply(final KV<String, Long> entry) {
          return entry.getKey() + ": " + entry.getValue();
        }
      };

  getSource(p, args)
      .apply(windowFn)
      .apply(Sum.longsPerKey())
      .apply(MapElements.<KV<String, Long>, String>via(formatEntry))
      .apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
Example 10
Source File: WordCount.java From incubator-nemo with Apache License 2.0 | 5 votes |
/**
 * Entry point of the word-count Beam program: builds the pipeline under the job name
 * "WordCount", runs it and blocks until it finishes.
 *
 * @param args arguments: {@code args[0]} is the input file path and {@code args[1]} the
 *     output file path.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WordCount");

  generateWordCountPipeline(options, inputFilePath, outputFilePath)
      .run()
      .waitUntilFinish();
}
Example 11
Source File: PipelineTestBase.java From kettle-beam with Apache License 2.0 | 5 votes |
@Ignore public void createRunPipeline( TransMeta transMeta ) throws Exception { /* FileOutputStream fos = new FileOutputStream( "/tmp/"+transMeta.getName()+".ktr" ); fos.write( transMeta.getXML().getBytes() ); fos.close(); */ PipelineOptions pipelineOptions = PipelineOptionsFactory.create(); pipelineOptions.setJobName( transMeta.getName() ); pipelineOptions.setUserAgent( BeamConst.STRING_KETTLE_BEAM ); BeamJobConfig jobConfig = new BeamJobConfig(); jobConfig.setName("Direct runner test"); jobConfig.setRunnerTypeName( RunnerType.Direct.name() ); // No extra plugins to load : null option TransMetaPipelineConverter converter = new TransMetaPipelineConverter( transMeta, metaStore, (String) null, jobConfig ); Pipeline pipeline = converter.createPipeline( pipelineOptions ); PipelineResult pipelineResult = pipeline.run(); pipelineResult.waitUntilFinish(); MetricResults metricResults = pipelineResult.metrics(); MetricQueryResults allResults = metricResults.queryMetrics( MetricsFilter.builder().build() ); for ( MetricResult<Long> result : allResults.getCounters() ) { System.out.println( "Name: " + result.getName() + " Attempted: " + result.getAttempted() ); } }
Example 12
Source File: KettleBeamPipelineExecutor.java From kettle-beam with Apache License 2.0 | 5 votes |
/**
 * Copies the generic settings (job name, user agent, temp location) onto the pipeline options.
 *
 * <p>The job name is the transformation name with every character other than ASCII letters,
 * digits and {@code '-'} removed. It is only set when something is left after sanitising, so a
 * name made up entirely of rejected characters no longer yields an empty job name. User agent
 * and temp location are set only when configured, after variable substitution.
 *
 * @param config the job configuration providing user agent and temp location
 * @param transformationName the raw transformation name; may be null or empty
 * @param pipelineOptions the options to update in place
 * @param space the variable space used for substitution
 */
private void configureStandardOptions( BeamJobConfig config, String transformationName, PipelineOptions pipelineOptions, VariableSpace space ) {
  if ( StringUtils.isNotEmpty( transformationName ) ) {
    String sanitizedName = transformationName.replaceAll( "[^-A-Za-z0-9]", "" );
    // Sanitising can strip every character (e.g. "___"); keep the existing job name then.
    if ( !sanitizedName.isEmpty() ) {
      pipelineOptions.setJobName( sanitizedName );
    }
  }
  if ( StringUtils.isNotEmpty( config.getUserAgent() ) ) {
    String userAgent = space.environmentSubstitute( config.getUserAgent() );
    pipelineOptions.setUserAgent( userAgent );
  }
  if ( StringUtils.isNotEmpty( config.getTempLocation() ) ) {
    String tempLocation = space.environmentSubstitute( config.getTempLocation() );
    pipelineOptions.setTempLocation( tempLocation );
  }
}
Example 13
Source File: PipelineTestBase.java From hop with Apache License 2.0 | 5 votes |
@Ignore public void createRunPipeline( PipelineMeta pipelineMeta ) throws Exception { /* FileOutputStream fos = new FileOutputStream( "/tmp/"+pipelineMeta.getName()+".ktr" ); fos.write( pipelineMeta.getXML().getBytes() ); fos.close(); */ PipelineOptions pipelineOptions = PipelineOptionsFactory.create(); pipelineOptions.setJobName( pipelineMeta.getName() ); pipelineOptions.setUserAgent( BeamConst.STRING_HOP_BEAM ); BeamDirectPipelineRunConfiguration beamRunConfig = new BeamDirectPipelineRunConfiguration(); beamRunConfig.setTempLocation( System.getProperty( "java.io.tmpdir" ) ); // No extra plugins to load : null option HopPipelineMetaToBeamPipelineConverter converter = new HopPipelineMetaToBeamPipelineConverter( pipelineMeta, metadataProvider, beamRunConfig ); Pipeline pipeline = converter.createPipeline(); PipelineResult pipelineResult = pipeline.run(); pipelineResult.waitUntilFinish(); MetricResults metricResults = pipelineResult.metrics(); MetricQueryResults allResults = metricResults.queryMetrics( MetricsFilter.builder().build() ); for ( MetricResult<Long> result : allResults.getCounters() ) { System.out.println( "Name: " + result.getName() + " Attempted: " + result.getAttempted() ); } }
Example 14
Source File: PartitionWordsByLength.java From incubator-nemo with Apache License 2.0 | 4 votes |
/** * Main function for the MR BEAM program. * * @param args arguments. */ public static void main(final String[] args) { final String inputFilePath = args[0]; final String outputFilePath = args[1]; final PipelineOptions options = NemoPipelineOptionsFactory.create(); options.setJobName("PartitionWordsByLength"); // {} here is required for preserving type information. // Please see https://stackoverflow.com/a/48431397 for details. final TupleTag<KV<Integer, String>> shortWordsTag = new TupleTag<KV<Integer, String>>("short") { }; final TupleTag<KV<Integer, String>> longWordsTag = new TupleTag<KV<Integer, String>>("long") { }; final TupleTag<String> veryLongWordsTag = new TupleTag<String>("very long") { }; final TupleTag<String> veryVeryLongWordsTag = new TupleTag<String>("very very long") { }; final Pipeline p = Pipeline.create(options); final PCollection<String> lines = GenericSourceSink.read(p, inputFilePath); PCollectionTuple results = lines .apply(FlatMapElements .into(TypeDescriptors.strings()) .via(line -> Arrays.asList(line.split(" ")))) .apply(ParDo.of(new DoFn<String, String>() { // processElement with Beam OutputReceiver. 
@ProcessElement public void processElement(final ProcessContext c) { final String word = c.element(); if (word.length() < 6) { c.output(shortWordsTag, KV.of(word.length(), word)); } else if (word.length() < 11) { c.output(longWordsTag, KV.of(word.length(), word)); } else if (word.length() > 12) { c.output(veryVeryLongWordsTag, word); } else { c.output(word); } } }).withOutputTags(veryLongWordsTag, TupleTagList .of(shortWordsTag).and(longWordsTag).and(veryVeryLongWordsTag))); PCollection<String> shortWords = results.get(shortWordsTag) .apply(GroupByKey.create()) .apply(MapElements.via(new FormatLines())); PCollection<String> longWords = results.get(longWordsTag) .apply(GroupByKey.create()) .apply(MapElements.via(new FormatLines())); PCollection<String> veryLongWords = results.get(veryLongWordsTag); PCollection<String> veryVeryLongWords = results.get(veryVeryLongWordsTag); GenericSourceSink.write(shortWords, outputFilePath + "_short"); GenericSourceSink.write(longWords, outputFilePath + "_long"); GenericSourceSink.write(veryLongWords, outputFilePath + "_very_long"); GenericSourceSink.write(veryVeryLongWords, outputFilePath + "_very_very_long"); p.run().waitUntilFinish(); }
Example 15
Source File: AlternatingLeastSquareInefficient.java From incubator-nemo with Apache License 2.0 | 4 votes |
/** * Main function for the ALS BEAM program. * * @param args arguments. */ public static void main(final String[] args) { final Long start = System.currentTimeMillis(); LOG.info(Arrays.toString(args)); final String inputFilePath = args[0]; final Integer numFeatures = Integer.parseInt(args[1]); final Integer numItr = Integer.parseInt(args[2]); final Double lambda; if (args.length > 4) { lambda = Double.parseDouble(args[3]); } else { lambda = 0.05; } final PipelineOptions options = NemoPipelineOptionsFactory.create(); options.setJobName("ALS"); options.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF); final Pipeline p = Pipeline.create(options); p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(int[].class, IntArrayCoder.class)); p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(float[].class, FloatArrayCoder.class)); // Read raw data final PCollection<String> rawData = GenericSourceSink.read(p, inputFilePath); // Parse data for item final PCollection<KV<Integer, KV<int[], float[]>>> parsedItemData = rawData .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(false))) .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner())); // Create Initial Item Matrix PCollection<KV<Integer, float[]>> itemMatrix = parsedItemData .apply(ParDo.of(new DoFn<KV<Integer, KV<int[], float[]>>, KV<Integer, float[]>>() { @ProcessElement public void processElement(final ProcessContext c) throws Exception { final float[] result = new float[numFeatures]; final KV<Integer, KV<int[], float[]>> element = c.element(); final float[] ratings = element.getValue().getValue(); for (int i = 0; i < ratings.length; i++) { result[0] += ratings[i]; } result[0] /= ratings.length; for (int i = 1; i < result.length; i++) { result[i] = (float) (Math.random() * 0.01); } c.output(KV.of(element.getKey(), result)); } })); // Iterations to update Item Matrix. 
for (Integer i = 0; i < numItr; i++) { // NOTE: a single composite transform for the iteration. itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, rawData, parsedItemData)); } p.run().waitUntilFinish(); LOG.info("JCT " + (System.currentTimeMillis() - start)); }
Example 16
Source File: WriteRowMetricsDoFnTest.java From feast with Apache License 2.0 | 4 votes |
@Test public void shouldSendCorrectStatsDMetrics() throws IOException, InterruptedException { PipelineOptions pipelineOptions = PipelineOptionsFactory.create(); pipelineOptions.setJobName("job"); Map<String, Iterable<FeatureRow>> input = readTestInput("feast/ingestion/transform/WriteRowMetricsDoFnTest.input"); List<String> expectedLines = readTestOutput("feast/ingestion/transform/WriteRowMetricsDoFnTest.output"); pipeline .apply(Create.of(input)) .apply( ParDo.of( WriteRowMetricsDoFn.newBuilder() .setStatsdHost("localhost") .setStatsdPort(STATSD_SERVER_PORT) .setStoreName("store") .setClock(Clock.fixed(Instant.ofEpochSecond(1585548645), ZoneId.of("UTC"))) .setMetricsNamespace("test") .build())); pipeline.run(pipelineOptions).waitUntilFinish(); // Wait until StatsD has finished processed all messages, 3 sec is a reasonable duration // based on empirical testing. Thread.sleep(3000); List<String> actualLines = statsDServer.messagesReceived(); for (String expected : expectedLines) { boolean matched = false; for (String actual : actualLines) { if (actual.equals(expected)) { matched = true; break; } } if (!matched) { System.out.println("Print actual metrics output for debugging:"); for (String line : actualLines) { System.out.println(line); } fail(String.format("Expected StatsD metric not found:\n%s", expected)); } } statsDServer.stop(); }
Example 17
Source File: WriteFeatureValueMetricsDoFnTest.java From feast with Apache License 2.0 | 4 votes |
@Test public void shouldSendCorrectStatsDMetrics() throws IOException, InterruptedException { PipelineOptions pipelineOptions = PipelineOptionsFactory.create(); pipelineOptions.setJobName("job"); Map<String, Iterable<FeatureRow>> input = readTestInput("feast/ingestion/transform/WriteFeatureValueMetricsDoFnTest.input"); List<String> expectedLines = readTestOutput("feast/ingestion/transform/WriteFeatureValueMetricsDoFnTest.output"); pipeline .apply(Create.of(input)) .apply( ParDo.of( WriteFeatureValueMetricsDoFn.newBuilder() .setStatsdHost("localhost") .setStatsdPort(STATSD_SERVER_PORT) .setStoreName("store") .setMetricsNamespace("test") .build())); pipeline.run(pipelineOptions).waitUntilFinish(); // Wait until StatsD has finished processed all messages, 3 sec is a reasonable duration // based on empirical testing. Thread.sleep(3000); List<String> actualLines = statsDServer.messagesReceived(); for (String expected : expectedLines) { boolean matched = false; for (String actual : actualLines) { if (actual.equals(expected)) { matched = true; break; } } if (!matched) { System.out.println("Print actual metrics output for debugging:"); for (String line : actualLines) { System.out.println(line); } fail(String.format("Expected StatsD metric not found:\n%s", expected)); } } statsDServer.stop(); }
Example 18
Source File: AlternatingLeastSquare.java From incubator-nemo with Apache License 2.0 | 4 votes |
/** * Main function for the ALS BEAM program. * * @param args arguments. * @throws ClassNotFoundException exception. */ public static void main(final String[] args) { final Long start = System.currentTimeMillis(); LOG.info(Arrays.toString(args)); final String inputFilePath = args[0]; final Integer numFeatures = Integer.parseInt(args[1]); final Integer numItr = Integer.parseInt(args[2]); final Double lambda; if (args.length > 3) { lambda = Double.parseDouble(args[3]); } else { lambda = 0.05; } final String outputFilePath; boolean checkOutput = false; if (args.length > 4) { outputFilePath = args[4]; checkOutput = true; } else { outputFilePath = ""; } final PipelineOptions options = NemoPipelineOptionsFactory.create(); options.setJobName("ALS"); options.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF); final Pipeline p = Pipeline.create(options); p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(int[].class, IntArrayCoder.class)); p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(float[].class, FloatArrayCoder.class)); // Read raw data final PCollection<String> rawData = GenericSourceSink.read(p, inputFilePath); // Parse data for item final PCollection<KV<Integer, KV<int[], float[]>>> parsedItemData = rawData .apply(ParDo.of(new ParseLine(false))) .apply(Combine.perKey(new TrainingDataCombiner())); // Parse data for user final PCollection<KV<Integer, KV<int[], float[]>>> parsedUserData = rawData .apply(ParDo.of(new ParseLine(true))) .apply(Combine.perKey(new TrainingDataCombiner())); // Create Initial Item Matrix PCollection<KV<Integer, float[]>> itemMatrix = parsedItemData.apply(ParDo.of(new CreateInitialMatrix(numFeatures, checkOutput))); // Iterations to update Item Matrix. for (int i = 0; i < numItr; i++) { // NOTE: a single composite transform for the iteration. 
itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, parsedUserData, parsedItemData)); } if (checkOutput) { final PCollection<String> result = itemMatrix.apply(MapElements.<KV<Integer, float[]>, String>via( new SimpleFunction<KV<Integer, float[]>, String>() { @Override public String apply(final KV<Integer, float[]> elem) { final List<String> values = Stream.of(ArrayUtils.toObject(elem.getValue())) .map(String::valueOf) .collect(Collectors.toList()); return elem.getKey() + "," + String.join(",", values); } })); GenericSourceSink.write(result, outputFilePath); } p.run().waitUntilFinish(); LOG.info("JCT " + (System.currentTimeMillis() - start)); }
Example 19
Source File: AlternatingLeastSquareInefficient.java From nemo with Apache License 2.0 | 4 votes |
/** * Main function for the ALS BEAM program. * @param args arguments. */ public static void main(final String[] args) { final Long start = System.currentTimeMillis(); LOG.info(Arrays.toString(args)); final String inputFilePath = args[0]; final Integer numFeatures = Integer.parseInt(args[1]); final Integer numItr = Integer.parseInt(args[2]); final Double lambda; if (args.length > 4) { lambda = Double.parseDouble(args[3]); } else { lambda = 0.05; } final PipelineOptions options = PipelineOptionsFactory.create(); options.setRunner(NemoPipelineRunner.class); options.setJobName("ALS"); options.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF); final Pipeline p = Pipeline.create(options); p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(Pair.class, PairCoder.class)); // Read raw data final PCollection<String> rawData = GenericSourceSink.read(p, inputFilePath); // Parse data for item final PCollection<KV<Integer, Pair<List<Integer>, List<Double>>>> parsedItemData = rawData .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(false))) .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner())); // Create Initial Item Matrix PCollection<KV<Integer, List<Double>>> itemMatrix = parsedItemData .apply(ParDo.of(new DoFn<KV<Integer, Pair<List<Integer>, List<Double>>>, KV<Integer, List<Double>>>() { @ProcessElement public void processElement(final ProcessContext c) throws Exception { final List<Double> result = new ArrayList<>(numFeatures); result.add(0, 0.0); final KV<Integer, Pair<List<Integer>, List<Double>>> element = c.element(); final List<Double> ratings = element.getValue().right(); for (Integer i = 0; i < ratings.size(); i++) { result.set(0, result.get(0) + ratings.get(i)); } result.set(0, result.get(0) / ratings.size()); for (Integer i = 1; i < result.size(); i++) { result.add(i, (Math.random() * 0.01)); } c.output(KV.of(element.getKey(), result)); } })); // Iterations to update Item Matrix. 
for (Integer i = 0; i < numItr; i++) { // NOTE: a single composite transform for the iteration. itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, rawData, parsedItemData)); } p.run(); LOG.info("JCT " + (System.currentTimeMillis() - start)); }
Example 20
Source File: HopPipelineMetaToBeamPipelineConverter.java From hop with Apache License 2.0 | 4 votes |
public Pipeline createPipeline() throws Exception { ILogChannel log = LogChannel.GENERAL; // Create a new Pipeline // RunnerType runnerType = pipelineRunConfiguration.getRunnerType(); Class<? extends PipelineRunner<?>> runnerClass = getPipelineRunnerClass( runnerType ); PipelineOptions pipelineOptions = pipelineRunConfiguration.getPipelineOptions(); // The generic options // pipelineOptions.setUserAgent( pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getUserAgent() ) ); pipelineOptions.setTempLocation( pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getTempLocation() ) ); pipelineOptions.setJobName( pipelineMeta.getName() ); pipelineOptions.setRunner( runnerClass ); Pipeline pipeline = Pipeline.create( pipelineOptions ); pipeline.getCoderRegistry().registerCoderForClass( HopRow.class, new HopRowCoder() ); log.logBasic( "Created Apache Beam pipeline with name '" + pipelineOptions.getJobName() + "'" ); // Keep track of which transform outputs which Collection // Map<String, PCollection<HopRow>> stepCollectionMap = new HashMap<>(); // Handle io // handleBeamInputSteps( log, stepCollectionMap, pipeline ); // Transform all the other transforms... // handleGenericStep( stepCollectionMap, pipeline ); // Output handling // handleBeamOutputSteps( log, stepCollectionMap, pipeline ); return pipeline; }