Java Code Examples for org.apache.spark.sql.Dataset#columns()
The following examples show how to use org.apache.spark.sql.Dataset#columns().
You can go to the original project or source file by following the link above each example.
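Before the project examples, here is a minimal, hedged sketch of the API itself: Dataset#columns() returns the column names as a String[] in schema order, which is what every example below iterates over or indexes into. The class name, the local master setting, and the data/people.json path are illustrative placeholders, not taken from any of the projects listed here.

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ColumnsExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("Dataset#columns() demo")
                .master("local[*]")
                .getOrCreate();

        // Any DataFrame works the same way; the JSON path here is a placeholder.
        Dataset<Row> df = spark.read().json("data/people.json");

        // columns() returns a String[] of column names in schema order.
        String[] columns = df.columns();
        System.out.println("Column count: " + columns.length);
        System.out.println("Columns: " + Arrays.toString(columns));

        spark.stop();
    }
}

Typical uses, as in the examples below, are checking columns().length to detect an empty schema, iterating over the array to rename columns with withColumnRenamed, or picking a default column via columns()[0].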
Example 1
Source File: SparkCubingJobTest.java, from kylin-on-parquet-v2, Apache License 2.0
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
        org.apache.spark.sql.types.DataType dataType) {
    StructField[] structFieldList = layoutDs.schema().fields();
    String[] columns = layoutDs.columns();
    int index = 0;
    StructField[] outStructFieldList = new StructField[structFieldList.length];
    for (int i = 0; i < structFieldList.length; i++) {
        if (columns[i].equalsIgnoreCase(fieldName)) {
            index = i;
            StructField structField = structFieldList[i];
            outStructFieldList[i] = new StructField(structField.name(), dataType, false, structField.metadata());
        } else {
            outStructFieldList[i] = structFieldList[i];
        }
    }
    OUT_SCHEMA = new StructType(outStructFieldList);
    return index;
}
Example 2
Source File: NManualBuildAndQueryCuboidTest.java, from kylin-on-parquet-v2, Apache License 2.0
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
        org.apache.spark.sql.types.DataType dataType) {
    StructField[] structFieldList = layoutDs.schema().fields();
    String[] columns = layoutDs.columns();
    int index = 0;
    StructField[] outStructFieldList = new StructField[structFieldList.length];
    for (int i = 0; i < structFieldList.length; i++) {
        if (columns[i].equalsIgnoreCase(fieldName)) {
            index = i;
            StructField structField = structFieldList[i];
            outStructFieldList[i] = new StructField(structField.name(), dataType, false, structField.metadata());
        } else {
            outStructFieldList[i] = structFieldList[i];
        }
    }
    OUT_SCHEMA = new StructType(outStructFieldList);
    return index;
}
Example 3
Source File: CsvToDatasetCompatibleWithSparkv1x.java, from net.jgp.labs.spark, Apache License 2.0
private void start() {
    SparkSession spark = SparkSession.builder()
        .appName("CSV to Dataset")
        .master("local")
        .getOrCreate();

    String filename = "data/tuple-data-file.csv";
    Dataset<Row> df = spark.read().format("csv")
        .option("inferSchema", "true")
        .option("header", "false")
        .load(filename);
    df.show();

    // To ensure compatibility between Spark 2.0.0 and Spark 1.6.x
    int count = df.columns().length;
    for (int i = 0; i < count; i++) {
        String oldColName = "_c" + i;
        String newColName = "C" + i;
        df = df.withColumn(newColName, df.col(oldColName)).drop(oldColName);
    }
    df.show();
}
Example 4
Source File: WriteToDiscStep.java, from bpmn.ai, BSD 3-Clause "New" or "Revised" License
@Override
public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, Map<String, Object> parameters, SparkRunnerConfig config) {
    // remove spaces from column names as parquet does not support them
    for (String columnName : dataset.columns()) {
        if (columnName.contains(" ")) {
            String newColumnName = columnName.replace(' ', '_');
            dataset = dataset.withColumnRenamed(columnName, newColumnName);
        }
    }

    dataset.cache();
    BpmnaiUtils.getInstance().writeDatasetToParquet(dataset, "result", config);

    if (config.isGenerateResultPreview()) {
        dataset.limit(config.getResultPreviewLineCount()).write().mode(SaveMode.Overwrite)
                .saveAsTable(BpmnaiVariables.RESULT_PREVIEW_TEMP_TABLE);
    }

    return dataset;
}
Example 5
Source File: RDDConverterUtils.java, from systemds, Apache License 2.0
public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc,
        Dataset<Row> df, DataCharacteristics mc, boolean containsID, boolean isVector) {
    //determine unknown dimensions and sparsity if required
    if( !mc.dimsKnown(true) ) {
        LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
        JavaRDD<Row> tmp = df.javaRDD().map(new DataFrameAnalysisFunction(aNnz, containsID, isVector));
        long rlen = tmp.count();
        long clen = !isVector ? df.columns().length - (containsID?1:0) :
            ((Vector) tmp.first().get(containsID?1:0)).size();
        long nnz = UtilFunctions.toLong(aNnz.value());
        mc.set(rlen, clen, mc.getBlocksize(), nnz);
    }

    //ensure valid blocksizes
    if( mc.getBlocksize()<=1 )
        mc.setBlocksize(ConfigurationManager.getBlocksize());

    //construct or reuse row ids
    JavaPairRDD<Row, Long> prepinput = containsID ?
        df.javaRDD().mapToPair(new DataFrameExtractIDFunction(
            df.schema().fieldIndex(DF_ID_COLUMN))) :
        df.javaRDD().zipWithIndex(); //zip row index

    //convert csv rdd to binary block rdd (w/ partial blocks)
    boolean sparse = requiresSparseAllocation(prepinput, mc);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = prepinput.mapPartitionsToPair(
        new DataFrameToBinaryBlockFunction(mc, sparse, containsID, isVector));

    //aggregate partial matrix blocks (w/ preferred number of output
    //partitions as the data is likely smaller in binary block format,
    //but also to bound the size of partitions for compressed inputs)
    int parts = SparkUtils.getNumPreferredPartitions(mc, out);
    return RDDAggregateUtils.mergeByKey(out, parts, false);
}
Example 6
Source File: SparkCubingJobTest.java, from kylin-on-parquet-v2, Apache License 2.0
private Dataset<Row> dsConvertToOriginal(Dataset<Row> layoutDs, LayoutEntity entity) {
    Map<Integer, FunctionDesc> orderedMeasures = entity.getOrderedMeasures();

    for (final Map.Entry<Integer, FunctionDesc> entry : orderedMeasures.entrySet()) {
        FunctionDesc functionDesc = entry.getValue();
        if (functionDesc != null) {
            final String[] columns = layoutDs.columns();
            String functionName = functionDesc.returnType().dataType();
            if ("bitmap".equals(functionName)) {
                final int finalIndex = convertOutSchema(layoutDs, entry.getKey().toString(), DataTypes.LongType);
                PreciseCountDistinct preciseCountDistinct = new PreciseCountDistinct(null);
                layoutDs = layoutDs.map((MapFunction<Row, Row>) value -> {
                    Object[] ret = new Object[value.size()];
                    for (int i = 0; i < columns.length; i++) {
                        if (i == finalIndex) {
                            byte[] bytes = (byte[]) value.get(i);
                            Roaring64NavigableMap bitmapCounter = preciseCountDistinct.deserialize(bytes);
                            ret[i] = bitmapCounter.getLongCardinality();
                        } else {
                            ret[i] = value.get(i);
                        }
                    }
                    return RowFactory.create(ret);
                }, RowEncoder.apply(OUT_SCHEMA));
            }
        }
    }
    return layoutDs;
}
Example 7
Source File: DataframeUtils.java, from net.jgp.labs.spark, Apache License 2.0
public static Dataset<Row> addMetadata(Dataset<Row> df, String key, String value) {
    for (String colName : df.columns()) {
        df = addMetadata(df, colName, key, value);
    }
    return df;
}
Example 8
Source File: DrugBankDataset.java, from mmtf-spark, Apache License 2.0
/**
 * Removes spaces from column names to ensure compatibility with parquet
 * files.
 *
 * @param original dataset
 * @return dataset with columns renamed
 */
private static Dataset<Row> removeSpacesFromColumnNames(Dataset<Row> original) {
    for (String existingName : original.columns()) {
        String newName = existingName.replaceAll(" ", "");
        original = original.withColumnRenamed(existingName, newName);
    }
    return original;
}
Example 9
Source File: G2SDataset.java, from mmtf-spark, Apache License 2.0
/**
 * Downloads PDB residue mappings for a list of genomic variations.
 *
 * @param variationIds genomic variation ids (e.g. chr7:g.140449103A>C)
 * @param structureId specific PDB structure used for mapping
 * @param chainId specific chain used for mapping
 * @return dataset with PDB mapping information
 * @throws IOException
 */
private static Dataset<Row> getDataset(List<String> variationIds, String structureId, String chainId) throws IOException {
    // get a spark context
    SparkSession spark = SparkSession.builder().getOrCreate();
    @SuppressWarnings("resource") // sc will be closed elsewhere
    JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

    // download data in parallel
    JavaRDD<String> data = sc.parallelize(variationIds).flatMap(m -> getData(m, structureId, chainId));

    // convert from JavaRDD to Dataset
    Dataset<String> jsonData = spark.createDataset(JavaRDD.toRDD(data), Encoders.STRING());

    // parse json strings and return as a dataset
    Dataset<Row> dataset = spark.read().json(jsonData);
    dataset.show();

    // return null if dataset is empty
    if (dataset.columns().length == 0) {
        System.out.println("G2SDataset: no matches found");
        return null;
    }

    dataset = standardizeData(dataset);
    return flattenDataset(dataset);
}
Example 10
Source File: NManualBuildAndQueryCuboidTest.java, from kylin-on-parquet-v2, Apache License 2.0
private Dataset<Row> dsConvertToOriginal(Dataset<Row> layoutDs, LayoutEntity entity) {
    Map<Integer, FunctionDesc> orderedMeasures = entity.getOrderedMeasures();

    for (final Map.Entry<Integer, FunctionDesc> entry : orderedMeasures.entrySet()) {
        FunctionDesc functionDesc = entry.getValue();
        if (functionDesc != null) {
            final String[] columns = layoutDs.columns();
            String functionName = functionDesc.returnType().dataType();
            if ("bitmap".equals(functionName)) {
                final int finalIndex = convertOutSchema(layoutDs, entry.getKey().toString(), DataTypes.LongType);
                PreciseCountDistinct preciseCountDistinct = new PreciseCountDistinct(null);
                layoutDs = layoutDs.map((MapFunction<Row, Row>) value -> {
                    Object[] ret = new Object[value.size()];
                    for (int i = 0; i < columns.length; i++) {
                        if (i == finalIndex) {
                            byte[] bytes = (byte[]) value.get(i);
                            Roaring64NavigableMap bitmapCounter = preciseCountDistinct.deserialize(bytes);
                            ret[i] = bitmapCounter.getLongCardinality();
                        } else {
                            ret[i] = value.get(i);
                        }
                    }
                    return RowFactory.create(ret);
                }, RowEncoder.apply(OUT_SCHEMA));
            }
        }
    }
    return layoutDs;
}
Example 11
Source File: SparkRegressor.java, from mmtf-spark, Apache License 2.0
/**
 * Dataset must at least contain the following two columns:
 *   label: the class labels
 *   features: feature vector
 * @param data
 * @return map with metrics
 */
public Map<String, String> fit(Dataset<Row> data) {

    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[] {1.0 - testFraction, testFraction}, seed);
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    // Train a RandomForest model.
    predictor
        .setLabelCol(label)
        .setFeaturesCol("features");

    // Chain indexer and forest in a Pipeline
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[] {predictor});

    // Train model. This also runs the indexer.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
    Dataset<Row> predictions = model.transform(testData);

    // Display some sample predictions
    System.out.println("Sample predictions: " + predictor.getClass().getSimpleName());
    String primaryKey = predictions.columns()[0];
    predictions.select(primaryKey, label, "prediction").sample(false, 0.1, seed).show(50);

    Map<String, String> metrics = new LinkedHashMap<>();
    metrics.put("Method", predictor.getClass().getSimpleName());

    // Select (prediction, true label) and compute test error
    RegressionEvaluator evaluator = new RegressionEvaluator()
        .setLabelCol(label)
        .setPredictionCol("prediction")
        .setMetricName("rmse");

    metrics.put("rmse", Double.toString(evaluator.evaluate(predictions)));

    return metrics;
}
Example 12
Source File: AggregateActivityInstancesStep.java, from bpmn.ai, BSD 3-Clause "New" or "Revised" License
@Override
public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, Map<String, Object> parameters, SparkRunnerConfig config) {

    //apply first and processState aggregator
    Map<String, String> aggregationMap = new HashMap<>();
    for (String column : dataset.columns()) {
        if (column.equals(BpmnaiVariables.VAR_PROCESS_INSTANCE_ID)) {
            continue;
        } else if (column.equals(BpmnaiVariables.VAR_DURATION) || column.endsWith("_rev")) {
            aggregationMap.put(column, "max");
        } else if (column.equals(BpmnaiVariables.VAR_STATE)) {
            aggregationMap.put(column, "ProcessState");
        } else if (column.equals(BpmnaiVariables.VAR_ACT_INST_ID)) {
            //ignore it, as we aggregate by it
            continue;
        } else {
            aggregationMap.put(column, "AllButEmptyString");
        }
    }

    //first aggregation
    //activity level, take only processInstance and activityInstance rows
    dataset = dataset
            .filter(dataset.col(BpmnaiVariables.VAR_DATA_SOURCE).notEqual(BpmnaiVariables.EVENT_PROCESS_INSTANCE))
            .groupBy(BpmnaiVariables.VAR_PROCESS_INSTANCE_ID, BpmnaiVariables.VAR_ACT_INST_ID)
            .agg(aggregationMap);

    //rename back columns after aggregation
    String pattern = "(max|allbutemptystring|processstate)\\((.+)\\)";
    Pattern r = Pattern.compile(pattern);

    for (String columnName : dataset.columns()) {
        Matcher m = r.matcher(columnName);
        if (m.find()) {
            String newColumnName = m.group(2);
            dataset = dataset.withColumnRenamed(columnName, newColumnName);
        }
    }

    //in case we add the CSV we have a name column in the first dataset of the join so we call drop again to make sure it is gone
    dataset = dataset.drop(BpmnaiVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME);
    dataset = dataset.drop(BpmnaiVariables.VAR_DATA_SOURCE);

    dataset = dataset.sort(BpmnaiVariables.VAR_START_TIME);

    dataset.cache();
    BpmnaiLogger.getInstance().writeInfo("Found " + dataset.count() + " activity instances.");

    if (config.isWriteStepResultsIntoFile()) {
        BpmnaiUtils.getInstance().writeDatasetToCSV(dataset, "agg_of_activity_instances", config);
    }

    //return preprocessed data
    return dataset;
}
Example 13
Source File: InListDeriver.java, from envelope, Apache License 2.0
@Override
public Dataset<Row> derive(Map<String, Dataset<Row>> dependencies) {
    Dataset<Row> target = getStepDataFrame(dependencies);
    if (target.columns().length < 1) {
        throw new RuntimeException("Targeted step, '" + stepName + ",' has no columns");
    }

    try {
        String targetField = fieldName == null ? target.columns()[0] : fieldName;
        Column targetColumn = target.col(targetField);
        LOGGER.debug("Targeting '{}[{}]'", stepName, targetField);

        // If the IN list is inline, there is no batch
        if (inList != null) {
            LOGGER.debug("IN list is inline");
            return target.filter(targetColumn.isin(inList.toArray()));
        }

        // Otherwise, collect the values from the reference, executed within the batch
        else {
            LOGGER.trace("IN list is a reference");
            Dataset<Row> reference = dependencies.get(refStepName);
            String referenceField = refFieldName == null ? reference.columns()[0] : refFieldName;
            LOGGER.debug("Referencing using {}[{}]", refStepName, referenceField);

            Column referenceColumn = reference.col(referenceField);
            Iterator<Row> referenceIterator = reference.select(referenceColumn).distinct().toLocalIterator();

            this.inList = new ArrayList<>();
            long counter = 0;

            // Set up the batch collector
            JavaRDD<Row> unionRDD = new JavaSparkContext(Contexts.getSparkSession().sparkContext()).emptyRDD();
            Dataset<Row> union = Contexts.getSparkSession().createDataFrame(unionRDD, target.schema());

            while (referenceIterator.hasNext()) {
                // Flush the batch
                if (counter == batchSize) {
                    LOGGER.trace("Flushing batch");
                    union = union.union(target.filter(targetColumn.isin(inList.toArray())));
                    inList.clear();
                    counter = 0L;
                }

                // Gather the elements of the IN list from the reference
                inList.add(referenceIterator.next().get(0));
                counter++;
            }

            // If the selection is under the batch threshold
            if (union.rdd().isEmpty()) {
                return target.filter(targetColumn.isin(inList.toArray()));
            }

            // Flush any remaining IN list values
            else {
                return union.union(target.filter(targetColumn.isin(inList.toArray())));
            }
        }
    } catch (Throwable ae) {
        throw new RuntimeException("Error executing IN list filtering", ae);
    }
}
Example 14
Source File: TypeCastStep.java, from bpmn.ai, BSD 3-Clause "New" or "Revised" License
@Override
public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, Map<String, Object> parameters, SparkRunnerConfig config) {

    // get variables
    Map<String, String> varMap = (Map<String, String>) SparkBroadcastHelper.getInstance()
            .getBroadcastVariable(SparkBroadcastHelper.BROADCAST_VARIABLE.PROCESS_VARIABLES_ESCALATED);

    List<StructField> datasetFields = Arrays.asList(dataset.schema().fields());

    List<ColumnConfiguration> columnConfigurations = null;
    List<VariableConfiguration> variableConfigurations = null;

    Configuration configuration = ConfigurationUtils.getInstance().getConfiguration(config);
    if (configuration != null) {
        PreprocessingConfiguration preprocessingConfiguration = configuration.getPreprocessingConfiguration();
        columnConfigurations = preprocessingConfiguration.getColumnConfiguration();
        variableConfigurations = preprocessingConfiguration.getVariableConfiguration();
    }

    Map<String, ColumnConfiguration> columnTypeConfigMap = new HashMap<>();
    Map<String, VariableConfiguration> variableTypeConfigMap = new HashMap<>();

    if (columnConfigurations != null) {
        for (ColumnConfiguration cc : columnConfigurations) {
            columnTypeConfigMap.put(cc.getColumnName(), cc);
        }
    }

    if (variableConfigurations != null) {
        for (VariableConfiguration vc : variableConfigurations) {
            variableTypeConfigMap.put(vc.getVariableName(), vc);
        }
    }

    for (String column : dataset.columns()) {
        // skip revision columns as they are handled for each variable column
        if (column.endsWith("_rev")) {
            continue;
        }

        DataType newDataType = null;
        boolean isVariableColumn = false;
        String configurationDataType = null;
        String configurationParseFormat = null;

        if (variableTypeConfigMap.keySet().contains(column)) {
            // was initially a variable
            configurationDataType = variableTypeConfigMap.get(column).getVariableType();
            configurationParseFormat = variableTypeConfigMap.get(column).getParseFormat();
            if (config.getPipelineMode().equals(BpmnaiVariables.PIPELINE_MODE_LEARN)) {
                isVariableColumn = varMap.keySet().contains(column);
            } else {
                isVariableColumn = true;
            }
        } else if (columnTypeConfigMap.keySet().contains(column)) {
            // was initially a column
            configurationDataType = columnTypeConfigMap.get(column).getColumnType();
            configurationParseFormat = columnTypeConfigMap.get(column).getParseFormat();
        }

        newDataType = mapDataType(datasetFields, column, configurationDataType);

        // only check for cast errors if dev feature is enabled and if a change in the datatype has been done
        if (config.isDevTypeCastCheckEnabled() && !newDataType.equals(getCurrentDataType(datasetFields, column))) {

            // add a column with casted value to be able to check the cast results
            dataset = castColumn(dataset, column, column + "_casted", newDataType, configurationParseFormat);

            // add a column for cast results and write CAST_ERROR? in it if there might be a cast error
            dataset = dataset.withColumn(column + "_castresult",
                    when(dataset.col(column).isNotNull().and(dataset.col(column).notEqual(lit(""))),
                            when(dataset.col(column + "_casted").isNull(), lit("CAST_ERROR?"))
                                    .otherwise(lit(""))
                    ).otherwise(lit(""))
            );
            dataset.cache();

            // check for cast errors and write warning to application log
            if (dataset.filter(column + "_castresult == 'CAST_ERROR?'").count() > 0) {
                BpmnaiLogger.getInstance().writeWarn("Column '" + column
                        + "' seems to have cast errors. Please check the data type (is defined as '"
                        + configurationDataType + "')");
            } else {
                // drop help columns as there are no cast errors for this column and rename casted column to actual column name
                dataset = dataset.drop(column, column + "_castresult").withColumnRenamed(column + "_casted", column);
            }
        } else {
            // cast without checking the cast result, entries are null if spark can't cast it
            dataset = castColumn(dataset, column, column, newDataType, configurationParseFormat);
        }

        // cast revision columns for former variables, revision columns only exist on process level
        if (config.getDataLevel().equals(BpmnaiVariables.DATA_LEVEL_PROCESS) && config.isRevCountEnabled() && isVariableColumn) {
            dataset = dataset.withColumn(column + "_rev", dataset.col(column + "_rev").cast("integer"));
        }
    }

    if (config.isWriteStepResultsIntoFile()) {
        BpmnaiUtils.getInstance().writeDatasetToCSV(dataset, "type_cast_columns", config);
    }

    //return preprocessed data
    return dataset;
}
Example 15
Source File: Spark3Shims.java, from zeppelin, Apache License 2.0
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
    if (obj instanceof Dataset) {
        Dataset<Row> df = ((Dataset) obj).toDF();
        String[] columns = df.columns();
        // DDL will empty DataFrame
        if (columns.length == 0) {
            return "";
        }
        // fetch maxResult+1 rows so that we can check whether it is larger than zeppelin.spark.maxResult
        List<Row> rows = df.takeAsList(maxResult + 1);
        String template = context.getLocalProperties().get("template");
        if (!StringUtils.isBlank(template)) {
            if (rows.size() >= 1) {
                return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
            } else {
                return "";
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("%table ");
        msg.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
        msg.append("\n");
        boolean isLargerThanMaxResult = rows.size() > maxResult;
        if (isLargerThanMaxResult) {
            rows = rows.subList(0, maxResult);
        }
        for (Row row : rows) {
            for (int i = 0; i < row.size(); ++i) {
                msg.append(TableDataUtils.normalizeColumn(row.get(i)));
                if (i != row.size() - 1) {
                    msg.append("\t");
                }
            }
            msg.append("\n");
        }

        if (isLargerThanMaxResult) {
            msg.append("\n");
            msg.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
        }
        // append %text at the end, otherwise the following output will be put in table as well.
        msg.append("\n%text ");
        return msg.toString();
    } else {
        return obj.toString();
    }
}
Example 16
Source File: Spark2Shims.java, from zeppelin, Apache License 2.0
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
    if (obj instanceof Dataset) {
        Dataset<Row> df = ((Dataset) obj).toDF();
        String[] columns = df.columns();
        // DDL will empty DataFrame
        if (columns.length == 0) {
            return "";
        }
        // fetch maxResult+1 rows so that we can check whether it is larger than zeppelin.spark.maxResult
        List<Row> rows = df.takeAsList(maxResult + 1);
        String template = context.getLocalProperties().get("template");
        if (!StringUtils.isBlank(template)) {
            if (rows.size() >= 1) {
                return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
            } else {
                return "";
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("\n%table ");
        msg.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
        msg.append("\n");
        boolean isLargerThanMaxResult = rows.size() > maxResult;
        if (isLargerThanMaxResult) {
            rows = rows.subList(0, maxResult);
        }
        for (Row row : rows) {
            for (int i = 0; i < row.size(); ++i) {
                msg.append(TableDataUtils.normalizeColumn(row.get(i)));
                if (i != row.size() - 1) {
                    msg.append("\t");
                }
            }
            msg.append("\n");
        }

        if (isLargerThanMaxResult) {
            msg.append("\n");
            msg.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
        }
        // append %text at the end, otherwise the following output will be put in table as well.
        msg.append("\n%text ");
        return msg.toString();
    } else {
        return obj.toString();
    }
}