Skip to content

Commit

Permalink
Deprecated all File overloads from ParquetTools
Browse files Browse the repository at this point in the history
  • Loading branch information
malhotrashivam committed Apr 10, 2024
1 parent 1383850 commit 8c68079
Show file tree
Hide file tree
Showing 5 changed files with 597 additions and 200 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import io.deephaven.base.verify.Require;
import io.deephaven.configuration.Configuration;
import io.deephaven.engine.table.TableDefinition;
import io.deephaven.engine.table.impl.ColumnToCodecMappings;
import io.deephaven.hash.KeyedObjectHashMap;
import io.deephaven.hash.KeyedObjectKey;
Expand Down Expand Up @@ -116,6 +117,24 @@ public static int getDefaultTargetPageSize() {
return defaultTargetPageSize;
}

/**
 * The possible layouts of parquet data: a single file, or one of several directory arrangements.
 */
public enum ParquetFileLayout {
    /** A single parquet file. */
    SINGLE_FILE,

    /** A single directory of parquet files. */
    FLAT_PARTITIONED,

    /** A key-value directory partitioning of parquet files. */
    KV_PARTITIONED,

    /** A directory containing a _metadata parquet file and an optional _common_metadata parquet file. */
    METADATA_PARTITIONED
}

// Initial value of the builder's file layout; null means no layout has been specified.
private static final ParquetFileLayout DEFAULT_FILE_LAYOUT = null;

// Initial value of the builder's table definition; null means no definition has been specified.
private static final TableDefinition DEFAULT_TABLE_DEFINITION = null;

// Initial value of the builder's generate-metadata-files flag.
private static final boolean DEFAULT_GENERATE_METADATA_FILES = false;

static final String UUID_TOKEN = "{uuid}";
Expand Down Expand Up @@ -178,6 +197,9 @@ public final String getColumnNameFromParquetColumnNameOrDefault(final String par
*/
public abstract boolean generateMetadataFiles();

/**
 * @return the expected layout of the parquet file or directory to read, or {@code null} if none was specified
 */
public abstract ParquetFileLayout getFileLayout();

/**
 * @return the table definition to use in place of the one implied by the parquet file being read or by the table
 *         being written, or {@code null} if none was specified
 */
public abstract TableDefinition getTableDefinition();

/**
* @return the base name for partitioned parquet data. Check
Expand Down Expand Up @@ -270,6 +292,16 @@ public boolean generateMetadataFiles() {
public String baseNameForPartitionedParquetData() {
return DEFAULT_BASE_NAME_FOR_PARTITIONED_PARQUET_DATA;
}

// Always reports the default layout, DEFAULT_FILE_LAYOUT (null).
@Override
public ParquetFileLayout getFileLayout() {
return DEFAULT_FILE_LAYOUT;
}

// Always reports the default table definition, DEFAULT_TABLE_DEFINITION (null).
@Override
public TableDefinition getTableDefinition() {
return DEFAULT_TABLE_DEFINITION;
}
};

private static class ColumnInstructions {
Expand Down Expand Up @@ -340,6 +372,8 @@ private static final class ReadOnly extends ParquetInstructions {
private final Object specialInstructions;
private final boolean generateMetadataFiles;
private final String baseNameForPartitionedParquetData;
private final ParquetFileLayout fileLayout;
private final TableDefinition tableDefinition;

private ReadOnly(
final KeyedObjectHashMap<String, ColumnInstructions> columnNameToInstructions,
Expand All @@ -352,7 +386,9 @@ private ReadOnly(
final boolean isRefreshing,
final Object specialInstructions,
final boolean generateMetadataFiles,
final String baseNameForPartitionedParquetData) {
final String baseNameForPartitionedParquetData,
final ParquetFileLayout fileLayout,
final TableDefinition tableDefinition) {
this.columnNameToInstructions = columnNameToInstructions;
this.parquetColumnNameToInstructions = parquetColumnNameToColumnName;
this.compressionCodecName = compressionCodecName;
Expand All @@ -364,6 +400,8 @@ private ReadOnly(
this.specialInstructions = specialInstructions;
this.generateMetadataFiles = generateMetadataFiles;
this.baseNameForPartitionedParquetData = baseNameForPartitionedParquetData;
this.fileLayout = fileLayout;
this.tableDefinition = tableDefinition;
}

private String getOrDefault(final String columnName, final String defaultValue,
Expand Down Expand Up @@ -467,6 +505,16 @@ public String baseNameForPartitionedParquetData() {
return baseNameForPartitionedParquetData;
}

/**
 * @return the file layout passed to the constructor; {@code null} when the builder's layout was left unset
 */
@Override
public ParquetFileLayout getFileLayout() {
return fileLayout;
}

/**
 * @return the table definition passed to the constructor; {@code null} when the builder's definition was left
 *         unset
 */
@Override
public TableDefinition getTableDefinition() {
return tableDefinition;
}

KeyedObjectHashMap<String, ColumnInstructions> copyColumnNameToInstructions() {
// noinspection unchecked
return (columnNameToInstructions == null)
Expand Down Expand Up @@ -520,6 +568,8 @@ public static class Builder {
private Object specialInstructions;
private boolean generateMetadataFiles = DEFAULT_GENERATE_METADATA_FILES;
private String baseNameForPartitionedParquetData = DEFAULT_BASE_NAME_FOR_PARTITIONED_PARQUET_DATA;
private ParquetFileLayout fileLayout = DEFAULT_FILE_LAYOUT;
private TableDefinition tableDefinition = DEFAULT_TABLE_DEFINITION;

/**
 * Creates a builder whose settings start at the initial values given in the field declarations.
 */
public Builder() {}

Expand Down Expand Up @@ -737,6 +787,31 @@ public Builder setBaseNameForPartitionedParquetData(final String baseNameForPart
return this;
}

/**
 * Set the expected file layout when reading a parquet file or a directory. This info can be used to skip some
 * computations to deduce the file layout from the source directory structure.
 *
 * @param fileLayout the expected layout; stored as-is without validation, so {@code null} is accepted
 * @return this builder
 */
public Builder setFileLayout(final ParquetFileLayout fileLayout) {
this.fileLayout = fileLayout;
return this;
}

/**
 * Set the table definition to use when reading or writing parquet data.
 * <ul>
 * <li>When reading a parquet file, this corresponds to the table definition to use instead of the one implied
 * by the parquet file being read. Providing a definition can help save additional computations to deduce the
 * table definition from the parquet files as well as from the directory layouts when reading partitioned
 * data.</li>
 * <li>When writing a parquet file, this corresponds to the table definition to use instead of the one implied
 * by the table being written.</li>
 * </ul>
 * This definition can be used to skip some columns or add additional columns with {@code null} values.
 *
 * @param tableDefinition the definition to use; stored as-is without validation, so {@code null} is accepted
 * @return this builder
 */
public Builder setTableDefinition(final TableDefinition tableDefinition) {
this.tableDefinition = tableDefinition;
return this;
}

public ParquetInstructions build() {
final KeyedObjectHashMap<String, ColumnInstructions> columnNameToInstructionsOut = columnNameToInstructions;
columnNameToInstructions = null;
Expand All @@ -745,7 +820,8 @@ public ParquetInstructions build() {
parquetColumnNameToInstructions = null;
return new ReadOnly(columnNameToInstructionsOut, parquetColumnNameToColumnNameOut, compressionCodecName,
maximumDictionaryKeys, maximumDictionarySize, isLegacyParquet, targetPageSize, isRefreshing,
specialInstructions, generateMetadataFiles, baseNameForPartitionedParquetData);
specialInstructions, generateMetadataFiles, baseNameForPartitionedParquetData, fileLayout,
tableDefinition);
}
}

Expand Down
Loading

0 comments on commit 8c68079

Please sign in to comment.