Skip to content
This repository has been archived by the owner on Nov 16, 2022. It is now read-only.

Commit

Permalink
Use REQUIRED repetition level for MAP keys in parquet writer
Browse files Browse the repository at this point in the history
As per the Parquet format specification, the key field of a MAP must use the REQUIRED repetition level.

cherry-pick of trinodb/trino@3247bd2
Co-Authored-By: Raunaq Morarka <[email protected]>
  • Loading branch information
zhenxiao committed Jun 16, 2022
1 parent 20795f7 commit 842b469
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ public void testSingleLevelArrayOfMapOfArray()
}

@Test
public void testMapOfArray()
public void testMapOfArrayValues()
throws Exception
{
Iterable<List<Integer>> arrays = createNullableTestArrays(limit(cycle(asList(1, null, 3, 5, null, null, null, 7, 11, null, 13, 17)), 30_000));
Expand All @@ -487,6 +487,22 @@ public void testMapOfArray()
values, values, mapType(INTEGER, new ArrayType(INTEGER)));
}

// Round-trips maps whose keys are lists of integers and whose values are integers,
// i.e. a map with an array type in the key position (the counterpart of the
// array-as-value case, which uses mapType(INTEGER, new ArrayType(INTEGER))).
@Test
public void testMapOfArrayKeys()
throws Exception
{
// Key lists are built from a repeating element pattern (which includes nulls), limited to 30_000 elements.
// NOTE(review): presumably createTestArrays, unlike createNullableTestArrays, never yields a null list
// itself, so every map key is non-null — confirm against the helper.
Iterable<List<Integer>> mapKeys = createTestArrays(limit(cycle(asList(1, null, 3, 5, null, null, null, 7, 11, null, 13, 17)), 30_000));
Iterable<Integer> mapValues = intsBetween(0, 30_000);
Iterable<Map<List<Integer>, Integer>> testMaps = createTestMaps(mapKeys, mapValues);
tester.testRoundTrip(
// Object inspector describing map<list<int>, int>.
getStandardMapObjectInspector(
getStandardListObjectInspector(javaIntObjectInspector),
javaIntObjectInspector),
// The same iterable is passed twice — presumably as the values to write and the values expected on read; verify against testRoundTrip.
testMaps,
testMaps,
mapType(new ArrayType(INTEGER), INTEGER));
}

@Test
public void testMapOfSingleLevelArray()
throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import static java.util.Objects.requireNonNull;
import static org.apache.parquet.Preconditions.checkArgument;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;

public class ParquetSchemaConverter
{
Expand Down Expand Up @@ -154,7 +155,7 @@ private org.apache.parquet.schema.Type getMapType(MapType type, String name, Lis
Type keyType = type.getKeyType();
Type valueType = type.getValueType();
return Types.map(repetition)
.key(convert(keyType, "key", parent, OPTIONAL))
.key(convert(keyType, "key", parent, REQUIRED))
.value(convert(valueType, "value", parent, OPTIONAL))
.named(name);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import static java.util.Arrays.asList;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.testng.Assert.assertEquals;

public class TestParquetSchemaConverter
Expand All @@ -42,7 +43,7 @@ public void testMapKeyRepetitionLevel()
GroupType keyValueValue = mapType.getType(0).asGroupType();
assertEquals(keyValueValue.isRepetition(REPEATED), true);
Type keyType = keyValueValue.getType(0).asPrimitiveType();
assertEquals(keyType.isRepetition(OPTIONAL), true);
assertEquals(keyType.isRepetition(REQUIRED), true);
PrimitiveType valueType = keyValueValue.getType(1).asPrimitiveType();
assertEquals(valueType.isRepetition(OPTIONAL), true);

Expand All @@ -53,7 +54,7 @@ public void testMapKeyRepetitionLevel()
keyValueValue = mapType.getType(0).asGroupType();
assertEquals(keyValueValue.isRepetition(REPEATED), true);
keyType = keyValueValue.getType(0).asGroupType();
assertEquals(keyType.isRepetition(OPTIONAL), true);
assertEquals(keyType.isRepetition(REQUIRED), true);
assertEquals(keyType.asGroupType().getType(0).asPrimitiveType().isRepetition(OPTIONAL), true);
assertEquals(keyType.asGroupType().getType(1).asPrimitiveType().isRepetition(OPTIONAL), true);
valueType = keyValueValue.getType(1).asPrimitiveType();
Expand Down

0 comments on commit 842b469

Please sign in to comment.