Fix BgenRowConverterSuite with AQE enabled (#292)

* copy after queryExecution.toRdd Signed-off-by: Karen Feng <[email protected]> * scalafmt Signed-off-by: Karen Feng <[email protected]> * Convert directly instead of copying Signed-off-by: Karen Feng <[email protected]>
projectglow · Sep 9, 2020 · af85d7e · af85d7e
1 parent 9296bd8
commit af85d7e
Showing 1 changed file with 24 additions and 9 deletions.
diff --git a/core/src/test/scala/io/projectglow/bgen/BgenRowConverterSuite.scala b/core/src/test/scala/io/projectglow/bgen/BgenRowConverterSuite.scala
@@ -17,6 +17,7 @@
 package io.projectglow.bgen
 
 import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.SparkSession
 
 import io.projectglow.common.{BgenGenotype, BgenRow}
 
@@ -25,32 +26,35 @@ class BgenRowConverterSuite extends BgenConverterBaseTest {
   val sourceName = "bgen"
 
   def compareVcfToBgen(
+      sess: SparkSession,
       testBgen: String,
       testVcf: String,
       bitsPerProb: Int,
       defaultPhasing: Boolean = false) {
-    val sess = spark
     import sess.implicits._
 
-    val bgenRows = spark
+    val bgenRows = sess
       .read
       .format(sourceName)
       .schema(BgenRow.schema)
       .load(testBgen)
       .sort("contigName", "start", "names")
       .as[BgenRow]
       .collect
-    val vcfDf = spark
+    val vcfDf = sess
       .read
       .format("vcf")
       .load(testVcf)
       .sort("contigName", "start", "names")
-    val converter = new InternalRowToBgenRowConverter(vcfDf.schema, 10, 2, defaultPhasing)
+    val vcfSchema = vcfDf.schema
     val vcfRows = vcfDf
       .queryExecution
       .toRdd
+      .mapPartitions { it =>
+        val converter = new InternalRowToBgenRowConverter(vcfSchema, 10, 2, defaultPhasing)
+        it.map(converter.convert)
+      }
       .collect
-      .map(converter.convert)
 
     assert(bgenRows.size == vcfRows.size)
     bgenRows.zip(vcfRows).foreach {
@@ -60,19 +64,30 @@ class BgenRowConverterSuite extends BgenConverterBaseTest {
   }
 
   test("unphased 8 bit") {
-    compareVcfToBgen(s"$testRoot/example.8bits.bgen", s"$testRoot/example.8bits.vcf", 8)
+    compareVcfToBgen(spark, s"$testRoot/example.8bits.bgen", s"$testRoot/example.8bits.vcf", 8)
   }
 
   test("unphased 16 bit (with missing samples)") {
-    compareVcfToBgen(s"$testRoot/example.16bits.bgen", s"$testRoot/example.16bits.vcf", 16)
+    compareVcfToBgen(spark, s"$testRoot/example.16bits.bgen", s"$testRoot/example.16bits.vcf", 16)
   }
 
   test("unphased 32 bit") {
-    compareVcfToBgen(s"$testRoot/example.32bits.bgen", s"$testRoot/example.32bits.vcf", 32)
+    compareVcfToBgen(spark, s"$testRoot/example.32bits.bgen", s"$testRoot/example.32bits.vcf", 32)
   }
 
   test("phased") {
-    compareVcfToBgen(s"$testRoot/phased.16bits.bgen", s"$testRoot/phased.16bits.vcf", 16, true)
+    compareVcfToBgen(
+      spark,
+      s"$testRoot/phased.16bits.bgen",
+      s"$testRoot/phased.16bits.vcf",
+      16,
+      true)
+  }
+
+  test("works with adaptive query execution enabled") {
+    val sess = spark.newSession()
+    sess.conf.set("spark.sql.adaptive.enabled", true)
+    compareVcfToBgen(sess, s"$testRoot/example.8bits.bgen", s"$testRoot/example.8bits.vcf", 8)
   }
 
   def inferPhasingOrPloidy(