-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
203 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
package org.virtuslab.typedframes | ||
|
||
import types.* | ||
import Internals.Name | ||
|
||
extension [N1 <: Name](col1: TypedColumn[N1, Int]) | ||
inline def +[N2 <: Name](col2: TypedColumn[N2, Int]) = UnnamedTypedColumn[Int](col1.underlying + col2.underlying) | ||
extension [N1 <: Name](col1: TypedColumn[N1, IntegerType]) | ||
inline def +[N2 <: Name](col2: TypedColumn[N2, IntegerType]) = UnnamedTypedColumn[IntegerType](col1.underlying + col2.underlying) | ||
|
||
// More operations can be added easily |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,15 @@ | ||
package org.virtuslab.typedframes | ||
|
||
import org.apache.spark.sql.{ DataFrame => UntypedDataFrame, Encoder, SparkSession /* cdscsdc */ } | ||
import org.apache.spark.sql.{ DataFrame => UntypedDataFrame, Encoder, SparkSession } | ||
import types.{DataType, StructType} | ||
|
||
object TypedDataFrameOpaqueScope: | ||
opaque type TypedDataFrame[+S <: FrameSchema] = UntypedDataFrame | ||
opaque type TypedDataFrame[+S <: StructType] = UntypedDataFrame | ||
extension (inline df: UntypedDataFrame) | ||
inline def typed[A](using schema: FrameSchema.Provider[A]): TypedDataFrame[schema.Schema] = df // TODO: Check schema at runtime? Check if names of columns match? | ||
inline def withSchema[S <: FrameSchema]: TypedDataFrame[S] = df // TODO? make it private[typedframes] | ||
inline def typed[A](using encoder: DataType.StructEncoder[A]): TypedDataFrame[encoder.Encoded] = df // TODO: Check schema at runtime? Check if names of columns match? | ||
inline def withSchema[S <: StructType]: TypedDataFrame[S] = df // TODO? make it private[typedframes] | ||
|
||
extension [S <: FrameSchema](tdf: TypedDataFrame[S]) | ||
extension [S <: StructType](tdf: TypedDataFrame[S]) | ||
inline def untyped: UntypedDataFrame = tdf | ||
|
||
export TypedDataFrameOpaqueScope.* |
40 changes: 12 additions & 28 deletions
40
src/main/scala/org/virtuslab/typedframes/DataFrameBuilders.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,29 @@ | ||
package org.virtuslab.typedframes | ||
|
||
import scala.quoted._ | ||
import org.apache.spark.sql.{ DataFrame => UntypedDataFrame, Encoder, SparkSession /* cdscsdc */ } | ||
import org.apache.spark.sql | ||
import org.apache.spark.sql.{ DataFrame => UntypedDataFrame, SparkSession } | ||
import types.{ DataType, StructType } | ||
import Internals.Name | ||
import TypedDataFrameOpaqueScope.* | ||
|
||
object TypedDataFrameBuilders: | ||
//TODO: More inlining? | ||
class UnnamedColumnBuilder[A <: Int | String](spark: SparkSession, seq: Seq[A]): | ||
transparent inline def withColumn[N <: Name]: TypedDataFrame[FrameSchema] = ${toTypedDFWithNameImpl[N, A]('seq, 'spark)} | ||
transparent inline def withColumn[N <: Name](columnName: N): TypedDataFrame[FrameSchema] = withColumn[N] | ||
given primitiveTypeBuilderOps: {} with | ||
extension [A <: Int | String](inline seq: Seq[A])(using typeEncoder: DataType.Encoder[A], spark: SparkSession) // TODO: Add more primitive types | ||
transparent inline def toTypedDF[N <: Name](name: N): TypedDataFrame[StructType] = ${toTypedDFWithNameImpl[N, A, typeEncoder.Encoded]('seq, 'spark)} | ||
|
||
given foo1: {} with | ||
extension [A <: Int | String](inline seq: Seq[A])(using spark: SparkSession) // TODO: Add more primitive types | ||
// TODO decide on/unify naming | ||
// transparent inline def toTypedDF[N <: Name]: TypedDataFrame[FrameSchema] = ${toTypedDFWithNameImpl[N, A]('seq, 'spark)} | ||
inline def toTypedDF: UnnamedColumnBuilder[A] = new UnnamedColumnBuilder[A](spark, seq) | ||
|
||
// given foo2: {} with | ||
// extension [A <: Int | String](inline seq: Seq[A])(using spark: SparkSession) // TODO: Add more primitive types | ||
// // transparent inline def toTypedDF[N <: Name](columnName: N): TypedDataFrame[FrameSchema] = seq.toTypedDF[N] | ||
// transparent inline def toTypedDFNamed[N <: Name](columnName: N): TypedDataFrame[FrameSchema] = seq.toTypedDF[N] | ||
|
||
private def toTypedDFWithNameImpl[N <: Name : Type, A : Type](using Quotes)(seq: Expr[Seq[A]], spark: Expr[SparkSession]): Expr[TypedDataFrame[FrameSchema/* TableSchema */]] = | ||
private def toTypedDFWithNameImpl[N <: Name : Type, A : Type, E <: DataType : Type](using Quotes)(seq: Expr[Seq[A]], spark: Expr[SparkSession]): Expr[TypedDataFrame[StructType/* TableSchema */]] = | ||
'{ | ||
val s = $spark | ||
given Encoder[A] = ${ Expr.summon[Encoder[A]].get } | ||
given sql.Encoder[A] = ${ Expr.summon[sql.Encoder[A]].get } | ||
import s.implicits.* | ||
localSeqToDatasetHolder($seq).toDF(valueOf[N]).withSchema[FrameSchema.WithSingleColumn[N, A]] | ||
localSeqToDatasetHolder($seq).toDF(valueOf[N]).withSchema[StructType.WithSingleColumn[N, E]] | ||
} | ||
|
||
given foo3: {} with | ||
extension [A](inline seq: Seq[A])(using schema: FrameSchema.Provider[A])(using encoder: Encoder[A], spark: SparkSession) | ||
inline def toTypedDF: TypedDataFrame[schema.Schema] = | ||
given structTypeBuilderOps: {} with | ||
extension [A](inline seq: Seq[A])(using typeEncoder: DataType.StructEncoder[A], runtimeEncoder: sql.Encoder[A], spark: SparkSession) | ||
inline def toTypedDF: TypedDataFrame[typeEncoder.Encoded] = | ||
import spark.implicits.* | ||
seq.toDF(/* Should we explicitly pass columns here? */).typed | ||
|
||
// given foo4: {} with | ||
// extension [A](inline seq: Seq[A])(using schema: FrameSchema.Provider[A])(using encoder: Encoder[A], spark: SparkSession) | ||
// inline def crash: TypedDataFrame[schema.Schema] = | ||
// import spark.implicits.* | ||
// seq.toDF(/* Should we explicitly pass columns here? */).typed | ||
|
||
export TypedDataFrameBuilders.given |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
package org.virtuslab.typedframes | ||
|
||
import org.apache.spark.sql.Encoder | ||
import org.apache.spark.sql | ||
import types.{DataType, StructType} | ||
|
||
extension [S <: FrameSchema](inline tdf: TypedDataFrame[S]) | ||
extension [S <: StructType](inline tdf: TypedDataFrame[S]) | ||
inline def show(): Unit = tdf.untyped.show() | ||
|
||
// TODO: check schema conformance instead of equality | ||
inline def collect[T]()(using e: Encoder[T], fsf: FrameSchema.Provider[T], ev: fsf.Schema =:= S): List[T] = | ||
tdf.untyped.as[T].collect.toList | ||
inline def collect[A]()(using typeEncoder: DataType.StructEncoder[A], runtimeEncoder: sql.Encoder[A], eq: typeEncoder.Encoded =:= S): List[A] = | ||
tdf.untyped.as[A].collect.toList |
75 changes: 42 additions & 33 deletions
75
src/main/scala/org/virtuslab/typedframes/FrameSchema.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.