Skip to content

Commit

Permalink
Merge pull request #1052 from Kotlin/merge-improvements
Browse files Browse the repository at this point in the history
Add more operation to compiler plugin
  • Loading branch information
koperagen authored Feb 7, 2025
2 parents 81ec10a + edc9915 commit 210143a
Show file tree
Hide file tree
Showing 33 changed files with 1,471 additions and 149 deletions.
15 changes: 11 additions & 4 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -6003,16 +6003,23 @@ public final class org/jetbrains/kotlinx/dataframe/api/Merge {
}

public final class org/jetbrains/kotlinx/dataframe/api/MergeKt {
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Ljava/util/List;
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;)Ljava/util/List;
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun notNull (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun notNullList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
}

public final class org/jetbrains/kotlinx/dataframe/api/MergeWithTransform {
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;ZLkotlin/jvm/functions/Function2;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)V
}

public final class org/jetbrains/kotlinx/dataframe/api/MinKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public abstract class AggregateDsl<out T> :
DataFrame<T>,
ColumnSelectionDsl<T> {

@Interpretable("GroupByInto")
@Interpretable("AggregateDslInto")
public inline infix fun <reified R> R.into(name: String): NamedValue =
internal().yield(pathOf(name), this, typeOf<R>())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public fun <T> Convert<T, *>.to(type: KType): DataFrame<T> = to { it.convertTo(t
public fun <T, C> Convert<T, C>.to(columnConverter: DataFrame<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> =
df.replace(columns).with { columnConverter(df, it) }

@Refine
@Interpretable("With0")
public inline fun <T, C, reified R> Convert<T, C>.with(
infer: Infer = Infer.Nulls,
Expand All @@ -126,6 +127,8 @@ public fun <T, C, R> Convert<T, DataRow<C>>.asFrame(
body: ColumnsContainer<T>.(ColumnGroup<C>) -> DataFrame<R>,
): DataFrame<T> = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) }

@Refine
@Interpretable("PerRowCol")
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
infer: Infer = Infer.Nulls,
noinline expression: RowColumnExpression<T, C, R>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public data class Gather<T, C, K, R>(
public fun <T, C, K, R> Gather<T, C, K, R>.into(keyColumn: String, valueColumn: String): DataFrame<T> =
gatherImpl(keyColumn, valueColumn)

@AccessApiOverload
public fun <T, C, K, R> Gather<T, C, K, R>.into(
keyColumn: ColumnAccessor<K>,
valueColumn: ColumnAccessor<R>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal
import org.jetbrains.kotlinx.dataframe.impl.aggregation.withExpr
Expand All @@ -14,6 +16,8 @@ import kotlin.reflect.typeOf

// region GroupBy

/**
 * Converts this [GroupBy] into a [DataFrame] by delegating to `toDataFrame(column)`.
 *
 * @param column name forwarded to `toDataFrame` — presumably the name of the column that will
 *   hold the groups; confirm against `toDataFrame`, which is not visible here.
 */
@Refine
@Interpretable("GroupByInto")
public fun <T, G> GroupBy<T, G>.into(column: String): DataFrame<T> = toDataFrame(column)

@AccessApiOverload
Expand Down
60 changes: 52 additions & 8 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand All @@ -16,6 +18,7 @@ import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
 * Starts a merge of the columns chosen by [selector].
 *
 * The returned [Merge] is created with `notNull = false`, an identity transform (`{ it }`, so the
 * merged value is the `List<C>` of column values), `typeOf<Any?>()` as the result type and
 * [Infer.Type] as the inference mode.
 *
 * @param selector selects the columns to merge.
 */
@Interpretable("Merge0")
public fun <T, C> DataFrame<T>.merge(selector: ColumnsSelector<T, C>): Merge<T, C, List<C>> =
Merge(this, selector, false, { it }, typeOf<Any?>(), Infer.Type)

Expand Down Expand Up @@ -44,19 +47,51 @@ public data class Merge<T, C, R>(
internal val infer: Infer,
)

public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C, R> = copy(notNull = true)
/**
 * State of a merge operation once a transform has been supplied.
 *
 * In the code visible here it is what `Merge.by(...)` and `Merge.asStrings()` return, and it is
 * consumed by the `into(...)` and `intoList()` extensions. The plain `Merge.into(path)` also
 * builds one from its own fields and delegates to it.
 *
 * @property df the [DataFrame] whose columns are being merged.
 * @property selector selects the columns to merge.
 * @property notNull flag carried over from the originating `Merge`.
 * @property transform called with a row as receiver and a `List<C>` as argument; produces the merged value.
 * @property resultType the [KType] recorded for the transform's result.
 * @property infer the [Infer] mode stored together with [resultType].
 */
public class MergeWithTransform<T, C, R>(
internal val df: DataFrame<T>,
internal val selector: ColumnsSelector<T, C>,
internal val notNull: Boolean,
internal val transform: DataRow<T>.(List<C>) -> R,
internal val resultType: KType,
internal val infer: Infer,
)

/**
 * Returns a copy of this [Merge] with its `notNull` flag set to `true`, statically re-typed so
 * that the column element type becomes `C & Any`.
 *
 * The cast is unchecked: only the static type changes, the copied instance itself is returned.
 */
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C & Any, R> {
    val flagged = copy(notNull = true)
    return flagged as Merge<T, C & Any, R>
}

/**
 * Variant of `notNull()` for a [Merge] whose result is still a `List`.
 *
 * Besides setting the `notNull` flag on a copy, it narrows the static result type from `List<R>`
 * to `List<R & Any>`. The cast is unchecked; only the static type changes.
 * Exposed to the JVM as `notNullList` to avoid a signature clash with the generic overload.
 */
@JvmName("notNullList")
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, List<R>>.notNull(): Merge<T, C & Any, List<R & Any>> {
    val flagged = copy(notNull = true)
    return flagged as Merge<T, C & Any, List<R & Any>>
}

/**
 * Finishes the merge and stores the result in a column named [columnName].
 *
 * The name is converted with `pathOf` and the call is forwarded to the `into` overload
 * that accepts that path.
 *
 * @param columnName name of the column that receives the merged values.
 */
@Refine
@Interpretable("MergeInto0")
public fun <T, C, R> MergeWithTransform<T, C, R>.into(columnName: String): DataFrame<T> = into(pathOf(columnName))

/**
 * Finishes the merge and stores the result in a column named [columnName].
 *
 * The name is converted with `pathOf` and the call is forwarded to the `into` overload
 * that accepts that path.
 *
 * @param columnName name of the column that receives the merged values.
 */
@Refine
@Interpretable("MergeInto0")
public fun <T, C, R> Merge<T, C, R>.into(columnName: String): DataFrame<T> = into(pathOf(columnName))

@AccessApiOverload
public fun <T, C, R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())
public inline fun <T, C, reified R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())

/**
 * Access-API overload: finishes the merge and stores the result at the location given by
 * `column.path()`. Only the accessor's path is used.
 *
 * @param column accessor whose path identifies the target column.
 */
@AccessApiOverload
public inline fun <T, C, reified R> MergeWithTransform<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> =
into(column.path())

/**
 * Evaluates the merge for every row and returns the merged values as a plain [List]
 * instead of producing a [DataFrame].
 *
 * Only the selected columns are read: each row's values are handed to `transform` as a `List<C>`
 * (unchecked cast).
 *
 * NOTE(review): the `notNull` flag is not consulted here — confirm that this is intended.
 */
public fun <T, C, R> Merge<T, C, R>.intoList(): List<R> {
    val selected = df.select(selector)
    return selected.rows().map { row -> transform(row, row.values() as List<C>) }
}

public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
/**
 * Evaluates the merge for every row and returns the transformed values as a plain [List]
 * instead of producing a [DataFrame].
 *
 * Only the selected columns are read: each row's values are handed to `transform` as a `List<C>`
 * (unchecked cast).
 *
 * NOTE(review): the `notNull` flag is not consulted here — confirm that this is intended.
 */
public fun <T, C, R> MergeWithTransform<T, C, R>.intoList(): List<R> {
    val selected = df.select(selector)
    return selected.rows().map { row -> transform(row, row.values() as List<C>) }
}

public fun <T, C, R> MergeWithTransform<T, C, R>.into(path: ColumnPath): DataFrame<T> {
// If target path exists, merge into temp path
val mergePath = if (df.getColumnOrNull(path) != null) pathOf(nameGenerator().addUnique("temp")) else path
val mergePath = if (df.getColumnOrNull(path) != null) {
pathOf(df.nameGenerator().addUnique("temp"))
} else {
path
}

// move columns into group
val grouped = df.move(selector).under { mergePath }
Expand All @@ -82,16 +117,21 @@ public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
return res
}

public fun <T, C, R> Merge<T, C, R>.asStrings(): Merge<T, C, String> = by(", ")
/**
 * Finishes the merge and stores the result at [path].
 *
 * All fields of this [Merge] are copied unchanged into a [MergeWithTransform], whose
 * path-based `into` does the actual work.
 *
 * @param path location of the column that receives the merged values.
 */
public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
    val withTransform = MergeWithTransform(df, selector, notNull, transform, resultType, infer)
    return withTransform.into(path)
}

/**
 * Shorthand for `by(", ")`: the merged values are produced as a `String`, with `", "` passed
 * as the separator.
 */
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, R>.asStrings(): MergeWithTransform<T, C, String> = by(", ")

@Interpretable("MergeBy0")
public fun <T, C, R> Merge<T, C, R>.by(
separator: CharSequence = ", ",
prefix: CharSequence = "",
postfix: CharSequence = "",
limit: Int = -1,
truncated: CharSequence = "...",
): Merge<T, C, String> =
Merge(
): MergeWithTransform<T, C, String> =
MergeWithTransform(
df = df,
selector = selector,
notNull = notNull,
Expand All @@ -108,7 +148,11 @@ public fun <T, C, R> Merge<T, C, R>.by(
infer = Infer.Nulls,
)

@Interpretable("MergeBy1")
public inline fun <T, C, R, reified V> Merge<T, C, R>.by(
infer: Infer = Infer.Nulls,
crossinline transform: DataRow<T>.(R) -> V,
): Merge<T, C, V> = Merge(df, selector, notNull, { transform(this@by.transform(this, it)) }, typeOf<V>(), infer)
): MergeWithTransform<T, C, V> =
MergeWithTransform(df, selector, notNull, {
transform(this@by.transform(this, it))
}, typeOf<V>(), infer)
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import kotlin.reflect.KProperty

// region DataFrame

/**
 * Renames columns according to `old to new` name pairs.
 *
 * The first component of every pair selects the column to rename; the second components are
 * passed, in the same order, as the new names.
 *
 * @param mappings pairs of (current column name, new column name).
 */
@Refine
@Interpretable("RenameMapping")
public fun <T> DataFrame<T>.rename(vararg mappings: Pair<String, String>): DataFrame<T> =
rename { mappings.map { it.first.toColumnAccessor() }.toColumnSet() }
.into(*mappings.map { it.second }.toTypedArray())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.api.reorderImpl
Expand Down Expand Up @@ -52,6 +54,8 @@ public fun <T, V : Comparable<V>> DataFrame<T>.reorderColumnsBy(
inFrameColumns = atAnyDepth,
).reorderImpl(desc, expression)

/**
 * Reorders columns by their names; equivalent to `reorderColumnsBy(atAnyDepth, desc) { name() }`.
 *
 * @param atAnyDepth forwarded to `reorderColumnsBy`; defaults to `true`.
 * @param desc forwarded to `reorderColumnsBy`; defaults to `false`.
 */
@Refine
@Interpretable("ReorderColumnsByName")
public fun <T> DataFrame<T>.reorderColumnsByName(atAnyDepth: Boolean = true, desc: Boolean = false): DataFrame<T> =
reorderColumnsBy(atAnyDepth, desc) { name() }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.GroupClause
import org.jetbrains.kotlinx.dataframe.api.InsertClause
import org.jetbrains.kotlinx.dataframe.api.Merge
import org.jetbrains.kotlinx.dataframe.api.MergeWithTransform
import org.jetbrains.kotlinx.dataframe.api.MoveClause
import org.jetbrains.kotlinx.dataframe.api.Pivot
import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy
Expand Down Expand Up @@ -165,6 +166,7 @@ public object KotlinNotebookPluginUtils {
is SplitWithTransform<*, *, *>,
is Split<*, *>,
is Merge<*, *, *>,
is MergeWithTransform<*, *, *>,
is Gather<*, *, *, *>,
is Update<*, *>,
is Convert<*, *>,
Expand Down Expand Up @@ -213,6 +215,13 @@ public object KotlinNotebookPluginUtils {
),
)

is MergeWithTransform<*, *, *> -> dataframeLike.into(
generateRandomVariationOfColumnName(
"merged",
dataframeLike.df.columnNames(),
),
)

is Gather<*, *, *, *> -> dataframeLike.into(
generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()),
generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.Merge
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.addAll
Expand Down Expand Up @@ -190,6 +191,7 @@ import org.jetbrains.kotlinx.dataframe.typeClass
import org.junit.Test
import java.math.BigDecimal
import java.time.LocalDate
import kotlin.reflect.KType
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -1397,6 +1399,35 @@ class DataFrameTests : BaseTest() {
res shouldBe expected
}

// Smoke test without assertions: it passes as long as the merge does not throw.
// The target column "b" already exists, so the merge has to go through a temporary column,
// and the frame already contains a column literally named "temp".
@Test
fun `merge into temp`() {
dataFrameOf("a", "b", "temp")(1, null, 3)
.merge { cols("a", "b") }.into("b")
}

// Test helper: exposes the static column element type C of a Merge as a KType.
inline fun <T, reified C, R> Merge<T, C, R>.typeOfElement() = typeOf<C>()

// Verifies that notNull() narrows the static element type of the Merge from Any? to Any.
@Test
fun `merge not null`() {
val merge = dataFrameOf("a", "b")(1, null).merge { col("a") }
merge.typeOfElement() shouldBe typeOf<Any?>()
merge.notNull().typeOfElement() shouldBe typeOf<Any>()
}

// Test helper: returns the static element type T of a List<T> as a KType.
inline fun <reified T> List<T>.typeOfElement(): KType = typeOf<List<T>>().arguments[0].type!!

// Verifies the static element types produced by intoList() for the default transformer,
// for a custom transformer, and in combination with notNull().
@Test
fun `merge cols into list`() {
val merge = dataFrameOf("a", "b")(1, null).merge { col("a") }
merge.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()
merge.by { it }.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()
// Here we can safely narrow List<Any?> down to List<Any> after notNull(),
// because the default transformer builds the List directly from values of type C.
merge.notNull().intoList().typeOfElement() shouldBe typeOf<List<Any>>()
// If notNull() could be called after by { }, this narrowing would no longer be safe,
// because a non-default transformer can introduce nulls itself:
merge.notNull().by { listOf(1, null) }.intoList().typeOfElement() shouldBe typeOf<List<Int?>>()
}

@Test
fun `generic column type`() {
val d = typed.convert { city }.with { it?.toCharArray()?.toList() ?: emptyList() }
Expand Down
10 changes: 5 additions & 5 deletions docs/StardustDocs/topics/reorder.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Returns [`DataFrame`](DataFrame.md) with a new order of selected columns.
```text
reorder { columns }
[.cast<ColumnType>() ]
.by { columnExpression } | .byDesc { columnExpression } | .byName(desc = false) { columnExpression }
.by { columnExpression } | .byDesc { columnExpression } | .byName(desc = false)
columnExpression: DataColumn.(DataColumn) -> Value
```
Expand Down Expand Up @@ -74,19 +74,19 @@ df.reorder { name }.byName(desc = true) // [name.lastName, name.firstName]
Reorders all columns

```text
reorderColumnsBy(dfs = true, desc = false) { columnExpression }
reorderColumnsBy(atAnyDepth = true, desc = false) { columnExpression }
```

**Parameters:**
* `dfs` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `atAnyDepth` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `desc` — apply descending order

## reorderColumnsByName

```text
reorderColumnsByName(dfs = true, desc = false)
reorderColumnsByName(atAnyDepth = true, desc = false)
```

**Parameters:**
* `dfs` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `atAnyDepth` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `desc` — apply descending order
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
@file:Suppress("INVISIBLE_REFERENCE")

package org.jetbrains.kotlinx.dataframe.plugin.impl

import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.TypeApproximation

fun PluginDataFrameSchema.asDataFrame(): DataFrame<ConeTypesAdapter> {
Expand All @@ -28,11 +26,10 @@ private fun List<SimpleCol>.map(): DataFrame<ConeTypesAdapter> {
return dataFrameOf(columns).cast()
}

@Suppress("INVISIBLE_REFERENCE")
fun SimpleCol.asDataColumn(): DataColumn<*> {
val column = when (this) {
is SimpleDataColumn -> DataColumn.createByType(this.name, listOf(this.type))
is SimpleColumnGroup -> DataColumn.createColumnGroup(this.name, this.columns().map()) as ColumnGroupImpl<*>
is SimpleColumnGroup -> DataColumn.createColumnGroup(this.name, this.columns().map()).asDataColumn()
is SimpleFrameColumn -> DataColumn.createFrameColumn(this.name, listOf(this.columns().map()))
}
return column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup
import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame
import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
import org.jetbrains.kotlinx.dataframe.plugin.impl.dsl
import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore
import org.jetbrains.kotlinx.dataframe.plugin.impl.type

typealias TypeApproximation = Marker

class Add : AbstractSchemaModificationInterpreter() {
val Arguments.receiver: PluginDataFrameSchema by dataFrame()
val Arguments.name: String by arg()
val Arguments.infer by ignore()
val Arguments.type: TypeApproximation by type(name("expression"))

override fun Arguments.interpret(): PluginDataFrameSchema {
Expand Down
Loading

0 comments on commit 210143a

Please sign in to comment.