@@ -15,14 +15,33 @@ import fansi.Str
15
15
import scala .collection .View .FlatMap
16
16
import io .github .quafadas .scautable .ConsoleFormat .*
17
17
18
+ import scala .math .Fractional .Implicits .*
18
19
19
20
20
21
@ experimental
21
22
object CSV :
23
+
24
/** Materialises the value described by the type `A`.
  *
  * Tuple types are walked element by element (each element is resolved recursively, so tuples
  * nested inside tuples are handled); any non-tuple type is delegated to `constValue`.
  */
inline def constValueAll[A]: A =
  inline erasedValue[A] match
    case _: EmptyTuple => EmptyTuple.asInstanceOf[A]
    case _: (first *: rest) => (constValueAll[first] *: constValueAll[rest]).asInstanceOf[A]
    case _ => constValue[A]
29
+
30
+
31
/** Converts a list into a tuple holding the same elements in the same order.
  * An empty list becomes `EmptyTuple`.
  */
private def listToTuple[A](list: List[A]): Tuple =
  list.foldRight(EmptyTuple: Tuple)((elem, acc) => elem *: acc)
34
+
22
35
/** Prepends the string type `X` to the tuple type `Y`. */
type Concat[X <: String, Y <: Tuple] = *:[X, Y]

/** The two-element tuple type made of `X` followed by `A`. */
type ConcatSingle[X, A] = *:[X, *:[A, EmptyTuple]]
25
38
39
/** Flips every boolean literal type in the tuple `T`: `true` becomes `false` and `false` becomes
  * `true`. The length and order of the tuple are kept.
  */
type Negate[T <: Tuple] <: Tuple = T match
  case EmptyTuple => EmptyTuple
  case flag *: rest => flag match
    case true => false *: Negate[rest]
    case false => true *: Negate[rest]
44
+
26
45
type IsColumn [StrConst <: String , T <: Tuple ] = T match
27
46
case EmptyTuple => false
28
47
case (head *: tail) => IsMatch [StrConst , head] match
@@ -62,6 +81,10 @@ object CSV:
62
81
case false =>
63
82
typeHead *: DropOneTypeAtName [nameTail, StrConst , typeTail]
64
83
84
/** For each name in `ForNames`, in order, looks up `GetTypeAtName[N, name, T]` and collects the
  * results into a tuple with one entry per requested name.
  */
type GetTypesAtNames[N <: Tuple, ForNames <: Tuple, T <: Tuple] <: Tuple = ForNames match
  case wanted *: remaining => GetTypeAtName[N, wanted, T] *: GetTypesAtNames[N, remaining, T]
  case EmptyTuple => EmptyTuple
87
+
65
88
type GetTypeAtName [N <: Tuple , StrConst <: String , T <: Tuple ] = (N , T ) match
66
89
case (EmptyTuple , _) => EmptyTuple
67
90
case (_, EmptyTuple ) => EmptyTuple
@@ -87,6 +110,42 @@ object CSV:
87
110
case A => true
88
111
case _ => false
89
112
113
/** `true` for `Int`, `Long`, `Float` and `Double`, and for an `Option` whose element type is
  * itself numeric by this definition; `false` for every other type.
  */
type IsNumeric[T] <: Boolean = T match
  case Double => true
  case Float => true
  case Long => true
  case Int => true
  case Option[inner] => IsNumeric[inner]
  case _ => false
120
+
121
/** Maps the tuple of column types `T` to a tuple of boolean literal types of the same length:
  * each position holds the result of `IsNumeric` for the type at that position.
  */
type NumericColsIdx[T <: Tuple] <: Tuple = T match
  case EmptyTuple => EmptyTuple
  case col *: others => IsNumeric[col] match
    case false => false *: NumericColsIdx[others]
    case true => true *: NumericColsIdx[others]
127
+
128
/** Keeps the elements of `T` whose flag at the same position in `Bools` is `true` and drops
  * those whose flag is `false`, preserving order.
  *
  * Only `true *: _` and `false *: _` are handled for `Bools`, so it has to supply a flag for
  * every element of `T`.
  */
type SelectFromTuple[T <: Tuple, Bools <: Tuple] <: Tuple = T match
  case EmptyTuple => EmptyTuple
  case elem *: rest => Bools match
    case false *: flags => SelectFromTuple[rest, flags]
    case true *: flags => elem *: SelectFromTuple[rest, flags]
133
+
134
/** `true` when `IsColumn[name, K]` holds for every name in `T` (trivially `true` for an empty
  * `T`); `false` as soon as one name fails the check.
  */
type AllAreColumns[T <: Tuple, K <: Tuple] <: Boolean = T match
  case EmptyTuple => true
  case name *: rest => IsColumn[name, K] match
    case false => false
    case true => AllAreColumns[rest, K]
139
+
140
/** Builds one boolean flag per element of `In`, in order: `true` when
  * `IsColumn[element, Search]` is `true`, `false` otherwise.
  *
  * The result always has the same length as `In`, which is what `SelectFromTuple` needs when
  * the flags are used as its selector. Previously an empty `Search` short-circuited to a single
  * `false` regardless of how many elements `In` had; an empty `Search` is now handled by
  * `IsColumn` itself (it yields `false` for an empty tuple), giving one `false` per element.
  */
type TupleContainsIdx[Search <: Tuple, In <: Tuple] <: Tuple = In match
  case EmptyTuple => EmptyTuple
  case head *: tail => IsColumn[head, Search] match
    case true => true *: TupleContainsIdx[Search, tail]
    case false => false *: TupleContainsIdx[Search, tail]
147
+
148
+
90
149
91
150
type StringifyTuple [T >: Tuple ] <: Tuple = T match
92
151
case EmptyTuple => EmptyTuple
@@ -168,6 +227,135 @@ object CSV:
168
227
}
169
228
}
170
229
230
+ // inline def numericCols: Iterator[
231
+ // NamedTuple.NamedTuple[
232
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
233
+ // io.github.quafadas.scautable.CSV.TupleContainsIdx[
234
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
235
+ // io.github.quafadas.scautable.CSV.NumericColsIdx[V1]],
236
+ // K1]
237
+ // ],
238
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[V1,
239
+ // io.github.quafadas.scautable.CSV.TupleContainsIdx[
240
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
241
+ // io.github.quafadas.scautable.CSV.NumericColsIdx[V1]],
242
+ // K1]
243
+ // ]
244
+ // ]
245
+ // ] =
246
+ // val ev1 = summonInline[AllAreColumns[SelectFromTuple[K1, NumericColsIdx[V1]], K1] =:= true]
247
+ // columns[SelectFromTuple[K1, NumericColsIdx[V1]]](using ev1)
248
+
249
+ // inline def nonNumericCols: Iterator[
250
+ // NamedTuple.NamedTuple[
251
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
252
+ // io.github.quafadas.scautable.CSV.TupleContainsIdx[
253
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
254
+ // io.github.quafadas.scautable.CSV.Negate[
255
+ // io.github.quafadas.scautable.CSV.NumericColsIdx[V1]]
256
+ // ],
257
+ // K1]
258
+ // ],
259
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[V1,
260
+ // io.github.quafadas.scautable.CSV.TupleContainsIdx[
261
+ // io.github.quafadas.scautable.CSV.SelectFromTuple[K1,
262
+ // io.github.quafadas.scautable.CSV.Negate[
263
+ // io.github.quafadas.scautable.CSV.NumericColsIdx[V1]]
264
+ // ],
265
+ // K1]
266
+ // ]
267
+ // ]
268
+ // ] =
269
+ // val ev1 = summonInline[
270
+ // AllAreColumns[SelectFromTuple[K1, Negate[NumericColsIdx[V1]]], K1] =:= true
271
+ // ]
272
+ // columns[SelectFromTuple[K1, Negate[NumericColsIdx[V1]]]](using ev1)
273
+
274
// NOTE(review): returns a fixed tuple of five names cast to the selected-names type; the value
// does not depend on K1 or ST. Looks like scaffolding - confirm before relying on it.
inline def resolve[ST <: Tuple]: SelectFromTuple[K1, TupleContainsIdx[ST, K1]] =
  val fixedNames = (" Pclass", " Age", " SibSp", " Parch", " Fare")
  fixedNames.asInstanceOf[SelectFromTuple[K1, TupleContainsIdx[ST, K1]]]
275
// NOTE(review): returns a fixed tuple of five values cast to the selected-types tuple; the value
// does not depend on K1, V1 or ST. Looks like scaffolding - confirm before relying on it.
inline def resolveT[ST <: Tuple]: GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1] =
  val fixedValues = (1, Some(2.0), 1, 1, 2.0)
  fixedValues.asInstanceOf[GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]]
276
+
277
// NOTE(review): builds a fixed five-field named tuple and casts it to the named-tuple type
// computed from K1, V1 and ST; the value itself does not depend on them. Looks like
// scaffolding - confirm before relying on it.
inline def resolveNT[ST <: Tuple]: NamedTuple[
  SelectFromTuple[K1, TupleContainsIdx[ST, K1]],
  GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]
] =
  val fixedRow = (1, Some(2.0), 1, 1, 2.0).withNames[(" Pclass", " Age", " SibSp", " Parch", " Fare")]
  fixedRow.asInstanceOf[
    NamedTuple[
      SelectFromTuple[K1, TupleContainsIdx[ST, K1]],
      GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]
    ]
  ]
289
+
290
+
291
+
292
/** Projects every row of the iterator onto the columns named in `ST`.
  *
  * The selected columns are emitted in the order in which they appear in `K1`, not in the order
  * given in `ST`. Each output row is a named tuple carrying the selected names and the
  * corresponding types.
  *
  * @tparam ST tuple of column names to keep; `ev` proves that each of them is a column of `K1`
  */
inline def columns[ST <: Tuple](using ev: AllAreColumns[ST, K1] =:= true):
  Iterator[
    NamedTuple[
      SelectFromTuple[K1, TupleContainsIdx[ST, K1]],
      GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]
    ]
  ] =
  val allNames = constValueTuple[K1].toList.map(_.toString())
  val keptNames = constValueTuple[SelectFromTuple[K1, TupleContainsIdx[ST, K1]]].toList.map(_.toString())

  // Position of every kept name inside the full header list (existing column order is preserved);
  // a name that cannot be found is dropped.
  val positions =
    for
      name <- keptNames
      pos = allNames.indexOf(name)
      if pos != -1
    yield pos

  itr.map[NamedTuple[SelectFromTuple[K1, TupleContainsIdx[ST, K1]], GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]]] {
    (row: NamedTuple[K1, V1]) =>
      val values = row.toTuple

      // Walk the positions back to front so that prepending rebuilds them in their original order.
      val picked: Tuple = positions.reverse.foldLeft(EmptyTuple: Tuple) {
        (acc, pos) => values(pos) *: acc
      }

      picked
        .withNames[SelectFromTuple[K1, TupleContainsIdx[ST, K1]]]
        .asInstanceOf[
          NamedTuple[
            SelectFromTuple[K1, TupleContainsIdx[ST, K1]],
            GetTypesAtNames[K1, SelectFromTuple[K1, TupleContainsIdx[ST, K1]], V1]
          ]
        ]
  }
333
+
334
/** Computes summary statistics for the numeric column `S`.
  *
  * The whole column is read into memory. The result is a named tuple holding the mean, the
  * standard deviation (square root of the sum of squared deviations divided by the row count),
  * the minimum, the 25th, 50th and 75th percentiles (linear interpolation between the two
  * neighbouring sorted values) and the maximum.
  *
  * NOTE(review): `IsNumeric` also accepts `Int`, `Long` and `Option[...]` columns, but this
  * method additionally needs a `Fractional` instance for the column type - confirm that the
  * combination is intended.
  *
  * @tparam S name of the column to summarise
  * @throws NoSuchElementException if the column has no rows
  */
inline def numericColSummary[S <: String](using ev: IsColumn[S, K1] =:= true, isNum: IsNumeric[GetTypeAtName[K1, S, V1]] =:= true, s: ValueOf[S], a: Fractional[GetTypeAtName[K1, S, V1]]) =
  // Vector rather than List: the percentile lookup below indexes by position.
  val numericValues = itr.column[S].toVector.asInstanceOf[Vector[GetTypeAtName[K1, S, V1]]]

  // Fail with a clear message instead of an anonymous `head of empty list` further down.
  if numericValues.isEmpty then
    throw new NoSuchElementException(s"numericColSummary: column '${s.value}' has no rows")

  val sortedValues = numericValues.sorted
  val size = sortedValues.size

  // Linear interpolation between the sorted values on either side of the fractional rank.
  def percentile(p: Double): Double =
    val rank = p * (size - 1)
    val lower = sortedValues(rank.toInt)
    val upper = sortedValues(math.ceil(rank).toInt)
    lower.toDouble + a.minus(upper, lower).toDouble * (rank - rank.toInt)

  val mean = numericValues.sum / a.fromInt(size)
  val min = sortedValues.head
  val max = sortedValues.last
  val variance = numericValues.map(x => a.minus(x, mean)).map(x => a.times(x, x)).sum / a.fromInt(size)

  val percentiles = List(0.25, 0.5, 0.75).map(percentile)

  val std = math.sqrt(variance.toDouble)

  (mean, std, min, percentiles(0), percentiles(1), percentiles(2), max).withNames[(" mean", " std", " min", " 25%", " 50%", " 75%", " max")]
357
+
358
+
171
359
inline def column [S <: String ](using ev : IsColumn [S , K1 ] =:= true , s : ValueOf [S ]): Iterator [GetTypeAtName [K1 , S , V1 ]] = {
172
360
val headers = constValueTuple[K1 ].toList.map(_.toString())
173
361
/**
@@ -205,6 +393,15 @@ object CSV:
205
393
/** `Seq` counterpart of the iterator-based `addColumn`: runs `addColumn[S, A]` over
  * `nt.toIterator` and collects the resulting rows with `toSeq`.
  */
inline def addColumn[S <: String, A](fct: (tup: NamedTuple.NamedTuple[K, V]) => A): Seq[NamedTuple[S *: K, A *: V]] =
  val extended = nt.toIterator.addColumn[S, A](fct)
  extended.toSeq
207
395
396
/** `Seq` counterpart of the iterator-based `columns`: runs `columns[ST]` over `nt.toIterator`,
  * passing the evidence along, and collects the resulting rows with `toSeq`.
  */
inline def columns[ST <: Tuple](using ev: AllAreColumns[ST, K] =:= true):
  Seq[
    NamedTuple[
      SelectFromTuple[K, TupleContainsIdx[ST, K]],
      GetTypesAtNames[K, SelectFromTuple[K, TupleContainsIdx[ST, K]], V]
    ]
  ] =
  val projected = nt.toIterator.columns[ST](using ev)
  projected.toSeq
404
+
208
405
/** `Seq` counterpart of the iterator-based `dropColumn`: runs `dropColumn[S]` over
  * `nt.toIterator` and collects the resulting rows with `toSeq`.
  */
inline def dropColumn[S <: String](using ev: IsColumn[S, K] =:= true, s: ValueOf[S]): Seq[NamedTuple[DropOneName[K, S], DropOneTypeAtName[K, S, V]]] =
  val remaining = nt.toIterator.dropColumn[S]
  remaining.toSeq
210
407
@@ -251,10 +448,6 @@ object CSV:
251
448
hasMore
252
449
end hasNext
253
450
254
- private def listToTuple [A ](list : List [A ]): Tuple = list match
255
- case Nil => EmptyTuple
256
- case h :: t => h *: listToTuple(t)
257
-
258
451
def numericTypeTest (sample : Option [Int ] = None ) =
259
452
val sampled = sample match
260
453
case Some (n) =>
0 commit comments