Skip to content

Commit

Permalink
add an excel version
Browse files Browse the repository at this point in the history
  • Loading branch information
Quafadas committed Jan 10, 2025
1 parent 579a239 commit 922eb1a
Show file tree
Hide file tree
Showing 10 changed files with 274 additions and 146 deletions.
5 changes: 5 additions & 0 deletions build.mill
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ object scautable extends CrossPlatform {
}
}
object jvm extends Shared {

// Extends the inherited dependency list with Apache POI (`poi` and `poi-ooxml`, both at 5.4.0).
override def ivyDeps: Target[Agg[Dep]] = super.ivyDeps() ++ Agg(
ivy"org.apache.poi:poi:5.4.0",
ivy"org.apache.poi:poi-ooxml:5.4.0",
)
// jvm specific settings here
object test extends ScalaTests with SharedTests with BuildInfo {

Expand Down
4 changes: 4 additions & 0 deletions scautable/js/src/excelStub.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package io.github.quafadas.scautable


// Empty `Excel` object for the JS source set (this file lives under scautable/js/src);
// the populated `Excel` object is defined in the jvm sources.
object Excel
75 changes: 75 additions & 0 deletions scautable/jvm/src/excelterator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package io.github.quafadas.scautable

import scala.io.Source
import scala.util.Try
import scala.util.chaining.*
import scala.util.matching.Regex
import scala.NamedTuple.*
import scala.compiletime.*
import CSV.*
import ConsoleFormat.*
import org.apache.poi.ss.usermodel.{ DataFormatter, WorkbookFactory, Row }
import java.io.File
import scala.collection.JavaConverters.*
import scala.quoted.*


/** Compile-time entry points for reading an Excel sheet as an iterator of named tuples. */
object Excel:

  /** Lifts an [[ExcelIterator]] into a quoted expression.
    *
    * Only the file path and sheet name are carried across; the generated code constructs a fresh iterator from them.
    */
  given IteratorToExpr2[K](using ToExpr[String], Type[K]): ToExpr[ExcelIterator[K]] with
    def apply(opt: ExcelIterator[K])(using Quotes): Expr[ExcelIterator[K]] =
      val str = Expr(opt.getFilePath)
      val sheet = Expr(opt.getSheet)
      '{
        new ExcelIterator[K]($str, $sheet)
      }
    end apply
  end IteratorToExpr2

  /** Builds an [[ExcelIterator]] whose column type is derived from the sheet's header row at compile time.
    *
    * Both arguments must be compile-time constants, because the macro reads the file during compilation.
    *
    * @param filePath
    *   path of the workbook, as a constant string
    * @param sheetName
    *   name of the sheet to read, as a constant string
    */
  transparent inline def absolutePath[K](filePath: String, sheetName: String) = ${ readExcelAbolsutePath('filePath, 'sheetName) }

  /** Macro implementation behind [[absolutePath]].
    *
    * Reads the header row of the sheet, turns the header strings into a tuple of literal types and emits code that
    * constructs an `ExcelIterator` parameterised by that tuple type. Aborts compilation if either argument is not a
    * constant or if no type can be extracted from the header tuple.
    */
  def readExcelAbolsutePath(pathExpr: Expr[String], sheetName: Expr[String])(using Quotes) =
    import quotes.reflect.*

    // Resolve each constant once; both are needed for the header lookup and for the generated constructor call.
    val fPath = pathExpr.valueOrAbort
    val sheet = sheetName.valueOrAbort
    val headers = ExcelIterator(fPath, sheet).headers
    val tupleExpr2 = Expr.ofTupleFromSeq(headers.map(Expr(_)))
    tupleExpr2 match
      case '{ $tup: t } =>
        // Quote the constructor call directly: building another iterator here would only open the workbook a
        // second time during compilation to read back the same two strings.
        '{ new ExcelIterator[t](${ Expr(fPath) }, ${ Expr(sheet) }) }
      case _ => report.errorAndAbort(s"Could not summon Type for type: ${tupleExpr2.show}")
    end match
  end readExcelAbolsutePath


/** Iterates over the data rows of one sheet of an Excel workbook, yielding every row as a named tuple of strings.
  *
  * The first row of the sheet is consumed when the instance is constructed and exposed as [[headers]]; iteration
  * starts at the row after it. The workbook is opened read-only and closed once the rows are exhausted.
  *
  * @tparam K
  *   tuple of column-name types used as the names of the yielded named tuples
  * @param filePath
  *   path of the workbook file
  * @param sheetName
  *   name of the sheet to read
  * @throws IllegalArgumentException
  *   if the workbook has no sheet called `sheetName`
  * @throws NoSuchElementException
  *   if the sheet has no header row
  */
class ExcelIterator[K](filePath: String, sheetName: String) extends Iterator[NamedTuple[K & Tuple, StringyTuple[K & Tuple]]]:
  type COLUMNS = K

  def getFilePath: String = filePath
  def getSheet: String = sheetName

  // Opened read-only (no password) so that closing it can never write anything back to the file.
  private lazy val workbook = WorkbookFactory.create(new File(filePath), null, true)
  private var workbookClosed = false

  /** Releases the underlying file handle; safe to call more than once. */
  private def closeWorkbook(): Unit =
    if !workbookClosed then
      workbookClosed = true
      workbook.close()

  lazy val sheetIterator =
    // `getSheet` hands back null for an unknown name; turn that into a descriptive error instead of an NPE.
    Option(workbook.getSheet(sheetName)) match
      case Some(sheet) => sheet.iterator().asScala
      case None =>
        closeWorkbook()
        throw new IllegalArgumentException(s"Sheet '$sheetName' not found in workbook $filePath")

  // The physically defined cells of the first row; their column indices decide which cells are read from data rows.
  private val headerCells =
    if !sheetIterator.hasNext then
      closeWorkbook()
      throw new NoSuchElementException(s"Sheet '$sheetName' in workbook $filePath has no header row")
    sheetIterator.next().cellIterator().asScala.toList

  val headers = headerCells.map(_.toString)
  private val columnIndices = headerCells.map(_.getColumnIndex)

  lazy val headersTuple =
    listToTuple(headers)

  override def next(): NamedTuple[K & Tuple, StringyTuple[K & Tuple]] =
    if !hasNext then throw new NoSuchElementException("No more rows")
    val row = sheetIterator.next()
    // Read by header column index: `cellIterator` skips undefined cells, which would shift the remaining values
    // under the wrong headers. A missing cell becomes the empty string.
    val cells = columnIndices.map(i => Option(row.getCell(i)).fold("")(_.toString))
    val tuple = listToTuple(cells).asInstanceOf[StringyTuple[K & Tuple]]
    NamedTuple.build[K & Tuple]()(tuple)

  override def hasNext: Boolean =
    val hasMore = sheetIterator.hasNext
    if !hasMore then closeWorkbook()
    hasMore

end ExcelIterator
145 changes: 1 addition & 144 deletions scautable/src/csv.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ object CSV:
case _ => constValue[A]


// Recursively converts a list into a tuple: `Nil` becomes `EmptyTuple`, otherwise the head is
// prepended (`*:`) to the conversion of the tail.
// The two signature lines below are the same definition with and without the `private` modifier.
private def listToTuple[A](list: List[A]): Tuple = list match
def listToTuple[A](list: List[A]): Tuple = list match
case Nil => EmptyTuple
case h :: t => h *: listToTuple(t)

Expand Down Expand Up @@ -418,149 +418,6 @@ object CSV:

end extension

/** Iterates over the data lines of a CSV file, yielding every line as a named tuple of strings.
  *
  * The header line is parsed separately for [[headers]] and is dropped from the iteration when the instance is
  * constructed. The main source is closed once `hasNext` reports that no lines remain.
  *
  * @tparam K
  *   tuple of column-name types used as the names of the yielded named tuples
  * @param filePath
  *   path of the CSV file
  */
class CsvIterator[K](filePath: String) extends Iterator[NamedTuple[K & Tuple, StringyTuple[K & Tuple]]]:
  type COLUMNS = K

  def getFilePath: String = filePath
  lazy private val source = Source.fromFile(filePath)
  lazy private val lineIterator = source.getLines()

  /** Column names parsed from the first line of the file.
    *
    * Read through a separate, short-lived handle which is closed as soon as the line has been parsed.
    */
  lazy val headers =
    val headerSource = Source.fromFile(filePath)
    try CSVParser.parseLine(headerSource.getLines().next())
    finally headerSource.close()

  lazy val headersTuple =
    listToTuple(headers)

  /** Position of the column called `s`.
    *
    * @throws NoSuchElementException
    *   if there is no such column
    */
  inline def headerIndex(s: String) =
    headers.indexOf(s) match
      case -1 => throw new NoSuchElementException(s"No column named '$s'")
      case i  => i

  /**
   * Here be dragons, in Tuple Land, Tuple XXL is reversed, creating a discontinuity. Small tuples start at 1, big tuples start the other end.
   *
   * Apparently fixed in 3.6.3
   *
   * The lookup is done against the headers in file order; no reversal is applied here.
   *
   * @return the position of the column named by the singleton type `S`, or -1 if there is none
   */
  inline def headerIndex[S <: String & Singleton] =
    headers.indexOf(constValue[S].toString)

  /** Reports whether another line is available, closing the source when the file is exhausted. */
  inline override def hasNext: Boolean =
    val hasMore = lineIterator.hasNext
    if !hasMore then source.close()
    hasMore
  end hasNext

  /** Counts, per column, how many of the remaining rows parse as `Int`, `Double` and `Long`.
    *
    * Consumes this iterator (or its first `n` remaining rows when `sample` is `Some(n)`).
    *
    * @return
    *   the per-column counters together with the number of rows inspected
    */
  def numericTypeTest(sample: Option[Int] = None) =
    val sampled = sample match
      case Some(n) => this.take(n)
      case None    => this
    val asList = headers.map(_ => ConversionAcc(0, 0, 0))

    sampled.foldLeft((asList, 0L)) { (acc: (List[ConversionAcc], Long), elem: NamedTuple[K & Tuple, StringyTuple[K & Tuple]]) =>
      val list = elem.toList.asInstanceOf[List[String]].zip(acc._1).map { case (str, colAcc) =>
        ConversionAcc(
          colAcc.validInts + str.toIntOption.fold(0)(_ => 1),
          colAcc.validDoubles + str.toDoubleOption.fold(0)(_ => 1),
          colAcc.validLongs + str.toLongOption.fold(0)(_ => 1)
        )
      }
      (list, acc._2 + 1)
    }

  /** Renders the result of [[numericTypeTest]] as a console table with one row per target type plus a
    * recommendation row.
    */
  inline def formatTypeTest(sample: Option[Int] = None): String =
    val (asList, n) = numericTypeTest(sample)
    val intReport = "int" *: listToTuple(asList.map(acc => (acc.validInts / n.toDouble).formatAsPercentage))
    val doubleReported = "doubles" *: listToTuple(asList.map(acc => (acc.validDoubles / n.toDouble).formatAsPercentage))
    val longReported = "long" *: listToTuple(asList.map(acc => (acc.validLongs / n.toDouble).formatAsPercentage))
    val recommendation = "recommendation" *: listToTuple(asList.map(acc => recommendConversion(List(acc), n)))

    val ntList = Seq(
      intReport,
      doubleReported,
      longReported,
      recommendation
    )

    ConsoleFormat.consoleFormat_(headers = "conversion % to" +: headers, fancy = true, table = ntList)

  /** Prints the table produced by [[formatTypeTest]]. */
  inline def showTypeTest(sample: Option[Int] = None): Unit =
    println(formatTypeTest(sample))

  /** Parses the next line into a named tuple of strings.
    *
    * @throws NoSuchElementException
    *   if no lines remain
    */
  inline override def next() =
    if !hasNext then throw new NoSuchElementException("No more lines")
    end if
    val str = lineIterator.next()
    val splitted = CSVParser.parseLine(str)
    val tuple = listToTuple(splitted).asInstanceOf[StringyTuple[K & Tuple]]
    NamedTuple.build[K & Tuple]()(tuple)
  end next

  next() // drop the headers
end CsvIterator

/**
 * Splits a single CSV line into its cells.
 *
 * Quoting follows the RFC 4180 conventions for a single record: a quoted section may contain the delimiter,
 * and a doubled quote character inside a quoted section stands for one literal quote. Records that span
 * several lines are not handled here because the parser only ever sees one line.
 */
object CSVParser {

  /**
   * @param line      the raw line, without its line terminator
   * @param delimiter the character separating cells
   * @param quote     the character that opens and closes a quoted section
   * @return the cells in order; always at least one element (an empty line yields a single empty cell)
   */
  def parseLine(line: String, delimiter: Char = ',', quote: Char = '"'): List[String] = {
    val cells = List.newBuilder[String]
    val cell = new StringBuilder
    var inQuotes = false
    var i = 0

    while (i < line.length) {
      val ch = line.charAt(i)
      if (ch == quote) {
        if (inQuotes && i + 1 < line.length && line.charAt(i + 1) == quote) {
          // Doubled quote inside a quoted section: emit one literal quote and skip the second character.
          cell.append(quote)
          i += 1
        } else {
          // Opening or closing quote; the quote character itself is not part of the cell.
          inQuotes = !inQuotes
        }
      } else if (ch == delimiter && !inQuotes) {
        // Delimiter outside quotes ends the current cell.
        cells += cell.toString
        cell.clear()
      } else {
        cell.append(ch)
      }
      i += 1
    }

    // The last cell has no trailing delimiter, so it is flushed here (this also covers the empty line).
    cells += cell.toString
    cells.result()
  }
}

given IteratorToExpr2[K](using ToExpr[String], Type[K]): ToExpr[CsvIterator[K]] with
def apply(opt: CsvIterator[K])(using Quotes): Expr[CsvIterator[K]] =
val str = Expr(opt.getFilePath)
Expand Down
114 changes: 114 additions & 0 deletions scautable/src/csvIterator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package io.github.quafadas.scautable

import scala.io.Source
import scala.util.Try
import scala.util.chaining.*
import scala.util.matching.Regex
import scala.NamedTuple.*
import scala.compiletime.*
import CSV.*
import ConsoleFormat.*




/** Iterates over the data lines of a CSV file, yielding every line as a named tuple of strings.
  *
  * The header line is parsed separately for [[headers]] and is dropped from the iteration when the instance is
  * constructed. The main source is closed once `hasNext` reports that no lines remain.
  *
  * @tparam K
  *   tuple of column-name types used as the names of the yielded named tuples
  * @param filePath
  *   path of the CSV file
  */
class CsvIterator[K](filePath: String) extends Iterator[NamedTuple[K & Tuple, StringyTuple[K & Tuple]]]:
  type COLUMNS = K

  def getFilePath: String = filePath
  lazy private val source = Source.fromFile(filePath)
  lazy private val lineIterator = source.getLines()

  /** Column names parsed from the first line of the file.
    *
    * Read through a separate, short-lived handle which is closed as soon as the line has been parsed.
    */
  lazy val headers =
    val headerSource = Source.fromFile(filePath)
    try CSVParser.parseLine(headerSource.getLines().next())
    finally headerSource.close()

  lazy val headersTuple =
    listToTuple(headers)

  /** Position of the column called `s`.
    *
    * @throws NoSuchElementException
    *   if there is no such column
    */
  inline def headerIndex(s: String) =
    headers.indexOf(s) match
      case -1 => throw new NoSuchElementException(s"No column named '$s'")
      case i  => i

  /**
   * Here be dragons, in Tuple Land, Tuple XXL is reversed, creating a discontinuity. Small tuples start at 1, big tuples start the other end.
   *
   * Apparently fixed in 3.6.3
   *
   * The lookup is done against the headers in file order; no reversal is applied here.
   *
   * @return the position of the column named by the singleton type `S`, or -1 if there is none
   */
  inline def headerIndex[S <: String & Singleton] =
    headers.indexOf(constValue[S].toString)

  /** Reports whether another line is available, closing the source when the file is exhausted. */
  inline override def hasNext: Boolean =
    val hasMore = lineIterator.hasNext
    if !hasMore then source.close()
    hasMore
  end hasNext

  /** Counts, per column, how many of the remaining rows parse as `Int`, `Double` and `Long`.
    *
    * Consumes this iterator (or its first `n` remaining rows when `sample` is `Some(n)`).
    *
    * @return
    *   the per-column counters together with the number of rows inspected
    */
  def numericTypeTest(sample: Option[Int] = None) =
    val sampled = sample match
      case Some(n) => this.take(n)
      case None    => this
    val asList = headers.map(_ => ConversionAcc(0, 0, 0))

    sampled.foldLeft((asList, 0L)) { (acc: (List[ConversionAcc], Long), elem: NamedTuple[K & Tuple, StringyTuple[K & Tuple]]) =>
      val list = elem.toList.asInstanceOf[List[String]].zip(acc._1).map { case (str, colAcc) =>
        ConversionAcc(
          colAcc.validInts + str.toIntOption.fold(0)(_ => 1),
          colAcc.validDoubles + str.toDoubleOption.fold(0)(_ => 1),
          colAcc.validLongs + str.toLongOption.fold(0)(_ => 1)
        )
      }
      (list, acc._2 + 1)
    }

  /** Renders the result of [[numericTypeTest]] as a console table with one row per target type plus a
    * recommendation row.
    */
  inline def formatTypeTest(sample: Option[Int] = None): String =
    val (asList, n) = numericTypeTest(sample)
    val intReport = "int" *: listToTuple(asList.map(acc => (acc.validInts / n.toDouble).formatAsPercentage))
    val doubleReported = "doubles" *: listToTuple(asList.map(acc => (acc.validDoubles / n.toDouble).formatAsPercentage))
    val longReported = "long" *: listToTuple(asList.map(acc => (acc.validLongs / n.toDouble).formatAsPercentage))
    val recommendation = "recommendation" *: listToTuple(asList.map(acc => recommendConversion(List(acc), n)))

    val ntList = Seq(
      intReport,
      doubleReported,
      longReported,
      recommendation
    )

    ConsoleFormat.consoleFormat_(headers = "conversion % to" +: headers, fancy = true, table = ntList)

  /** Prints the table produced by [[formatTypeTest]]. */
  inline def showTypeTest(sample: Option[Int] = None): Unit =
    println(formatTypeTest(sample))

  /** Parses the next line into a named tuple of strings.
    *
    * @throws NoSuchElementException
    *   if no lines remain
    */
  inline override def next() =
    if !hasNext then throw new NoSuchElementException("No more lines")
    end if
    val str = lineIterator.next()
    val splitted = CSVParser.parseLine(str)
    val tuple = listToTuple(splitted).asInstanceOf[StringyTuple[K & Tuple]]
    NamedTuple.build[K & Tuple]()(tuple)
  end next

  next() // drop the headers
end CsvIterator
Loading

0 comments on commit 922eb1a

Please sign in to comment.