From ddf064529f7f43360c251fc46317abe2f1d85ce0 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 11 Nov 2024 00:38:01 -0500 Subject: [PATCH] update --- main.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 5f95103..960518b 100644 --- a/main.py +++ b/main.py @@ -16,21 +16,32 @@ def main(): # Extract data extract() + # Start Spark session spark = start_spark("USBirthDataProcessing") + # Load data into DataFrame df = load_data(spark) - # Generate descriptive statistics + + # Example metrics describe(df) - # Query example: Count births by year + + # Query the data query( spark, df, - "SELECT year, COUNT(*) AS birth_count FROM USBirthData GROUP BY year ORDER BY year", + ( + "SELECT year, COUNT(*) AS birth_count " + "FROM USBirthData " + "GROUP BY year " + "ORDER BY year" + ), "USBirthData", ) + # Example transformation example_transform(df) + # End Spark session end_spark(spark)