From be7437aa2a77c650103a7ee093deacb8bc667e7f Mon Sep 17 00:00:00 2001
From: Weizheng Lu
Date: Thu, 9 May 2024 23:10:56 +0800
Subject: [PATCH] add github pages docs (#6)

---
 .github/workflows/deploy.yml   |  42 ++++++++++++
 docs/README.md                 |  29 ++++++++
 docs/_static/custom.css        |   3 +
 docs/_static/logo.ico          | Bin 0 -> 4022 bytes
 docs/_toc.yml                  |  10 +++
 docs/conf.py                   |  49 ++++++++++++++
 docs/dataframe/dataframe.md    |   6 ++
 docs/index.md                  |  26 +++++++
 docs/perf/ad-hoc-query.md      |   2 +
 docs/requirements-doc.txt      |  11 ++++
 tpch/cudf_queries/queries.py   | 107 ++++++++++++++++-----------------
 tpch/dask_queries/queries.py   |   2 +-
 tpch/pandas_queries/queries.py |   7 ++-
 13 files changed, 234 insertions(+), 60 deletions(-)
 create mode 100644 .github/workflows/deploy.yml
 create mode 100644 docs/README.md
 create mode 100644 docs/_static/custom.css
 create mode 100644 docs/_static/logo.ico
 create mode 100644 docs/_toc.yml
 create mode 100644 docs/conf.py
 create mode 100644 docs/dataframe/dataframe.md
 create mode 100644 docs/index.md
 create mode 100644 docs/perf/ad-hoc-query.md
 create mode 100644 docs/requirements-doc.txt

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..95626a1
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,42 @@
+name: Deploy Docs
+
+on:
+  push:
+    tags:
+      - '*'
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r docs/requirements-doc.txt
+      - name: Build website
+        run: |
+          sphinx-build -b html docs docs/_build/html
+          touch docs/_build/html/.nojekyll
+      # Upload the book's HTML as an artifact
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v2
+        with:
+          path: "docs/_build/html"
+
+      # Deploy the book's HTML to GitHub Pages
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v2
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..99b3255
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,29 @@
+# Build the Docs
+
+## Environment Setup
+
+Install the dependencies listed in `requirements-doc.txt`; these are the tools needed to build this documentation.
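+
+For example, assuming the command is run from the repository root (this patch adds the file at `docs/requirements-doc.txt`), the install step could look like this:
+
+```bash
+python -m pip install -r docs/requirements-doc.txt
+```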
+
+Navigate to the `docs` folder and build the site:
+
+```bash
+cd docs
+sphinx-build -b html ./ ./_build/html
+```
+
+Web-related files will be generated in the `docs/_build` directory.
+
+## Start an HTTP Server
+
+After building the HTML files, you can serve them with Python's built-in HTTP server and open http://127.0.0.1:8000 in your browser to view the result:
+
+```bash
+cd _build/html
+python -m http.server 8000
+```
\ No newline at end of file
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000..194630d
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,3 @@
+html[data-theme="light"] {
+    --sbt-color-announcement: rgb(125, 125, 125);
+}
\ No newline at end of file
diff --git a/docs/_static/logo.ico b/docs/_static/logo.ico
new file mode 100644
index 0000000000000000000000000000000000000000..5169b6d6eed8f1609547423dd546f91ae9cf28c9
GIT binary patch
literal 4022
zcmeHKYjjlA72Y#>OcFvKK%SL{4_a-bSfx@Bi;p}>cq9Z3VnCju#E^s}BtS?Y1VRXe
z5dlHo1wlXym`EsTE868+uC}!Lqjuq1UA3jP{?JuKQ|`UzvHRV5b&`oo`k$Ni%{lkX
zoc+z-=lk}Z5IOKSW)#`4bR?cAnTYNM#sHH6d);&4y}KXq!n<($)n5V5>Lwrx$O6hZ
z`^^cQcP4^!-|&BA&{0)ok*dsd%9W3gmvs}%WWx`$<=XE@$u${vJR6BXF|Y`@2k@*1
z2lK5tFxCTK1I#%;|D5w*-LGfte^}3Qhv~xI!CO3YRh2`s`bkpNnbT#>luzZVM_8`8
zzfZ0kyW02{IXJT?7O{>3(i1bQ`vZ&_K)+b_J&+1~1TbfxXU;ptJnb;+!abkq;%zVL
zl8qUdRdS&e-mJJWGZ2TU}=FwMWha-ibx;P2(Zg^taalKfF
zv0n{*_cs5ZIjf5~_jTriW2|Sq!pNQV(`~GuZMvrC)wZbxDS_9{KS$O)
z_O+}ZkKBL2vguBi8`7>?os`U(7xNI)0{qiJ5o7A^gYDtm67DxWo4R%eBy?uTXFk6ErB$EZ8Y^>?t?nDjEIvdxhX
z+!}w*S>o(-xtw>pj|+~np1GHG@ebC{Y+^mPk@dV9RtuN2TC#xEPoL4MV*2l7^;5;N
z_R(vy{vnoY??c_kuxv^BQnU_xTrH@$Wq$iW!3&&8;QZt7>FEbp7wtsd+mZWv#$(Xri8g-Q=;d5Z?*n@LE9b!
zcLp@~sZVpLdmXCZA
zvBPOuyB*!Kwy2W}j$532#_MO>(|M=d0M7c9pEfY+r?(K!n{-J_F*IieG`IA5Xs+Z6
z&OHfvlUy|+M>afk!>>8fl8oHr{)*f;h)%~%u``^lmm}HQ8z;r)iTLg{@G6X#mE
z%k8c6t^jZFM9*DI;7-w{t3K$jxgtZY$S>s5Mw0apA3&cA(p(~owwPn6d$(|hv9&vb
zt$k5!9g1P=SPYv#OJcG8QRucDdTw^>dEl7S984RS>(V+BWu*kKw?cFH?(@8wTT#H!
z+!37lIMn?g-acn*ZUpKcdtGdeScbZPBwn&}cd>OKnyn+TY`q%C=II1B&nB{U5<1y1
z&29YNN$6t%&VduPsEoipK`mMM1vJMV%~fTytog~`;PvCsoc8oNkLKc8Y>Da-Tb=RZ
z#jx?#?#P4ID^Xuqhle2dp=_RvXY=)8Y@Qpz=3B{Zo*(7@&s8GP{=`7qz&&~lsl`j8
z;q@+6UTk}v(Q6rg({l9#BGBh-&9y}f(H=fuv`3P)&qdZ=SA=yi`Z4Qh>G3=~)KIw*eA;mI&k@hR2_d9nPz3BrfL+(|@
zGy?Z%kJndD;m|$@&24cpc$+Yi=;$A!@89Fe>`EcCCpFsaPMK)*q~;jE9y!A3PNu%U
zKhpohgWRdg9O9Y#?PuIhSr`0l^=Ync2)M)kDYj7tJRcGPTJ(G;(doPw?>7N2!5_2X
zk0{~x)+%IE(1KYziST=d@LYcqn*S>BZ1rodJ(3agpm0+d;s)*XPT>Jt3-`c_SFBy|
zqZc==J&`M|83e6Fh3px)0H34R{3uSYzW?n3nq$%7{0h8THn*K?@Z~xGLooyoR9T1M
z1KSG$#C!-}CIP`)2A!kF5PU|m@w=7wvo)Z(t>Ne=j!WQ;MSVl|+Whc5L4S?rDR{p>
zJ~m&APX|Kw4Yq*aQUva?_)YV%X!U7so6`VqDS9n^(^*3EWS$=}$?Qt{%=TUYZzkxJfii`?tM8#A!(!JDCbGP{$V@Olya)@%GS{R+Il
z9X%ioJ|y$)Z}xw19D9!bMxrGZJu5N>=N>0I$!VYaw_V2jchb#2-ABf|BV&wrM#dYz
z85L%{i+_}EU$-mZs{s41Pk3Yek}WYQ>5!Ukq1T;xok+>d-0%4
e(`2AIo~hZz)CA}-UJq;l-q9|;#yk8x{_($}!jTyO

literal 0
HcmV?d00001

diff --git a/docs/_toc.yml b/docs/_toc.yml
new file mode 100644
index 0000000..9a067f7
--- /dev/null
+++ b/docs/_toc.yml
@@ -0,0 +1,10 @@
+format: jb-book
+root: index
+parts:
+- caption: DataFrame
+  chapters:
+  - file: dataframe/dataframe
+
+- caption: Performance
+  chapters:
+  - file: perf/ad-hoc-query
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..31a7fdf
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,49 @@
+author = 'DF-Eval Team'
+bibtex_bibfiles = ['references.bib']
+bibtex_reference_style = 'author_year'
+comments_config = {'hypothesis': False, 'utterances': False}
+copyright = '2023-2024'
+exclude_patterns = ['**.ipynb_checkpoints', '.DS_Store', 'Thumbs.db', '_build']
+extensions = ['sphinx_togglebutton', 'sphinx_copybutton', 'myst_nb', 'jupyter_book', 'sphinx_thebe', 'sphinx_comments', 'sphinx_external_toc', 'sphinx.ext.intersphinx', 'sphinx_design', 'sphinx_book_theme', 'sphinxcontrib.bibtex', 'sphinx_jupyterbook_latex']
+external_toc_exclude_missing = True
+external_toc_path = '_toc.yml'
+html_baseurl = ''
+html_favicon = "_static/logo.ico"
+html_logo = '_static/logo.ico'
+html_sourcelink_suffix = ''
+html_theme = 'sphinx_book_theme'
+html_theme_options = {
+    'search_bar_text': 'Search...',
+    'path_to_docs': 'docs',
+    'repository_url': 'https://github.com/godaai/df-eval',
+    'repository_branch': 'main',
+    'extra_footer': '',
+    'home_page_in_toc': True,
+    'announcement': "If you find this page helpful, please star us on GitHub.",
+    'analytics': {'google_analytics_id': ''},
+    'use_repository_button': True,
+    'use_edit_page_button': False,
+    'use_issues_button': False,
+    "toc_title": "In this page",
+}
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
+html_js_files = [
+    "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js",
+]
+html_title = 'DF-Eval'
+latex_engine = 'pdflatex'
+myst_enable_extensions = ['colon_fence', 'dollarmath', 'linkify', 'substitution', 'tasklist']
+myst_url_schemes = ['mailto', 'http', 'https']
+nb_execution_allow_errors = False
+nb_execution_cache_path = ''
+nb_execution_excludepatterns = []
+nb_execution_in_temp = False
+nb_execution_mode = 'off'
+nb_execution_timeout = 30
+nb_output_stderr = 'show'
+numfig = False
+pygments_style = 'sphinx'
+suppress_warnings = ['myst.domains']
+use_jupyterbook_latex = True
+use_multitoc_numbering = True
diff --git a/docs/dataframe/dataframe.md b/docs/dataframe/dataframe.md
new file mode 100644
index 0000000..20d88b3
--- /dev/null
+++ b/docs/dataframe/dataframe.md
@@ -0,0 +1,6 @@
+(sec-dataframe)=
+# What's a DataFrame?
+
+In recent years, the convenience of DataFrames has made them the tool of choice for data scientists across a variety of tasks, from data loading, cleaning, and wrangling to statistical modeling and visualization. [pandas](https://pandas.pydata.org/), the most popular DataFrame system, is the de facto standard. It is easy to use: even users with little Python programming experience can quickly learn to do data analysis with it. It also provides a powerful set of tools for data manipulation, including filtering, merging, grouping, and aggregating data.
+
+## DataFrame Algebra
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..0259eed
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,26 @@
+# DF-Eval
+
+::::{grid} 2
+:reverse:
+
+:::{grid-item}
+:columns: 3
+:class: sd-m-auto
+
+
+:::
+
+:::{grid-item}
+:columns: 9
+:class: sd-fs-3
+
+A multi-level, multi-dimensional evaluation suite for DataFrame systems.
+
+% The SVG rendering of the GitHub badge breaks LaTeX builds, so only include it in HTML
+```{only} html
+[![](https://img.shields.io/github/stars/godaai/df-eval?style=for-the-badge)](https://github.com/godaai/df-eval)
+```
+
+:::
+
+::::
\ No newline at end of file
diff --git a/docs/perf/ad-hoc-query.md b/docs/perf/ad-hoc-query.md
new file mode 100644
index 0000000..1e4e346
--- /dev/null
+++ b/docs/perf/ad-hoc-query.md
@@ -0,0 +1,2 @@
+(sec-ad-hoc-query)=
+# Ad-hoc Query
\ No newline at end of file
diff --git a/docs/requirements-doc.txt b/docs/requirements-doc.txt
new file mode 100644
index 0000000..f8bbe4a
--- /dev/null
+++ b/docs/requirements-doc.txt
@@ -0,0 +1,11 @@
+jupyter-book
+sphinx_togglebutton
+sphinx_copybutton
+myst_nb
+sphinx_comments
+sphinx_external_toc
+sphinx_design
+sphinx_book_theme
+sphinxcontrib-bibtex
+sphinx-jupyterbook-latex
+sphinxcontrib-jsmath
\ No newline at end of file
diff --git a/tpch/cudf_queries/queries.py b/tpch/cudf_queries/queries.py
index 34ccf8f..8cc5033 100644
--- a/tpch/cudf_queries/queries.py
+++ b/tpch/cudf_queries/queries.py
@@ -1,7 +1,6 @@
 import cudf
 import cudf.pandas
 cudf.pandas.install()
-import pandas as pd
 
 import argparse
 import json
@@ -10,9 +9,7 @@
 import traceback
 from typing import Dict
 
-import sys
-
-# import pandas as pd
+import pandas as pd
 
 from common_utils import log_time_fn, parse_common_arguments, print_result_fn
 
 dataset_dict = {}
@@ -128,7 +125,7 @@ def q01(root: str, storage_options: Dict):
         ],
     ]
     sel = lineitem_filtered.L_SHIPDATE <= date
-    lineitem_filtered = lineitem_filtered[sel]
+    lineitem_filtered = lineitem_filtered.loc[sel]
     lineitem_filtered["AVG_QTY"] = lineitem_filtered.L_QUANTITY
     lineitem_filtered["AVG_PRICE"] = lineitem_filtered.L_EXTENDEDPRICE
     lineitem_filtered["DISC_PRICE"] = lineitem_filtered.L_EXTENDEDPRICE * (
@@ -316,7 +313,7 @@ def q03(root: str, storage_options: Dict):
         :, ["L_ORDERKEY", "REVENUE", "O_ORDERDATE", "O_SHIPPRIORITY"]
     ]
 
-    # [change 1]Convert cudf DataFrame to Pandas DataFrame and format timestamp
+    # [DIFF] Convert cudf DataFrame to pandas DataFrame and format the timestamp
     total["O_ORDERDATE"] = pd.to_datetime(total["O_ORDERDATE"]).dt.strftime("%Y-%m-%d")
     return total
 
@@ -408,8 +405,8 @@ def q07(root: str, storage_options: Dict):
         (lineitem["L_SHIPDATE"] >= pd.Timestamp("1995-01-01"))
         & (lineitem["L_SHIPDATE"] < pd.Timestamp("1997-01-01"))
     ]
-    lineitem_filtered["L_YEAR"] = lineitem_filtered["L_SHIPDATE"].dt.year
-    lineitem_filtered["VOLUME"] = lineitem_filtered["L_EXTENDEDPRICE"] * (
+    lineitem_filtered.loc[:, "L_YEAR"] = lineitem_filtered["L_SHIPDATE"].dt.year
+    lineitem_filtered.loc[:, "VOLUME"] = lineitem_filtered["L_EXTENDEDPRICE"] * (
         1.0 - lineitem_filtered["L_DISCOUNT"]
     )
     lineitem_filtered = lineitem_filtered.loc[
@@ -704,11 +701,11 @@ def g2(x):
         columns={"g1": "HIGH_LINE_COUNT", "g2": "LOW_LINE_COUNT"}
     )
 
-    # Round the result to one decimal place -- If you use test_result.py to test the results, please uncomment the following two lines.
+    # Round the result to one decimal place
+    # If you use test_result.py to test the results, please uncomment the following two lines.
# total["HIGH_LINE_COUNT"] = total["HIGH_LINE_COUNT"].astype(float).round(1) # total["LOW_LINE_COUNT"] = total["LOW_LINE_COUNT"].astype(float).round(1) - return total @@ -730,14 +727,11 @@ def q13(root: str, storage_options: Dict): count_df = c_o_merged.groupby(["C_CUSTKEY"], as_index=False).agg( C_COUNT=pd.NamedAgg(column="O_ORDERKEY", aggfunc="count") ) - total = count_df.groupby(["C_COUNT"], as_index=False).size() - # [change 3] for TypeError: Series.sort_values() got an unexpected keyword argument 'by' - # the error is caused here: in cuDF,DataFrameGroupBy.size() Return the size of each group. https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/api/cudf.core.groupby.groupby.dataframegroupby.size/# - # while in pandas, DataFrameGroupBy.size() Returns DataFrame or Series, Number of rows in each group as a Series if as_index is True or a DataFrame if as_index is False. https://pandas.pydata.org/docs/reference/api/pandas.core.groupby.DataFrameGroupBy.size.html#pandas.core.groupby.DataFrameGroupBy.size + + # [DIFF] groupby.agg is a `Series` and convert `Series` to `DataFrame` total = total.reset_index(name='size') total.columns = ["C_COUNT", "CUSTDIST"] - total = total.sort_values( by=["CUSTDIST", "C_COUNT"], ascending=[False, False], @@ -811,18 +805,18 @@ def q16(root: str, storage_options: Dict): partsupp = load_partsupp(root, storage_options) supplier = load_supplier(root, storage_options) - BRAND = "Brand#45" - TYPE = "MEDIUM POLISHED" - SIZE_LIST = [49, 14, 23, 45, 19, 3, 36, 9] + brand = "Brand#45" + p_type = "MEDIUM POLISHED" + size_list = [49, 14, 23, 45, 19, 3, 36, 9] # Merge part and partsupp DataFrames merged_df = pd.merge(part, partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY", how="inner") # Apply filters filtered_df = merged_df[ - (merged_df["P_BRAND"] != BRAND) & - (~merged_df["P_TYPE"].str.startswith(TYPE)) & - (merged_df["P_SIZE"].isin(SIZE_LIST)) + (merged_df["P_BRAND"] != brand) & + (~merged_df["P_TYPE"].str.startswith(p_type)) & + (merged_df["P_SIZE"].isin(size_list)) ] # Exclude unwanted suppliers @@ -887,7 +881,7 @@ def q18(root: str, storage_options: Dict): total = gb2.sort_values(["O_TOTALPRICE", "O_ORDERDATE"], ascending=[False, True]) total = total.head(100) - # [change 2]Convert cudf DataFrame to Pandas DataFrame and format timestamp + # [DIFF] Convert cudf DataFrame to Pandas DataFrame and format timestamp total["O_ORDERDATE"] = pd.to_datetime(total["O_ORDERDATE"]).dt.strftime("%Y-%m-%d") return total @@ -959,42 +953,42 @@ def q19(root: str, storage_options: Dict): jn = flineitem.merge(fpart, left_on="L_PARTKEY", right_on="P_PARTKEY") jnsel = ( ( - (jn.P_BRAND == brand1) - & ( - (jn.P_CONTAINER == "SM BOX") - | (jn.P_CONTAINER == "SM CASE") - | (jn.P_CONTAINER == "SM PACK") - | (jn.P_CONTAINER == "SM PKG") - ) - & (jn.L_QUANTITY >= quantity1) - & (jn.L_QUANTITY <= quantity1 + 10) - & (jn.P_SIZE <= 5) - ) - | - ( - (jn.P_BRAND == brand2) - & ( - (jn.P_CONTAINER == "MED BAG") - | (jn.P_CONTAINER == "MED BOX") - | (jn.P_CONTAINER == "MED PACK") - | (jn.P_CONTAINER == "MED PKG") - ) - & (jn.L_QUANTITY >= quantity2) - & (jn.L_QUANTITY <= quantity2 + 10) - & (jn.P_SIZE <= 10) + (jn.P_BRAND == brand1) + & ( + (jn.P_CONTAINER == "SM BOX") + | (jn.P_CONTAINER == "SM CASE") + | (jn.P_CONTAINER == "SM PACK") + | (jn.P_CONTAINER == "SM PKG") + ) + & (jn.L_QUANTITY >= quantity1) + & (jn.L_QUANTITY <= quantity1 + 10) + & (jn.P_SIZE <= 5) + ) + | + ( + (jn.P_BRAND == brand2) + & ( + (jn.P_CONTAINER == "MED BAG") + | (jn.P_CONTAINER == "MED BOX") + | (jn.P_CONTAINER == 
"MED PACK") + | (jn.P_CONTAINER == "MED PKG") + ) + & (jn.L_QUANTITY >= quantity2) + & (jn.L_QUANTITY <= quantity2 + 10) + & (jn.P_SIZE <= 10) ) | ( - (jn.P_BRAND == brand3) - & ( - (jn.P_CONTAINER == "LG BOX") - | (jn.P_CONTAINER == "LG CASE") - | (jn.P_CONTAINER == "LG PACK") - | (jn.P_CONTAINER == "LG PKG") - ) - & (jn.L_QUANTITY >= quantity3) - & (jn.L_QUANTITY <= quantity3 + 10) - & (jn.P_SIZE <= 15) + (jn.P_BRAND == brand3) + & ( + (jn.P_CONTAINER == "LG BOX") + | (jn.P_CONTAINER == "LG CASE") + | (jn.P_CONTAINER == "LG PACK") + | (jn.P_CONTAINER == "LG PKG") + ) + & (jn.L_QUANTITY >= quantity3) + & (jn.L_QUANTITY <= quantity3 + 10) + & (jn.P_SIZE <= 15) ) ) jn = jn[jnsel] @@ -1104,7 +1098,7 @@ def q21(root: str, storage_options: Dict): ) total = total.loc[:, ["S_NAME"]] total = total.groupby("S_NAME", as_index=False).size() - # [change 4] add reset_index for the same error in q13 + # [DIFF] groupby.add `Series` to `DataFrame` total = total.reset_index(name='size') total.columns = ["S_NAME", "NUMWAIT"] total = total.sort_values(by=["NUMWAIT", "S_NAME"], ascending=[False, True]) @@ -1145,7 +1139,7 @@ def q22(root: str, storage_options: Dict): ) customer_selected = customer_selected.loc[:, ["CNTRYCODE", "C_ACCTBAL"]] agg1 = customer_selected.groupby(["CNTRYCODE"], as_index=False).size() - # [change 5] add reset_index for the same error in q13 + # [DIFF] groupby.add `Series` to `DataFrame` agg1 = agg1.reset_index(name='size') agg1.columns = ["CNTRYCODE", "NUMCUST"] diff --git a/tpch/dask_queries/queries.py b/tpch/dask_queries/queries.py index 7274e07..ee0ddb1 100644 --- a/tpch/dask_queries/queries.py +++ b/tpch/dask_queries/queries.py @@ -1359,7 +1359,7 @@ def main(): if args.endpoint == "local" or args.endpoint is None: from dask.distributed import LocalCluster - client = LocalCluster() + client = Client(LocalCluster()) elif args.endpoint: client = Client(args.endpoint) diff --git a/tpch/pandas_queries/queries.py b/tpch/pandas_queries/queries.py index e24bd99..440c302 100644 --- a/tpch/pandas_queries/queries.py +++ b/tpch/pandas_queries/queries.py @@ -695,7 +695,8 @@ def g2(x): columns={"g1": "HIGH_LINE_COUNT", "g2": "LOW_LINE_COUNT"} ) - # Round the result to one decimal place -- If you use test_result.py to test the results, please uncomment the following two lines. + # Round the result to one decimal + # If you use test_result.py to test the results, please uncomment the following two lines. # total["HIGH_LINE_COUNT"] = total["HIGH_LINE_COUNT"].astype(float).round(1) # total["LOW_LINE_COUNT"] = total["LOW_LINE_COUNT"].astype(float).round(1) @@ -796,7 +797,7 @@ def q16(root: str, storage_options: Dict): supplier = load_supplier(root, storage_options) brand = "Brand#45" - type = "MEDIUM POLISHED" + p_type = "MEDIUM POLISHED" size_list = [49, 14, 23, 45, 19, 3, 36, 9] # Merge part and partsupp DataFrames @@ -805,7 +806,7 @@ def q16(root: str, storage_options: Dict): # Apply filters filtered_df = merged_df[ (merged_df["P_BRAND"] != brand) & - (~merged_df["P_TYPE"].str.startswith(type)) & + (~merged_df["P_TYPE"].str.startswith(p_type)) & (merged_df["P_SIZE"].isin(size_list)) ]