fixed import statements for ea. function in docs

lshpaner · Jul 30, 2024 · efee8dd · efee8dd
1 parent 6bbb4fd
commit efee8dd
Show file tree

Hide file tree

Showing 9 changed files with 220 additions and 22 deletions.
diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle
diff --git a/docs/.doctrees/usage_guide.doctree b/docs/.doctrees/usage_guide.doctree
diff --git a/docs/_sources/usage_guide.rst.txt b/docs/_sources/usage_guide.rst.txt
@@ -75,8 +75,8 @@ directories do not exist, the function creates them.
 
 .. code-block:: python
 
-    # import function from library
     from eda_toolkit import ensure_directory 
+    
     import os # import operating system for dir
     
 
@@ -151,8 +151,6 @@ column in the dataframe.
 
 .. code-block:: python
 
-    import pandas as pd
-    import random
     from eda_toolkit import add_ids
 
     # Add a column of unique IDs with 9 digits and call it "census_id"
@@ -301,7 +299,6 @@ In the example below, we demonstrate how to use the ``strip_trailing_period`` fu
 
 .. code-block:: python
 
-    import pandas as pd
     from eda_toolkit import strip_trailing_period
 
     # Create a sample dataframe with trailing periods in some values
@@ -523,7 +520,8 @@ function to analyze a DataFrame's columns.
 
 .. code-block:: python
 
-    import pandas as pd
+    from eda_toolkit import dataframe_columns
+
     dataframe_columns(df=df)
 
 
@@ -754,7 +752,6 @@ function to analyze a DataFrame's columns.
 
 \
 
-
 Generating Summary Tables for Variable Combinations
 -----------------------------------------------------
 
@@ -797,6 +794,8 @@ variables from a DataFrame containing the census data [1]_.
 
 .. code-block:: python
 
+    from eda_toolkit import summarize_all_combinations
+
     # Define unique variables for the analysis
     unique_vars = [
         "age_group",
@@ -973,6 +972,69 @@ The first sheet will be a Table of Contents with hyperlinks to each summary tabl
 
 
 
+Saving DataFrames to Excel with Customized Formatting
+-------------------------------------------------------
+
+This section explains how to save multiple DataFrames to separate sheets in an Excel file with customized formatting using the ``save_dataframes_to_excel`` function.
+
+
+.. function:: save_dataframes_to_excel(file_path, df_dict, decimal_places=0)
+
+    Save multiple DataFrames to separate sheets in an Excel file with customized
+    formatting.
+
+    :param file_path: Full path to the output Excel file.
+    :type file_path: str
+    :param df_dict: Dictionary where keys are sheet names and values are DataFrames to save.
+    :type df_dict: dict
+    :param decimal_places: Number of decimal places to round numeric columns. Default is 0.
+    :type decimal_places: int
+
+    :notes:
+        - The function will autofit columns and left-align text.
+        - Numeric columns will be formatted with the specified number of decimal places.
+        - Headers will be bold and left-aligned without borders.
+
+The function performs the following tasks:
+
+- Writes each DataFrame to its respective sheet in the Excel file.
+- Rounds numeric columns to the specified number of decimal places.
+- Applies customized formatting to headers and cells.
+- Autofits columns based on the content length.
+
+**Example Usage**
+
+Below, we use the ``save_dataframes_to_excel`` function to save two DataFrames: the original DataFrame and a filtered DataFrame with ages between 18 and 40.
+
+.. code-block:: python
+
+    from eda_toolkit import save_dataframes_to_excel
+
+    # Example usage
+    file_name = "df_census.xlsx"  # Name of the output Excel file
+    file_path = os.path.join(data_path, file_name) 
+
+    # filter DataFrame to Ages 18-40
+    filtered_df = df[(df["age"] > 18) & (df["age"] < 40)]
+
+    df_dict = {
+        "original_df": df,
+        "ages_18_to_40": filtered_df,
+    }
+
+    save_dataframes_to_excel(
+        file_path=file_path,
+        df_dict=df_dict,
+        decimal_places=0,
+    )
+
+
+**Output**
+
+The output Excel file will contain two sheets, each with customized formatting: one with the original DataFrame and one with the filtered DataFrame restricted to ages between ``18`` and ``40``.
+
+
+
+
 Binning Numerical Columns
 ---------------------------
 

diff --git a/docs/genindex.html b/docs/genindex.html
@@ -119,6 +119,8 @@ <h2 id="B">B</h2>
         <li><a href="usage_guide.html#ensure_directory">ensure_directory()</a>
 </li>
         <li><a href="usage_guide.html#parse_date_with_rule">parse_date_with_rule()</a>
+</li>
+        <li><a href="usage_guide.html#save_dataframes_to_excel">save_dataframes_to_excel()</a>
 </li>
         <li><a href="usage_guide.html#strip_trailing_period">strip_trailing_period()</a>
 </li>
@@ -171,6 +173,13 @@ <h2 id="S">S</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
       <li>
+    save_dataframes_to_excel()
+
+      <ul>
+        <li><a href="usage_guide.html#save_dataframes_to_excel">built-in function</a>
+</li>
+      </ul></li>
+      <li>
     strip_trailing_period()
 
       <ul>

diff --git a/docs/index.html b/docs/index.html
@@ -127,6 +127,10 @@ <h1>Table of Contents<a class="headerlink" href="#table-of-contents" title="Perm
 <li class="toctree-l3"><a class="reference internal" href="usage_guide.html#summarize_all_combinations"><code class="docutils literal notranslate"><span class="pre">summarize_all_combinations()</span></code></a></li>
 </ul>
 </li>
+<li class="toctree-l2"><a class="reference internal" href="usage_guide.html#saving-dataframes-to-excel-with-customized-formatting">Saving DataFrames to Excel with Customized Formatting</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="usage_guide.html#save_dataframes_to_excel"><code class="docutils literal notranslate"><span class="pre">save_dataframes_to_excel()</span></code></a></li>
+</ul>
+</li>
 <li class="toctree-l2"><a class="reference internal" href="usage_guide.html#binning-numerical-columns">Binning Numerical Columns</a></li>
 </ul>
 </li>

diff --git a/docs/objects.inv b/docs/objects.inv
diff --git a/docs/searchindex.js b/docs/searchindex.js
diff --git a/docs/usage_guide.html b/docs/usage_guide.html
@@ -81,6 +81,10 @@
 <li class="toctree-l3"><a class="reference internal" href="#summarize_all_combinations"><code class="docutils literal notranslate"><span class="pre">summarize_all_combinations()</span></code></a></li>
 </ul>
 </li>
+<li class="toctree-l2"><a class="reference internal" href="#saving-dataframes-to-excel-with-customized-formatting">Saving DataFrames to Excel with Customized Formatting</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#save_dataframes_to_excel"><code class="docutils literal notranslate"><span class="pre">save_dataframes_to_excel()</span></code></a></li>
+</ul>
+</li>
 <li class="toctree-l2"><a class="reference internal" href="#binning-numerical-columns">Binning Numerical Columns</a></li>
 </ul>
 </li>
@@ -156,8 +160,8 @@ <h2>Path directories<a class="headerlink" href="#path-directories" title="Permal
 <code class="docutils literal notranslate"><span class="pre">images</span></code> folder in the parent directory. Using the <code class="docutils literal notranslate"><span class="pre">ensure_directory</span></code>
 function, we then verify that these directories exist. If any of the specified
 directories do not exist, the function creates them.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># import function from library</span>
-<span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">ensure_directory</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">ensure_directory</span>
+
 <span class="kn">import</span> <span class="nn">os</span> <span class="c1"># import operating system for dir</span>
 
 
@@ -229,9 +233,7 @@ <h2>Adding Unique Identifiers<a class="headerlink" href="#adding-unique-identifi
 each ID, a seed for reproducibility, and whether to set the new ID column as the
 index. The function generates unique IDs for each row and adds them as the first
 column in the dataframe.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
-<span class="kn">import</span> <span class="nn">random</span>
-<span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">add_ids</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">add_ids</span>
 
 <span class="c1"># Add a column of unique IDs with 9 digits and call it &quot;census_id&quot;</span>
 <span class="n">df</span> <span class="o">=</span> <span class="n">add_ids</span><span class="p">(</span>
@@ -374,8 +376,7 @@ <h2>Trailing Period Removal<a class="headerlink" href="#trailing-period-removal"
 
 <p><strong>Example Usage</strong></p>
 <p>In the example below, we demonstrate how to use the <code class="docutils literal notranslate"><span class="pre">strip_trailing_period</span></code> function to clean a column in a DataFrame. We start by importing the necessary libraries and creating a sample DataFrame. We then use the <code class="docutils literal notranslate"><span class="pre">strip_trailing_period</span></code> function to remove any trailing periods from the specified column.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
-<span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">strip_trailing_period</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">strip_trailing_period</span>
 
 <span class="c1"># Create a sample dataframe with trailing periods in some values</span>
 <span class="n">data</span> <span class="o">=</span> <span class="p">{</span>
@@ -583,7 +584,8 @@ <h2>DataFrame Analysis<a class="headerlink" href="#dataframe-analysis" title="Pe
 <p><strong>Example Usage</strong></p>
 <p>In the example below, we demonstrate how to use the <code class="docutils literal notranslate"><span class="pre">dataframe_columns</span></code>
 function to analyze a DataFrame’s columns.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">dataframe_columns</span>
+
 <span class="n">dataframe_columns</span><span class="p">(</span><span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
 </pre></div>
 </div>
@@ -842,7 +844,9 @@ <h2>Generating Summary Tables for Variable Combinations<a class="headerlink" hre
 <p><strong>Example Usage</strong></p>
 <p>Below, we use the <code class="docutils literal notranslate"><span class="pre">summarize_all_combinations</span></code> function to generate summary tables for the specified
 variables from a DataFrame containing the census data <a class="footnote-reference brackets" href="#id6" id="id4" role="doc-noteref"><span class="fn-bracket">[</span>1<span class="fn-bracket">]</span></a>.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Define unique variables for the analysis</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">summarize_all_combinations</span>
+
+<span class="c1"># Define unique variables for the analysis</span>
 <span class="n">unique_vars</span> <span class="o">=</span> <span class="p">[</span>
     <span class="s2">&quot;age_group&quot;</span><span class="p">,</span>
     <span class="s2">&quot;workclass&quot;</span><span class="p">,</span>
@@ -998,6 +1002,63 @@ <h2>Generating Summary Tables for Variable Combinations<a class="headerlink" hre
 The first sheet will be a Table of Contents with hyperlinks to each summary table.</p>
 <div class="no-click"><a class="reference internal image-reference" href="_images/summarize_all_combinations.gif"><img alt="EDA Toolkit Logo" class="align-left" src="_images/summarize_all_combinations.gif" style="width: 800px;" /></a>
 </div><div style="height: 106px;"></div></section>
+<section id="saving-dataframes-to-excel-with-customized-formatting">
+<h2>Saving DataFrames to Excel with Customized Formatting<a class="headerlink" href="#saving-dataframes-to-excel-with-customized-formatting" title="Permalink to this heading"></a></h2>
+<p>This section explains how to save multiple DataFrames to separate sheets in an Excel file with customized formatting using the <code class="docutils literal notranslate"><span class="pre">save_dataframes_to_excel</span></code> function.</p>
+<dl class="py function">
+<dt class="sig sig-object py" id="save_dataframes_to_excel">
+<span class="sig-name descname"><span class="pre">save_dataframes_to_excel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">df_dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">decimal_places</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#save_dataframes_to_excel" title="Permalink to this definition"></a></dt>
+<dd><p>Save multiple DataFrames to separate sheets in an Excel file with customized
+formatting.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters<span class="colon">:</span></dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>file_path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – Full path to the output Excel file.</p></li>
+<li><p><strong>df_dict</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a>) – Dictionary where keys are sheet names and values are DataFrames to save.</p></li>
+<li><p><strong>decimal_places</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a>) – Number of decimal places to round numeric columns. Default is 0.</p></li>
+</ul>
+</dd>
+<dt class="field-even">Notes<span class="colon">:</span></dt>
+<dd class="field-even"><ul class="simple">
+<li><p>The function will autofit columns and left-align text.</p></li>
+<li><p>Numeric columns will be formatted with the specified number of decimal places.</p></li>
+<li><p>Headers will be bold and left-aligned without borders.</p></li>
+</ul>
+</dd>
+</dl>
+</dd></dl>
+
+<p>The function performs the following tasks:
+- Writes each DataFrame to its respective sheet in the Excel file.
+- Rounds numeric columns to the specified number of decimal places.
+- Applies customized formatting to headers and cells.
+- Autofits columns based on the content length.</p>
+<p><strong>Example Usage</strong></p>
+<p>Below, we use the <cite>save_dataframes_to_excel</cite> function to save two DataFrames: the original DataFrame and a filtered DataFrame with ages between 18 and 40.</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">eda_toolkit</span> <span class="kn">import</span> <span class="n">save_dataframes_to_excel</span>
+
+<span class="c1"># Example usage</span>
+<span class="n">file_name</span> <span class="o">=</span> <span class="s2">&quot;df_census.xlsx&quot;</span>  <span class="c1"># Name of the output Excel file</span>
+<span class="n">file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">file_name</span><span class="p">)</span>
+
+<span class="c1"># filter DataFrame to Ages 18-40</span>
+<span class="n">filtered_df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[(</span><span class="n">df</span><span class="p">[</span><span class="s2">&quot;age&quot;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">18</span><span class="p">)</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s2">&quot;age&quot;</span><span class="p">]</span> <span class="o">&lt;</span> <span class="mi">40</span><span class="p">)]</span>
+
+<span class="n">df_dict</span> <span class="o">=</span> <span class="p">{</span>
+    <span class="s2">&quot;original_df&quot;</span><span class="p">:</span> <span class="n">df</span><span class="p">,</span>
+    <span class="s2">&quot;ages_18_to_40&quot;</span><span class="p">:</span> <span class="n">filtered_df</span><span class="p">,</span>
+<span class="p">}</span>
+
+<span class="n">save_dataframes_to_excel</span><span class="p">(</span>
+    <span class="n">file_path</span><span class="o">=</span><span class="n">file_path</span><span class="p">,</span>
+    <span class="n">df_dict</span><span class="o">=</span><span class="n">df_dict</span><span class="p">,</span>
+    <span class="n">decimal_places</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+<span class="p">)</span>
+</pre></div>
+</div>
+<p><strong>Output</strong></p>
+<p>The output Excel file will contain the original DataFrame and a filtered DataFrame as a separate tab with ages between <cite>18</cite> and <cite>40</cite>, each on separate sheets with customized formatting.</p>
+</section>
 <section id="binning-numerical-columns">
 <h2>Binning Numerical Columns<a class="headerlink" href="#binning-numerical-columns" title="Permalink to this heading"></a></h2>
 <p>If your DataFrame (e.g., the census data <a class="footnote-reference brackets" href="#id6" id="id5" role="doc-noteref"><span class="fn-bracket">[</span>1<span class="fn-bracket">]</span></a>)