deploy: 15fe7e0

outbreak-info · Feb 12, 2025 · cda2d8b · cda2d8b
commit cda2d8b
Show file tree

Hide file tree

Showing 232 changed files with 44,053 additions and 0 deletions.
diff --git a/.buildinfo b/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: dc778ca46dc62ca4d52db49f3721fc60
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.doctrees/Cases_by_location.doctree b/.doctrees/Cases_by_location.doctree
diff --git a/.doctrees/Global_Prev.doctree b/.doctrees/Global_Prev.doctree
diff --git a/.doctrees/Lineage_mutations.doctree b/.doctrees/Lineage_mutations.doctree
diff --git a/.doctrees/Mut_By_Lin.doctree b/.doctrees/Mut_By_Lin.doctree
diff --git a/.doctrees/Outbreak_data Functions.doctree b/.doctrees/Outbreak_data Functions.doctree
diff --git a/.doctrees/Prevalence_by_location.doctree b/.doctrees/Prevalence_by_location.doctree
diff --git a/.doctrees/Seq_counts.doctree b/.doctrees/Seq_counts.doctree
diff --git a/.doctrees/auth_setup.doctree b/.doctrees/auth_setup.doctree
diff --git a/.doctrees/collection_date.doctree b/.doctrees/collection_date.doctree
diff --git a/.doctrees/daily_lag.doctree b/.doctrees/daily_lag.doctree
diff --git a/.doctrees/daily_prev_by_location.doctree b/.doctrees/daily_prev_by_location.doctree
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree
diff --git a/.doctrees/mutation_details.doctree b/.doctrees/mutation_details.doctree
diff --git a/.nojekyll b/.nojekyll
diff --git a/Global_Prev.html b/Global_Prev.html
diff --git a/Mut_By_Lin.html b/Mut_By_Lin.html
diff --git a/_images/ca_cases.png b/_images/ca_cases.png
diff --git a/_images/compare.png b/_images/compare.png
diff --git a/_images/multi_state_cases.png b/_images/multi_state_cases.png
diff --git a/_images/mut_by_lin.png b/_images/mut_by_lin.png
diff --git a/_images/prev_visual.png b/_images/prev_visual.png
diff --git a/_images/top4.png b/_images/top4.png
diff --git a/_sources/Global_Prev.rst.txt b/_sources/Global_Prev.rst.txt
@@ -0,0 +1,55 @@
+global_prevalence(pango_lin, mutations, cumulative)
+----------------------------------------------------
+
+.. autofunction:: outbreak_data.global_prevalence
+
+Example: Get global info on lineage 'XBB'::
+
+    df = outbreak_data.global_prevalence('xbb')
+    print(df)
+
+.. code-block::
+   :caption: Output:
+
+               date  total_count  lineage_count  total_count_rolling  \
+    0    2021-06-29        15453              2         10772.428571   
+    1    2021-06-30        13101              0         11060.571429   
+    2    2021-07-01        13088              0         11495.000000   
+    3    2021-07-02        11562              0         11890.571429   
+    4    2021-07-03         8310              0         11845.571429   
+    ..          ...          ...            ...                  ...   
+    713  2023-06-12           27              0           112.428571   
+    714  2023-06-13            8              0            61.714286   
+    715  2023-06-14            1              0            36.000000   
+    716  2023-06-15            1              0            25.285714   
+    717  2023-06-17            1              0             8.000000   
+
+         lineage_count_rolling  proportion  proportion_ci_lower  \
+    0                 0.285714    0.000027         4.558329e-08   
+    1                 0.285714    0.000026         4.439232e-08   
+    2                 0.285714    0.000025         4.271630e-08   
+    3                 0.285714    0.000024         4.129377e-08   
+    4                 0.285714    0.000024         4.145063e-08   
+    ..                     ...         ...                  ...   
+    713               0.142857    0.001271         4.374452e-06   
+    714               0.000000    0.000000         7.888011e-06   
+    715               0.000000    0.000000         1.354537e-05   
+    716               0.000000    0.000000         1.944577e-05   
+    717               0.000000    0.000000         5.949030e-05   
+
+         proportion_ci_upper  
+    0               0.000233  
+    1               0.000227  
+    2               0.000218  
+    3               0.000211  
+    4               0.000212  
+    ..                   ...  
+    713             0.022129  
+    714             0.039548  
+    715             0.066944  
+    716             0.094683  
+    717             0.262217  
+
+    [718 rows x 8 columns]
+
+
diff --git a/_sources/Mut_By_Lin.rst.txt b/_sources/Mut_By_Lin.rst.txt
@@ -0,0 +1,44 @@
+mutations_by_lineage(mutation, location, pango_lin)
+---------------------------------------------------
+
+.. autofunction:: outbreak_data.mutations_by_lineage
+
+
+Example usage::
+
+    #Get info on mutation 'orf1b:p314l'
+    df = od.mutations_by_lineage('orf1b:p314l')
+    print(df)
+
+.. code-block::
+   :caption: Output
+
+            pangolin_lineage  lineage_count  mutation_count  proportion  \
+    0                   ba.2        1227503         1222717    0.996101   
+    1                b.1.1.7        1154337         1147331    0.993931   
+    2                 ba.1.1        1044480         1039813    0.995532   
+    3                   ay.4         858839          854935    0.995454   
+    4                   ba.1         438947          437207    0.996036   
+    ...                  ...            ...             ...         ...   
+    2851                fn.1              1               1    1.000000   
+    2852  miscba1ba2post5386              1               1    1.000000   
+    2853            xbb.1.23              1               1    1.000000   
+    2854            xbb.1.37              1               1    1.000000   
+    2855                 xbv              1               1    1.000000   
+
+          proportion_ci_lower  proportion_ci_upper  
+    0                0.995990             0.996210  
+    1                0.993788             0.994071  
+    2                0.995402             0.995658  
+    3                0.995310             0.995595  
+    4                0.995847             0.996219  
+    ...                   ...                  ...  
+    2851             0.146746             0.999614  
+    2852             0.146746             0.999614  
+    2853             0.146746             0.999614  
+    2854             0.146746             0.999614  
+    2855             0.146746             0.999614  
+
+    [2856 rows x 6 columns]
+
+
diff --git a/_sources/Workflows1.rst.txt b/_sources/Workflows1.rst.txt
@@ -0,0 +1,106 @@
+
+Lineage and Mutation Analysis
+-----------------------------
+
+Here is an example workflow that allows the user to manipulate the data to find all the XBB lineages prevalent in India within a specified date range::
+  
+      import outbreak_data as od
+      import pandas as pd
+
+      # Get the prevalence of all circulating XBB lineages in India
+      data = od.prevalence_by_location("IND", startswith = 'xbb')
+      # Multiply prevalence values by 100 to express them as percentages
+      data['prevalence_rolling'] = data['prevalence_rolling'].apply(lambda x: x*100)
+      # Search for data based on date range
+      data = data.sort_values(by="date")
+      data = data.loc[data["date"].between("2022-09-12", "2023-03-31")]
+      
+
+.. code-block::
+   :caption: Output:
+    
+               date  total_count  lineage_count     lineage  prevalence  \
+    3014 2022-09-12            0              0    xbb.1.16    0.000000   
+    3781 2022-09-12            0              0     xbb.2.3    0.000000   
+    2593 2022-09-12          152              2       xbb.1    0.013158   
+    3782 2022-09-13            0              0     xbb.2.3    0.000000   
+    3015 2022-09-13            0              0    xbb.1.16    0.000000   
+    ...         ...          ...            ...         ...         ...   
+    4086 2023-03-31          196              2   xbb.2.3.2    0.010204   
+    3322 2023-03-31          196             29  xbb.1.16.1    0.147959   
+    2793 2023-03-31          196              1       xbb.1    0.005102   
+    3381 2023-03-31          196              7  xbb.1.16.2    0.035714   
+    3981 2023-03-31          196             15     xbb.2.3    0.076531   
+
+         prevalence_rolling  
+    3014            0.000000  
+    3781            0.000000  
+    2593            0.003451  
+    3782            0.000000  
+    3015            0.000000  
+    ...                  ...  
+    4086            0.031184  
+    3322            0.144578  
+    2793            0.014174  
+    3381            0.045358  
+    3981            0.084337  
+
+    [985 rows x 6 columns]
+
+.. image:: prev_visual.*
+
+.. note:: The `Vega-Altair <https://altair-viz.github.io/index.html>`_ visualization package is used for demonstration purposes. However, any Python visualization package can be used to create graphical representations of the data.
+
+# Finding the Most Prevalent Lineages 
+If we want to determine and plot the top four most prevalent lineages in India, we can make a few queries and use a few simple commands to create a table that shows us what these lineages are::
+
+    data=od.prevalence_by_location("IND")
+    most_prev = data.groupby('lineage').apply(max) # Finds the lineages with the most hits
+    most_prev = most_prev.mask(most_prev == '').dropna(how = 'any') # Drop any unknowns
+    most_prev = most_prev.iloc[:4]
+    print(most_prev)
+
+.. code-block::
+   :caption: Output
+
+                     date  total_count  lineage_count    lineage  prevalence  \
+    lineage                                                                    
+    ba.2       2023-04-20         5668           1445       ba.2    0.822785   
+    ba.2.10.1  2023-04-19         5668             93  ba.2.10.1    0.285714   
+    bq.1.1     2023-03-27          402              7     bq.1.1    0.428571   
+    ch.1.1     2023-02-13          119              4     ch.1.1    0.400000   
+
+               prevalence_rolling  
+    lineage                        
+    ba.2                 0.677541  
+    ba.2.10.1            0.095541  
+    bq.1.1               0.156863  
+    ch.1.1               0.066667  
+
+Next we'll collect the prevalence data on each of the four lineages::
+
+    # Retrieve the official prevalence data for these lineages using daily_prev
+    d1 = od.daily_prev('ba.2', "IND")
+    d2 = od.daily_prev('ba.2.10.1', "IND")
+    d3 = od.daily_prev('bq.1.1', "IND")
+    d4 = od.daily_prev( 'ch.1.1', "IND")
+
+    # Formatting for creating the graph
+    d1['lineage'] = 'ba.2'
+    d2['lineage'] = 'ba.2.10.1'
+    d3['lineage'] = 'bq.1.1'
+    d4['lineage'] = 'ch.1.1'
+
+    # Group together data from each lineage
+    data = pd.concat([d1, d2, d3, d4])
+    data = data.rename(columns = {'proportion': 'proportion (%)'})
+
+    #Pick a date range to analyze
+    data = data.sort_values(by="date")
+    data = data.loc[data["date"].between("2022-09-12", "2023-03-31")]
+    # Multiply by 100 to convert proportions to percentages
+    data['proportion (%)'] = data['proportion (%)'].apply(lambda x: x*100)
+
+    ## Use the visual package of your choice to create an area graph using your data
+
+.. image:: top4.*
diff --git a/_sources/all_lineage_prevalences.rst.txt b/_sources/all_lineage_prevalences.rst.txt
@@ -0,0 +1,42 @@
+all_lineage_prevalences
+---------------------------------------------
+
+.. autofunction:: outbreak_data.all_lineage_prevalences
+
+
+Example usage::
+    
+    # Find the prevalence of all lineages in Argentina that begin with 'xbb.1'
+    df = od.prevalence_by_location("ARG", startswith = 'xbb.1')
+    print(df)
+
+.. code-block::
+   :caption: Output
+
+                date  total_count  lineage_count  lineage  prevalence  \
+    1454  2022-10-12            3              1    xbb.1    0.333333   
+    1455  2022-10-13            0              0    xbb.1    0.000000   
+    1456  2022-10-14            0              0    xbb.1    0.000000   
+    1457  2022-10-15            0              0    xbb.1    0.000000   
+    1458  2022-10-16            0              0    xbb.1    0.000000   
+    ...          ...          ...            ...      ...         ...   
+    1673  2023-03-17            0              0  xbb.1.5    0.000000   
+    1674  2023-03-18            0              0  xbb.1.5    0.000000   
+    1675  2023-03-19            0              0  xbb.1.5    0.000000   
+    1676  2023-03-20            0              0  xbb.1.5    0.000000   
+    1677  2023-03-21            1              1  xbb.1.5    1.000000   
+
+          prevalence_rolling  
+    1454            0.350000  
+    1455            0.179487  
+    1456            0.109375  
+    1457            0.065421  
+    1458            0.058577  
+    ...                  ...  
+    1673            1.000000  
+    1674            1.000000  
+    1675            1.000000  
+    1676            1.000000  
+    1677            1.000000  
+
+    [224 rows x 6 columns]
diff --git a/_sources/auth_setup.rst.txt b/_sources/auth_setup.rst.txt
@@ -0,0 +1,7 @@
+authenticate_new_user
+----------------------------------------------------
+
+.. autofunction:: authenticate_user.authenticate_new_user
+
+
+.. note:: Authentication is only needed to access clinical data endpoints. A web browser and internet access are required for authentication. The auth_token is saved locally between runs, so this function only needs to be run once.
diff --git a/_sources/authenticate_new_user.rst.txt b/_sources/authenticate_new_user.rst.txt
@@ -0,0 +1,7 @@
+authenticate_new_user
+----------------------------------------------------
+
+.. autofunction:: authenticate_user.authenticate_new_user
+
+
+.. note:: Authentication is required to access clinical data endpoints; this requirement does not apply to wastewater data endpoints. A web browser is required for authentication. The user's authentication token is saved locally between runs, so in most cases this function only needs to be run once.
diff --git a/_sources/cases_by_location.rst.txt b/_sources/cases_by_location.rst.txt
@@ -0,0 +1,27 @@
+cases_by_location
+---------------------
+
+.. autofunction:: outbreak_data.cases_by_location
+
+**Example Usage**
+
+Get the number of SARS-CoV-2 cases in Colorado::
+
+    >>> df = outbreak_data.cases_by_location('USA_US-CO', 2)
+    >>> df
+
+                                  confirmed_numIncrease  confirmed_rolling
+    location          date                                                
+    USA_Colorado_None 2020-02-12                      0           0.000000
+                      2020-02-13                      0           0.000000
+                      2020-02-14                      0           0.000000
+                      2020-02-15                      0           0.000000
+                      2020-02-16                      0           0.000000
+    ...                                             ...                ...
+                      2023-03-04                      0         436.000000
+                      2023-03-05                      0         436.000000
+                      2023-03-06                    553         440.428558
+                      2023-03-07                      0         440.428558
+                      2023-03-08                    827         438.428558
+
+    [1121 rows x 2 columns]
diff --git a/_sources/cluster_df.rst.txt b/_sources/cluster_df.rst.txt
@@ -0,0 +1,4 @@
+cluster_df
+------------
+
+.. autofunction:: outbreak_tools.cluster_df
diff --git a/_sources/cluster_lineages.rst.txt b/_sources/cluster_lineages.rst.txt
@@ -0,0 +1,4 @@
+cluster_lineages
+-----------------
+
+.. autofunction:: outbreak_clustering.cluster_lineages
diff --git a/_sources/collection_date.rst.txt b/_sources/collection_date.rst.txt
@@ -0,0 +1,16 @@
+collection_date(pango_lin, mutations=None, location=None)
+----------------------------------------------------------
+
+.. autofunction:: outbreak_data.collection_date
+
+Example usage::
+
+    df = od.collection_date('b.1.1.7', location='IND')
+    print(df)
+
+.. code-block::
+   :caption: Output
+                    
+                    Values
+    date        2021-11-26
+    date_count           2
diff --git a/_sources/const_idx.rst.txt b/_sources/const_idx.rst.txt
@@ -0,0 +1,4 @@
+const_idx
+---------
+
+.. autofunction:: outbreak_tools.const_idx