Skip to content

Latest commit

 

History

History
164 lines (123 loc) · 6.27 KB

README.md

File metadata and controls

164 lines (123 loc) · 6.27 KB

Sample Size Calculation Based on Parameters

A small script implementing the most basic formulas for sample size calculation. It covers only sample sizes for point estimates of a parameter (a mean or a proportion); hypothesis testing and power analysis are not included.

Variables:

  • Data - the data itself. Only used to calculate standard deviation when the population standard deviation is unknown
  • Prior - prior knowledge of the population standard deviation or proportion
  • Conf - confidence level
  • Error - margin of error
import math
import pandas as pd
import scipy.stats as stats


# Formula based on mean estimate in infinite population
def mean_est_inf(z, std, error):
    """Return the sample size for estimating a mean in an infinite population.

    Evaluates ``z**2 * std**2 / error**2``.

    Args:
        z: Critical value for the chosen confidence level.
        std: Standard deviation used in the formula.
        error: Margin of error.

    Returns:
        The unrounded sample size.
    """
    numerator = math.pow(z, 2) * math.pow(std, 2)
    error_squared = math.pow(error, 2)
    return numerator / error_squared


# Formula based on proportion estimate in infinite population
def prop_est_inf(z, prior, error):
    """Return the sample size for estimating a proportion in an infinite population.

    Evaluates ``z**2 * prior * (1 - prior) / error**2``.

    Args:
        z: Critical value for the chosen confidence level.
        prior: Proportion plugged into the formula.
        error: Margin of error.

    Returns:
        The unrounded sample size.
    """
    z_squared = math.pow(z, 2)
    complement = 1 - prior
    # Keep the left-to-right multiplication order so the float result is unchanged.
    numerator = z_squared * prior * complement
    return numerator / math.pow(error, 2)


# Formula based on mean estimate in finite population
def mean_est_fin(z, std, error, pop_size):
    """Return the sample size for estimating a mean in a finite population.

    Evaluates ``z**2 * std**2 * N / (error**2 * (N - 1) + z**2 * std**2)``
    where ``N`` is ``int(pop_size)``.

    Args:
        z: Critical value for the chosen confidence level.
        std: Standard deviation used in the formula.
        error: Margin of error.
        pop_size: Population size; converted with ``int()``, so a numeric
            string is accepted.

    Returns:
        The unrounded sample size.
    """
    # The z^2 * std^2 term appears in both the numerator and the denominator.
    scaled_variance = math.pow(z, 2) * math.pow(std, 2)
    population = int(pop_size)
    numerator = scaled_variance * population
    denominator = math.pow(error, 2) * (population - 1) + scaled_variance
    return numerator / denominator


# Formula based on proportion estimate in finite population
def prop_est_fin(z, prior, error, pop_size):
    """Return the sample size for estimating a proportion in a finite population.

    Evaluates ``z**2 * p * (1 - p) * N / (error**2 * (N - 1) + z**2 * p * (1 - p))``
    where ``p`` is ``prior`` and ``N`` is ``int(pop_size)``.

    Args:
        z: Critical value for the chosen confidence level.
        prior: Proportion plugged into the formula.
        error: Margin of error.
        pop_size: Population size; converted with ``int()``, so a numeric
            string is accepted.

    Returns:
        The unrounded sample size.
    """
    # The z^2 * p * (1 - p) term appears in both the numerator and the denominator.
    scaled_variance = math.pow(z, 2) * prior * (1 - prior)
    population = int(pop_size)
    numerator = scaled_variance * population
    denominator = math.pow(error, 2) * (population - 1) + scaled_variance
    return numerator / denominator


# Main sample size calculation function
def sample_size(data, prior, conf, error):
    """Interactively choose a sample-size formula and return its result.

    The user is asked on standard input whether the population is infinite
    or finite (and, if finite, for its size), whether a mean or a proportion
    is being estimated, and where the standard deviation / proportion comes
    from. The matching formula is evaluated, the rounded result is printed,
    and the unrounded result is returned.

    Args:
        data: Object providing ``std()``, ``max()`` and ``min()``. Only used
            for mean estimates when the standard deviation is derived from
            the data (std modes 1 and 2).
        prior: Prior standard deviation (mean estimate, mode 3) or prior
            proportion (proportion estimate, mode 1).
        conf: Confidence level, strictly between 0 and 1 (e.g. ``0.95``).
        error: Margin of error; must be positive.

    Returns:
        The unrounded sample size produced by the selected formula.

    Raises:
        ValueError: If ``conf`` or ``error`` is out of range, if a menu
            answer is not one of the offered options, or if the population
            size is not a positive integer.
    """
    # Fail fast, before prompting: an out-of-range conf yields a NaN/inf
    # critical value and a zero error divides by zero inside the formulas.
    if not 0 < conf < 1:
        raise ValueError(
            'conf must be strictly between 0 and 1, got {!r}'.format(conf))
    if error <= 0:
        raise ValueError('error must be positive, got {!r}'.format(error))

    # Two-sided critical value of the standard normal distribution.
    alpha = 1 - conf
    z = stats.norm.ppf(1 - alpha / 2)

    # Node 1 - determination of what formula to apply based on the infinite/finite nature of the population
    population = input('Input the population state:'
                       '\n1) Population is infinite'
                       '\n2) Population is finite'
                       '\n').strip()
    if population not in ('1', '2'):
        raise ValueError(
            'Unknown population state: {!r}'.format(population))
    finite = population == '2'

    pop_size = None
    if finite:
        raw_pop_size = input('Input the pop size:'
                             '\n').strip()
        try:
            pop_size = int(raw_pop_size)
        except ValueError:
            raise ValueError(
                'Population size must be an integer, got {!r}'.format(
                    raw_pop_size)) from None
        if pop_size < 1:
            raise ValueError(
                'Population size must be positive, got {!r}'.format(pop_size))

    # Node 2 - determination of what formula to apply based on mean/proportion target estimation
    estimate = input('Input the type of calculation:'
                     '\n1) Mean point estimate'
                     '\n2) Proportion estimate'
                     '\n').strip()

    if estimate == '1':

        # Node 3 - determination of what formula to apply based on population parameter
        mode = input('Input the state of prior knowledge of std:'
                     '\n1) Data is pop'  # Data inserted into the function is itself the population
                     '\n2) Data is sample, estimate std based on its range'  # Std estimation based on sample range
                     '\n3) Prior std inputted'  # Population std is inserted into the function as 'prior'
                     '\n').strip()

        if mode == '1':
            # NOTE(review): if data is a pandas Series, std() defaults to the
            # sample standard deviation (ddof=1) - confirm that is intended
            # when the data is the whole population.
            std = data.std()
            label = 'Sample size based on pop std:'
        elif mode == '2':
            # The range divided by four is used as the std estimate.
            std = (data.max() - data.min()) / 4
            print(std)
            label = 'Sample size based on sample range:'
        elif mode == '3':
            std = prior
            label = 'Sample size based on prior std:'
        else:
            raise ValueError('Unknown std mode: {!r}'.format(mode))

        if finite:
            n = mean_est_fin(z, std, error, pop_size)
        else:
            n = mean_est_inf(z, std, error)

    elif estimate == '2':

        # Node 3 - determination of what formula to apply based on population parameter
        mode = input('Input the state of prior knowledge of proportions:'
                     '\n1) Proportion is inputted'
                     '\n2) Proportion is not known'
                     '\n').strip()

        if mode == '1':
            proportion = prior
            label = 'Sample size based on prior proportion:'
        elif mode == '2':
            # 0.5 maximises p * (1 - p), giving the largest sample size.
            proportion = 0.5
            label = 'Sample size based on unknown proportion:'
        else:
            raise ValueError('Unknown proportion mode: {!r}'.format(mode))

        if finite:
            n = prop_est_fin(z, proportion, error, pop_size)
        else:
            n = prop_est_inf(z, proportion, error)

    else:
        raise ValueError('Unknown calculation type: {!r}'.format(estimate))

    print(label, round(n, 2))
    return n


# Example call. ``df`` and ``column`` are not defined in this script and must
# be supplied by the caller. The result is stored under its own name so the
# ``sample_size`` function is not overwritten and can be called again.
required_n = sample_size(data=df[column],
                         prior=0.2,
                         conf=0.95,
                         error=0.02)