Skip to content

Commit

Permalink
Merge pull request #29 from RyanAugust/documentation
Browse files Browse the repository at this point in the history
Documentation
  • Loading branch information
RyanAugust authored Jul 11, 2024
2 parents 1643211 + 76b87aa commit 1fa2fff
Showing 1 changed file with 107 additions and 5 deletions.
112 changes: 107 additions & 5 deletions src/pysimmmulator/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,25 @@ def _report_random_state(self):

def simulate_baseline(
self,
base_p,
base_p: int,
trend_p: int,
temp_var: int,
temp_coef_mean: int,
temp_coef_sd: int,
error_std: int,
) -> None:
"""Simulation of baseline sales and revenue for the subject business.
Args:
basic_params (basic_parameters): Number of years you want to generate data for.
base_p (int): Amount of baseline sales we get in a day (sales not due to ads)
trend_p (int): How much baseline sales is going to grow over the whole period of our data.
temp_var (int): How big the height of the sine function is for temperature -- i.e. how much temperature varies (used to inject seasonality into our data)
temp_coef_mean (int): The average of how important seasonality is in our data (the larger this number, the more important seasonality is for sales)
temp_coef_sd (int): The standard deviation of how important seasonality is in our data (the larger this number, the more variable the importance of seasonality is for sales)
error_std (int): Amount of statistical noise added to baseline sales (the larger this number, the noisier baseline sales will be).
Returns:
None"""
self.baseline_params = baseline_parameters(
basic_params=self.basic_params,
base_p=base_p,
Expand Down Expand Up @@ -95,6 +107,16 @@ def simulate_ad_spend(
campaign_spend_std: int,
max_min_proportion_on_each_channel: dict,
) -> None:
"""Simulation of ad spend based on normal distribution parameters for campaign spend.
Overall campaign spend is then divided amongst each channel based on passed
min-max proportionality.
Args:
campaign_spend_mean (int): The average amount of money spent on a campaign.
campaign_spend_std (int): The standard deviation of money spent on a campaign
max_min_proportion_on_each_channel (dict): Specifies the minimum and maximum percentages of total spend allocated to each channel.
Returns:
None"""
ad_spend_params = ad_spend_parameters(
campaign_spend_mean=campaign_spend_mean,
campaign_spend_std=campaign_spend_std,
Expand Down Expand Up @@ -148,19 +170,45 @@ def simulate_ad_spend(
logger.info("You have completed running step 2: Simulating ad spend.")

def _negative_check(self, df: pd.DataFrame, column: str, channel: str) -> None:
"""Checks each column of the dataframe for negative values. Negative values are seen as errors
in the case of this simulation, given that values produced typically reflect investment or media metrics.
Args:
df (DataFrame): Dataframe containing columns of metrics with rows of date wise values
column (str): specified column to search for negativ values.
channel (str): context passed to the function for sake of error logging when negative values are detected
Returns:
None"""
if df[column].min() < 0:
sub_zero_count = (df[column] < 0).sum()
logger.warning(
f"There are {sub_zero_count} negative values for {channel} in {column.split('_')[1]}. Consider adjusting your distribution parameters. For now those values will be set to 0"
)

def _negative_replace(self, df: pd.DataFrame, column: str) -> pd.DataFrame:
"""Replaces negative velues within a passed column.
Args:
df (DataFrame): Dataframe containing columns of metrics with rows of date wise values
column (str): specified column to search for negativ values
Returns:
df (DataFrame): Treated dataframe with replacement"""
df.loc[df[column] < 0, column] = 0
return df

def simulate_media(
self, true_cpm: dict, true_cpc: dict, noisy_cpm_cpc: dict
) -> None:
"""Simulation of relevant media metrics for each channel.
True values are passed and noise is applied in accordance with a normal distribution described within the noisy dict.
Media metrics are checked for 0 values stemming from the random noise applied and will be flagged with logger when found. It is generally understood that negative values should not arise for media metrics.
Args:
true_cpm (dict): Specifies the true Cost per Impression (CPM) of each channel (noise will be added to this to simulate number of impressions)
true_cpc (dict): Specifies the true Cost per Click (CPC) of each channel (noise will be added to this to simulate number of clicks)
noisy_cpm_cpc (dict): Specifies the bias and scale of noise added to the true value CPM or CPC for each channel.
Returns:
None"""
media_params = media_parameters(true_cpm, true_cpc, noisy_cpm_cpc)
media_params.check(basic_params=self.basic_params)

Expand Down Expand Up @@ -226,6 +274,12 @@ def simulate_media(
logger.info("You have completed running step 3: Simulating media.")

def simulate_cvr(self, noisy_cvr: dict) -> None:
"""Generate Conversion Rate using the true conversion rates passed in the basic params in combination with noise parameters passed in this function.
Args:
noisy_cvr (dict): Specifies the bias and scale of noise added to the true value CVR for each channel.
Returns:
None"""
cvr_params = cvr_parameters(noisy_cvr)
cvr_params.check(basic_params=self.basic_params)

Expand All @@ -245,6 +299,13 @@ def simulate_cvr(self, noisy_cvr: dict) -> None:
logger.info("You have completed running step 4: Simulating CVR.")

def _reformat_for_mmm(self) -> None:
"""Establishes a date based index which previously generated spend, media metric, and conversion data is then mapped to.
This begins to form the structure of a dataframe that can function as an MMM input.
Args:
None
Returns:
None
"""
date_backbone = pd.date_range(
start=self.basic_params.start_date, end=self.basic_params.end_date, freq="D"
)
Expand Down Expand Up @@ -280,12 +341,22 @@ def _reformat_for_mmm(self) -> None:

@staticmethod
def _build_decay_vector(original_vector: pd.Series, decay_value: float) -> pd.Series:
"""Helper function for the iterative portion of simulating adstocking.
Args:
original_vector (Series): Original vector of media values (ie. impressions or clicks)
decay_value (float): Simple value which is 0 < x < 1 and describes the decay of adstock over time."""
decayed_vector = [original_vector.values[0]]
for i, orig_value in enumerate(original_vector.values[1:]):
decayed_vector.append(orig_value + decay_value * decayed_vector[i])
return pd.Series(decayed_vector)

def _simulate_decay(self, true_lambda_decay: dict) -> None:
"""Helper function for the simulation of adstocking. Ad stocking is the idea that an ad seen today has a lasting effect for some amount of time in the future.
This function takes an original vector and progressively adds media outcomes (impressions or clicks) to reflect the adstocking concept.
Args:
true_lambda_decay (dict): mapping of channel: value, where each value represents the decay over time of a media metric's adstock"""
for channel in true_lambda_decay.keys():
metric = (
"impressions"
Expand All @@ -298,7 +369,6 @@ def _simulate_decay(self, true_lambda_decay: dict) -> None:
)

logger.info("You have completed running step 5b: applying adstock decay.")
# Knew I could find a better way, even better now

def _simulate_diminishing_returns(
self,
Expand Down Expand Up @@ -335,6 +405,14 @@ def _simulate_diminishing_returns(
def simulate_decay_returns(
self, true_lambda_decay: dict, alpha_saturation: dict, gamma_saturation: dict
) -> None:
"""Generates the decay values associated with ad stocking.
Args:
true_lambda_decay (dict): Numbers between 0 and 1 specifying the lambda parameters for a geometric distribution for adstocking media variables.
alpha_saturation (dict): Specifying alpha parameter of geometric distribution for applying diminishing returns to media variables
gamma_saturation (dict): Between 0 and 1 specifying gamma parameter of geometric distribution for applying diminishing returns to media variables
Returns:
None"""
adstock_params = adstock_parameters(
true_lambda_decay, alpha_saturation, gamma_saturation
)
Expand All @@ -347,7 +425,13 @@ def simulate_decay_returns(

logger.info("You have completed running step 5: Simulating adstock.")

def calculate_conversions(self):
def calculate_conversions(self) -> None:
"""Calculates row wise values for conversions based on the noisy cvr and the adstocked media metric associated with each channel.
Args:
None
Returns:
None"""
for channel in self.basic_params.all_channels:
metric = (
"impressions"
Expand All @@ -363,7 +447,13 @@ def calculate_conversions(self):
"You have completed running step 6: Calculating the number of conversions."
)

def consolidate_dataframe(self):
def consolidate_dataframe(self) -> None:
"""Filters and formats internal data into uniform output.
Args:
None
Returns:
None"""
metric_cols = []
[metric_cols.append(f"{channel}_impressions") for channel in self.basic_params.channels_impressions]
[metric_cols.append(f"{channel}_clicks") for channel in self.basic_params.channels_clicks]
Expand All @@ -390,6 +480,12 @@ def consolidate_dataframe(self):
)

def calculate_channel_roi(self) -> None:
"""Calculates the ROI for all channels, based on pre-generated spend and conversions data
Args:
None
Returns:
None"""
self.channel_roi = {}
for channel in self.basic_params.all_channels:
total_cpa = (
Expand All @@ -400,6 +496,12 @@ def calculate_channel_roi(self) -> None:
self.channel_roi[channel] = total_roi

def finalize_output(self, aggregation_level: str) -> None:
"""Provide aggregation (daily, weekly) and column filtering for final output
Args:
aggregation_level (str): [daily, weekly] the granularity at which to return output data
Returns:
None"""
output_params = output_parameters(aggregation_level)
metric_cols = []
[metric_cols.append(f"{channel}_impressions") for channel in self.basic_params.channels_impressions]
Expand Down Expand Up @@ -427,7 +529,6 @@ def finalize_output(self, aggregation_level: str) -> None:
)

def run_with_config(self, config: dict) -> set[pd.DataFrame, dict]:
# import pysimmmulator.load_parameters as load_params
if self.basic_params is None:
self.basic_params = basic_parameters(**config["basic_params"])
self.simulate_baseline(**config["baseline_params"])
Expand All @@ -444,6 +545,7 @@ def run_with_config(self, config: dict) -> set[pd.DataFrame, dict]:


class multisimmm(simmm):
"""Provides capability to generate multiple runs on a single configuration"""
def __init__(self):
super(multisimmm, self).__init__()
self.final_frames = []
Expand Down

0 comments on commit 1fa2fff

Please sign in to comment.