Skip to content

Commit

Permalink
💡 Updated and kept in English all docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
ronaldokun committed Feb 9, 2024
1 parent 3aa8e37 commit f95359e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 32 deletions.
4 changes: 2 additions & 2 deletions extracao/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# %% auto 0
__all__ = [
'TIMEOUT',
'RELATORIO',
'RELATORIO_SRD',
'ESTACAO',
'MALHA_IBGE',
'FILES',
Expand Down Expand Up @@ -51,7 +51,7 @@

# %% ../nbs/00_constants.ipynb 4
TIMEOUT = 5
RELATORIO = (
RELATORIO_SRD = (
'http://sistemas.anatel.gov.br/se/eApp/reports/b/srd/resumo_sistema.php?id={id}&state={state}'
)
ESTACAO = 'http://sistemas.anatel.gov.br/se/public/view/b/srd.php?wfid=estacoes&id={}'
Expand Down
34 changes: 22 additions & 12 deletions extracao/datasources/smp.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def exclude_duplicated(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações
) -> pd.DataFrame: # DataFrame com os dados duplicados excluídos
f"""Exclui os registros duplicados
O subconjunto de colunas consideradas é {AGG_SMP}
# NOTE: an f-string literal is NOT stored as __doc__, so this was never a
# real docstring; additionally, a backslash ('\n') inside an f-string
# replacement field is a SyntaxError on Python < 3.12. Use a plain
# docstring and reference the constant by name instead.
"""Exclude the duplicated rows.

The subset of columns considered for deduplication is AGG_SMP.
"""
df['Estação'] = df['Estação'].astype('int')
df = df.sort_values('Estação', ignore_index=True)
Expand Down Expand Up @@ -102,6 +102,7 @@ def exclude_duplicated(

@staticmethod
def read_channels():
"""Reads and formats the SMP channels files"""
channels = pd.read_csv(CHANNELS, dtype='string')
cols = ['Downlink_Inicial', 'Downlink_Final', 'Uplink_Inicial', 'Uplink_Final']
channels[cols] = channels[cols].astype('float')
Expand All @@ -114,6 +115,7 @@ def exclude_invalid_channels(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais inválidos excluídos
"""Helper function to keep only the valid downlink channels"""
df_sub = df[df.Canalização == 'Downlink'].reset_index(drop=True)
# for flag in ["Uplink", "Inválida"]:
# discarded = df[df.Canalização == flag]
Expand Down Expand Up @@ -185,9 +187,9 @@ def validate_channels(

def generate_uplink(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais de Uplink adicionados
"""Gera os canais de Uplink a partir dos canais de Downlink""" ''
df: pd.DataFrame, # Source dataFrame with downlink frequencies and offset
) -> pd.DataFrame: # DataFrame with the uplink frequencies added
"""Generate the respective Uplink channels based on the Downlink frequencies and Offset"""
df['Offset'] = pd.to_numeric(df['Offset'], errors='coerce').astype('float')
df['Largura_Emissão(kHz)'] = pd.to_numeric(
df['Largura_Emissão(kHz)'], errors='coerce'
Expand All @@ -202,7 +204,15 @@ def generate_uplink(
df.loc[valid, 'Frequência_Recepção'] = df.loc[valid, 'Frequência'] - df.loc[valid, 'Offset']
return df

def substitute_coordenates(self, df: pd.DataFrame) -> pd.DataFrame:
def substitute_coordenates(
self,
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Source dataframe with coordinates replaced by the municipality's central ones
"""Substitute the coordinates for the central coordinates of the municipality
Only does it for the grouped rows (Multiplicity > 1) since for these rows the
coordinate values are no longer valid.
"""
ibge = pd.read_csv(
IBGE_MUNICIPIOS,
dtype='string',
Expand All @@ -225,9 +235,9 @@ def substitute_coordenates(self, df: pd.DataFrame) -> pd.DataFrame:

def input_fixed_columns(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais de downlink e uplink contenados e formatados
"""Add the fixed helper columns to the dataframe"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Cleaned dataframe with some additional columns added
"""Formats and adds some helper columns to the dataframe"""
df['Status'] = 'L'
df['Serviço'] = '010'
down = df.drop('Frequência_Recepção', axis=1)
Expand All @@ -242,9 +252,9 @@ def input_fixed_columns(

def _format(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações e Plano_Básico mesclados
) -> pd.DataFrame: # DataFrame com os dados mesclados e limpos
"""Clean the merged dataframe with the data from the MOSAICO page"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Final processed dataframe
"""Formats, cleans, groups, adds and standardizes the queried data from the database"""
df = df.rename(columns=self.cols_mapping)
df = self.split_designacao(df)
df = self.exclude_duplicated(df)
Expand Down
20 changes: 6 additions & 14 deletions extracao/datasources/srd.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@
import pandas as pd
from dotenv import find_dotenv, load_dotenv

from extracao.constants import (
BW_MAP,
COLS_SRD,
DICT_SRD,
MONGO_SRD,
PROJECTION_SRD,
)
from extracao.constants import BW_MAP, COLS_SRD, DICT_SRD, MONGO_SRD, PROJECTION_SRD, RELATORIO_SRD

from .mosaico import Mosaico

Expand All @@ -30,7 +24,7 @@

# %% ../../nbs/01e_srd.ipynb 7
class SRD(Mosaico):
"""Classe para encapsular a lógica de extração de Radiodifusão"""
"""Class to encapsulate the Radio Broadcasting Service extraction logic"""

def __init__(self, mongo_uri: str = MONGO_URI, limit: int = 0) -> None:
super().__init__(mongo_uri)
Expand Down Expand Up @@ -61,6 +55,7 @@ def cols_mapping(self):
return DICT_SRD

def extraction(self) -> pd.DataFrame:
"""Extracts the data from the MongoDB database and returns a DataFrame"""
pipeline = [{'$match': self.query}, {'$project': self.projection}]
if self.limit > 0:
pipeline.append({'$limit': self.limit})
Expand All @@ -72,7 +67,7 @@ def _format(
self,
df: pd.DataFrame, # DataFrame com o resultantes do banco de dados
) -> pd.DataFrame: # DataFrame formatado
"""Formata, limpa e padroniza os dados provenientes da query no banco"""
"""Formats, cleans and standardizes the queried data from the database"""

df = df.rename(columns=self.cols_mapping)
status = df.Status.str.contains('-C1$|-C2$|-C3$|-C4$|-C7|-C98$', na=False)
Expand Down Expand Up @@ -106,11 +101,8 @@ def _format(
.astype('float')
).fillna(-1.0)
df.loc[:, ['Id', 'Status']] = df.loc[:, ['Id', 'Status']].astype('string')
df['Relatório_Canal'] = (
'http://sistemas.anatel.gov.br/se/eApp/reports/b/srd/resumo_sistema.php?id='
+ df['Id']
+ '&state='
+ df['Status']
# RELATORIO_SRD uses NAMED placeholders ('...id={id}&state={state}'), so a
# positional .format(a, b) raises KeyError: 'id'. Pass keyword arguments
# matching the template's field names.
df['Relatório_Canal'] = df.apply(
	lambda row: RELATORIO_SRD.format(id=row['Id'], state=row['Status']), axis=1
)
# self.append2discarded([self.discarded, discarded, discarded_with_na])
return df.loc[:, self.columns]
9 changes: 5 additions & 4 deletions extracao/datasources/telecom.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

# %% ../../nbs/01f_telecom.ipynb 7
class Telecom(Mosaico):
"""Extração e Processamento dos serviços de Telecomunições distintos de SMP"""
"""This class encapsulates the extraction and processing of Telecommunications Services from the MOSAICO MongoDB"""

def __init__(self, mongo_uri: str = MONGO_URI, limit: int = 0) -> None:
super().__init__(mongo_uri)
Expand Down Expand Up @@ -59,6 +59,7 @@ def cols_mapping(self):
return DICT_LICENCIAMENTO

def extraction(self) -> pd.DataFrame:
"""Extract the data from the MOSAICO MongoDB collection"""
pipeline = [{'$match': self.query}, {'$project': self.projection}]
if self.limit > 0:
pipeline.append({'$limit': self.limit})
Expand All @@ -68,9 +69,9 @@ def extraction(self) -> pd.DataFrame:

def _format(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações e Plano_Básico mesclados
) -> pd.DataFrame: # DataFrame com os dados mesclados e limpos
"""Clean the merged dataframe with the data from the MOSAICO page"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Final processed dataframe
"""Formats, cleans, groups and standardizes the queried data from the database"""
df = df.rename(columns=self.cols_mapping)
df = self.split_designacao(df)
duplicated = df.duplicated(subset=AGG_LICENCIAMENTO, keep='first')
Expand Down

0 comments on commit f95359e

Please sign in to comment.