Skip to content

Commit

Permalink
💡 Updated and kept in English all docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
ronaldokun committed Feb 9, 2024
1 parent 3aa8e37 commit f95359e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 32 deletions.
4 changes: 2 additions & 2 deletions extracao/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# %% auto 0
__all__ = [
'TIMEOUT',
'RELATORIO',
'RELATORIO_SRD',
'ESTACAO',
'MALHA_IBGE',
'FILES',
Expand Down Expand Up @@ -51,7 +51,7 @@

# %% ../nbs/00_constants.ipynb 4
TIMEOUT = 5
RELATORIO = (
RELATORIO_SRD = (
'http://sistemas.anatel.gov.br/se/eApp/reports/b/srd/resumo_sistema.php?id={id}&state={state}'
)
ESTACAO = 'http://sistemas.anatel.gov.br/se/public/view/b/srd.php?wfid=estacoes&id={}'
Expand Down
34 changes: 22 additions & 12 deletions extracao/datasources/smp.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def exclude_duplicated(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações
) -> pd.DataFrame: # DataFrame com os dados duplicados excluídos
f"""Exclui os registros duplicados
O subconjunto de colunas consideradas é {AGG_SMP}
# NOTE: an f-string literal is NOT stored as __doc__, so this was never a
# real docstring; additionally, a backslash ('\n') inside an f-string
# replacement field is a SyntaxError on Python < 3.12. Use a plain
# docstring and reference the constant by name instead.
"""Exclude the duplicated rows.

The subset of columns considered for deduplication is AGG_SMP.
"""
df['Estação'] = df['Estação'].astype('int')
df = df.sort_values('Estação', ignore_index=True)
Expand Down Expand Up @@ -102,6 +102,7 @@ def exclude_duplicated(

@staticmethod
def read_channels():
"""Reads and formats the SMP channels files"""
channels = pd.read_csv(CHANNELS, dtype='string')
cols = ['Downlink_Inicial', 'Downlink_Final', 'Uplink_Inicial', 'Uplink_Final']
channels[cols] = channels[cols].astype('float')
Expand All @@ -114,6 +115,7 @@ def exclude_invalid_channels(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais inválidos excluídos
"""Helper function to keep only the valid downlink channels"""
df_sub = df[df.Canalização == 'Downlink'].reset_index(drop=True)
# for flag in ["Uplink", "Inválida"]:
# discarded = df[df.Canalização == flag]
Expand Down Expand Up @@ -185,9 +187,9 @@ def validate_channels(

def generate_uplink(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais de Uplink adicionados
"""Gera os canais de Uplink a partir dos canais de Downlink""" ''
df: pd.DataFrame, # Source dataFrame with downlink frequencies and offset
) -> pd.DataFrame: # DataFrame with the uplink frequencies added
"""Generate the respective Uplink channels based on the Downlink frequencies and Offset"""
df['Offset'] = pd.to_numeric(df['Offset'], errors='coerce').astype('float')
df['Largura_Emissão(kHz)'] = pd.to_numeric(
df['Largura_Emissão(kHz)'], errors='coerce'
Expand All @@ -202,7 +204,15 @@ def generate_uplink(
df.loc[valid, 'Frequência_Recepção'] = df.loc[valid, 'Frequência'] - df.loc[valid, 'Offset']
return df

def substitute_coordenates(self, df: pd.DataFrame) -> pd.DataFrame:
def substitute_coordenates(
self,
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Source dataframe with coordinates replaced by the municipality's central ones
"""Substitute the coordinates for the central coordinates of the municipality
Only does it for the grouped rows (Multiplicity > 1) since for these rows the
coordinate values are no longer valid.
"""
ibge = pd.read_csv(
IBGE_MUNICIPIOS,
dtype='string',
Expand All @@ -225,9 +235,9 @@ def substitute_coordenates(self, df: pd.DataFrame) -> pd.DataFrame:

def input_fixed_columns(
self,
df: pd.DataFrame, # DataFrame de Origem
) -> pd.DataFrame: # DataFrame com os canais de downlink e uplink contenados e formatados
"""Add the fixed helper columns to the dataframe"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Cleaned dataframe with some additional columns added
"""Formats and adds some helper columns to the dataframe"""
df['Status'] = 'L'
df['Serviço'] = '010'
down = df.drop('Frequência_Recepção', axis=1)
Expand All @@ -242,9 +252,9 @@ def input_fixed_columns(

def _format(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações e Plano_Básico mesclados
) -> pd.DataFrame: # DataFrame com os dados mesclados e limpos
"""Clean the merged dataframe with the data from the MOSAICO page"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Final processed dataframe
"""Formats, cleans, groups, adds and standardizes the queried data from the database"""
df = df.rename(columns=self.cols_mapping)
df = self.split_designacao(df)
df = self.exclude_duplicated(df)
Expand Down
20 changes: 6 additions & 14 deletions extracao/datasources/srd.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@
import pandas as pd
from dotenv import find_dotenv, load_dotenv

from extracao.constants import (
BW_MAP,
COLS_SRD,
DICT_SRD,
MONGO_SRD,
PROJECTION_SRD,
)
from extracao.constants import BW_MAP, COLS_SRD, DICT_SRD, MONGO_SRD, PROJECTION_SRD, RELATORIO_SRD

from .mosaico import Mosaico

Expand All @@ -30,7 +24,7 @@

# %% ../../nbs/01e_srd.ipynb 7
class SRD(Mosaico):
"""Classe para encapsular a lógica de extração de Radiodifusão"""
"""Class to encapsulate the Radio Broadcasting Service extraction logic"""

def __init__(self, mongo_uri: str = MONGO_URI, limit: int = 0) -> None:
super().__init__(mongo_uri)
Expand Down Expand Up @@ -61,6 +55,7 @@ def cols_mapping(self):
return DICT_SRD

def extraction(self) -> pd.DataFrame:
"""Extracts the data from the MongoDB database and returns a DataFrame"""
pipeline = [{'$match': self.query}, {'$project': self.projection}]
if self.limit > 0:
pipeline.append({'$limit': self.limit})
Expand All @@ -72,7 +67,7 @@ def _format(
self,
df: pd.DataFrame, # DataFrame com o resultantes do banco de dados
) -> pd.DataFrame: # DataFrame formatado
"""Formata, limpa e padroniza os dados provenientes da query no banco"""
"""Formats, cleans and standardizes the queried data from the database"""

df = df.rename(columns=self.cols_mapping)
status = df.Status.str.contains('-C1$|-C2$|-C3$|-C4$|-C7|-C98$', na=False)
Expand Down Expand Up @@ -106,11 +101,8 @@ def _format(
.astype('float')
).fillna(-1.0)
df.loc[:, ['Id', 'Status']] = df.loc[:, ['Id', 'Status']].astype('string')
df['Relatório_Canal'] = (
'http://sistemas.anatel.gov.br/se/eApp/reports/b/srd/resumo_sistema.php?id='
+ df['Id']
+ '&state='
+ df['Status']
# RELATORIO_SRD uses NAMED placeholders ('...id={id}&state={state}'), so a
# positional .format(a, b) raises KeyError: 'id'. Pass keyword arguments
# matching the template's field names.
df['Relatório_Canal'] = df.apply(
	lambda row: RELATORIO_SRD.format(id=row['Id'], state=row['Status']), axis=1
)
# self.append2discarded([self.discarded, discarded, discarded_with_na])
return df.loc[:, self.columns]
9 changes: 5 additions & 4 deletions extracao/datasources/telecom.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

# %% ../../nbs/01f_telecom.ipynb 7
class Telecom(Mosaico):
"""Extração e Processamento dos serviços de Telecomunições distintos de SMP"""
"""This class encapsulates the extraction and processing of Telecommunications Services from the MOSAICO MongoDB"""

def __init__(self, mongo_uri: str = MONGO_URI, limit: int = 0) -> None:
super().__init__(mongo_uri)
Expand Down Expand Up @@ -59,6 +59,7 @@ def cols_mapping(self):
return DICT_LICENCIAMENTO

def extraction(self) -> pd.DataFrame:
"""Extract the data from the MOSAICO MongoDB collection"""
pipeline = [{'$match': self.query}, {'$project': self.projection}]
if self.limit > 0:
pipeline.append({'$limit': self.limit})
Expand All @@ -68,9 +69,9 @@ def extraction(self) -> pd.DataFrame:

def _format(
self,
df: pd.DataFrame, # DataFrame com os dados de Estações e Plano_Básico mesclados
) -> pd.DataFrame: # DataFrame com os dados mesclados e limpos
"""Clean the merged dataframe with the data from the MOSAICO page"""
df: pd.DataFrame, # Source dataframe
) -> pd.DataFrame: # Final processed dataframe
"""Formats, cleans, groups and standardizes the queried data from the database"""
df = df.rename(columns=self.cols_mapping)
df = self.split_designacao(df)
duplicated = df.duplicated(subset=AGG_LICENCIAMENTO, keep='first')
Expand Down

0 comments on commit f95359e

Please sign in to comment.