Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ronaldo S.A. Batista committed Sep 12, 2024
2 parents f8d7325 + c09e71d commit d8cf854
Show file tree
Hide file tree
Showing 11 changed files with 336 additions and 27 deletions.
278 changes: 278 additions & 0 deletions environment_win_dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
name: db
channels:
- conda-forge
dependencies:
- asttokens=2.4.1
- astunparse=1.6.3
- attrs=23.2.0
- aws-c-auth=0.7.20
- aws-c-cal=0.6.12
- aws-c-common=0.9.17
- aws-c-compression=0.2.18
- aws-c-event-stream=0.4.2
- aws-c-http=0.8.1
- aws-c-io=0.14.8
- aws-c-mqtt=0.10.4
- aws-c-s3=0.5.8
- aws-c-sdkutils=0.1.16
- aws-checksums=0.1.18
- aws-crt-cpp=0.26.8
- aws-sdk-cpp=1.11.267
- azure-core-cpp=1.11.1
- azure-identity-cpp=1.6.0
- azure-storage-blobs-cpp=12.10.0
- azure-storage-common-cpp=12.5.0
- blosc=1.21.5
- branca=0.7.2
- brotli=1.1.0
- brotli-bin=1.1.0
- brotli-python=1.1.0
- bzip2=1.0.8
- c-ares=1.28.1
- ca-certificates=2024.2.2
- cairo=1.18.0
- certifi=2024.2.2
- cfitsio=4.4.0
- charset-normalizer=3.3.2
- click=8.1.7
- click-plugins=1.1.1
- cligj=0.7.2
- colorama=0.4.6
- comm=0.2.2
- contourpy=1.2.1
- cycler=0.12.1
- debugpy=1.8.1
- decorator=5.1.1
- dnspython=2.6.1
- exceptiongroup=1.2.0
- execnb=0.1.6
- executing=2.0.1
- expat=2.6.2
- fastcore=1.5.35
- fastprogress=1.0.3
- fiona=1.9.6
- fmt=10.2.1
- folium=0.16.0
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.14.2
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- fonttools=4.51.0
- freetype=2.12.1
- freexl=2.0.0
- gdal=3.8.5
- geographiclib=2.0
- geopandas=0.14.4
- geopandas-base=0.14.4
- geopy=2.4.1
- geos=3.12.1
- geotiff=1.7.1
- ghapi=1.0.5
- hdf4=4.2.15
- hdf5=1.14.3
- icu=73.2
- idna=3.7
- importlib-metadata=7.1.0
- importlib_metadata=7.1.0
- intel-openmp=2024.1.0
- ipykernel=6.29.3
- ipython=8.24.0
- ipywidgets=8.1.2
- jedi=0.19.1
- jinja2=3.1.4
- joblib=1.4.2
- jupyter_client=8.6.1
- jupyter_core=5.7.2
- jupyterlab_widgets=3.0.10
- kealib=1.5.3
- kiwisolver=1.4.5
- krb5=1.21.2
- lcms2=2.16
- lerc=4.0.0
- libabseil=20240116.2
- libaec=1.1.3
- libarchive=3.7.2
- libarrow=16.0.0
- libarrow-acero=16.0.0
- libarrow-dataset=16.0.0
- libarrow-substrait=16.0.0
- libblas=3.9.0
- libboost-headers=1.85.0
- libbrotlicommon=1.1.0
- libbrotlidec=1.1.0
- libbrotlienc=1.1.0
- libcblas=3.9.0
- libcrc32c=1.1.2
- libcurl=8.7.1
- libdeflate=1.20
- libevent=2.1.12
- libexpat=2.6.2
- libffi=3.4.2
- libgdal=3.8.5
- libglib=2.80.2
- libgoogle-cloud=2.23.0
- libgoogle-cloud-storage=2.23.0
- libgrpc=1.62.2
- libhwloc=2.10.0
- libiconv=1.17
- libintl=0.22.5
- libjpeg-turbo=3.0.0
- libkml=1.3.0
- liblapack=3.9.0
- libnetcdf=4.9.2
- libparquet=16.0.0
- libpng=1.6.43
- libpq=16.3
- libprotobuf=4.25.3
- libre2-11=2023.09.01
- librttopo=1.1.0
- libsodium=1.0.18
- libspatialindex=1.9.3
- libspatialite=5.1.0
- libsqlite=3.45.3
- libssh2=1.11.0
- libthrift=0.19.0
- libtiff=4.6.0
- libutf8proc=2.8.0
- libwebp-base=1.4.0
- libxcb=1.15
- libxml2=2.12.7
- libzip=1.10.1
- libzlib=1.2.13
- lz4-c=1.9.4
- lzo=2.10
- m2w64-gcc-libgfortran=5.3.0
- m2w64-gcc-libs=5.3.0
- m2w64-gcc-libs-core=5.3.0
- m2w64-gmp=6.1.0
- m2w64-libwinpthread-git=5.0.0.4634.697f757
- mapclassify=2.6.1
- markdown-it-py=3.0.0
- markupsafe=2.1.5
- matplotlib-base=3.8.4
- matplotlib-inline=0.1.7
- mdurl=0.1.2
- minizip=4.0.5
- mkl=2024.1.0
- msys2-conda-epoch=20160418
- munkres=1.1.4
- nbdev=2.2.10
- nest-asyncio=1.6.0
- openjpeg=2.5.2
- openssl=3.3.0
- orc=2.0.0
- pcre2=10.43
- pickleshare=0.7.5
- pixman=0.43.4
- poppler=24.04.0
- poppler-data=0.4.12
- postgresql=16.3
- proj=9.4.0
- pthread-stubs=0.4
- pthreads-win32=2.9.1
- pure_eval=0.2.2
- pyarrow-core=16.0.0
- pyproj=3.6.1
- pysocks=1.7.1
- python=3.12.3
- python-tzdata=2024.1
- python_abi=3.12
- pyyaml=6.0.1
- re2=2023.09.01
- requests=2.31.0
- rtree=1.2.0
- ruff=0.4.4
- scikit-learn=1.4.2
- shapely=2.0.4
- six=1.16.0
- snappy=1.2.0
- spdlog=1.13.0
- sqlite=3.45.3
- stack_data=0.6.2
- tbb=2021.12.0
- threadpoolctl=3.5.0
- tiledb=2.23.0
- tk=8.6.13
- typer-slim=0.12.3
- typer-slim-standard=0.12.3
- typing_extensions=4.11.0
- ucrt=10.0.22621.0
- uriparser=0.9.8
- vc=14.3
- vc14_runtime=14.38.33130
- vs2015_runtime=14.38.33130
- win_inet_pton=1.1.0
- xerces-c=3.2.5
- xmltodict=0.13.0
- xorg-libxau=1.0.11
- xorg-libxdmcp=1.1.3
- xyzservices=2024.4.0
- xz=5.2.6
- yaml=0.2.5
- zeromq=4.3.5
- zlib=1.2.13
- zstd=1.5.6
- pip:
  - networkx==3.1
  - numpy==1.26.0
  - openpyxl==3.1.2
  - packaging==23.1
  - pandas==2.0.3
  - paramiko==3.2.0
  - parso==0.8.3
  - pathspec==0.11.2
  - patsy==0.5.3
  - phik==0.12.3
  - pillow==10.0.1
  - pinggy==0.0.4
  - pip==23.2.1
  - platformdirs==3.10.0
  - pooch==1.7.0
  - prompt-toolkit==3.0.39
  - psutil==5.9.5
  - pyarrow==13.0.0
  - pycparser==2.21
  - pydantic==2.3.0
  - pydantic-core==2.6.3
  - pygments==2.16.1
  - pymongo==4.5.0
  - pynacl==1.5.0
  - pyodbc==4.0.39
  - pyparsing==3.1.1
  - python-dateutil==2.8.2
  - python-dotenv==1.0.0
  - pytz==2023.3.post1
  - pywavelets==1.4.1
  - pywin32==304
  - pyzmq==25.1.1
  - rich==13.5.3
  - scipy==1.11.2
  - seaborn==0.12.2
  - setuptools==68.2.2
  - shellingham==1.5.3
  - sniffio==1.3.0
  - statsmodels==0.14.0
  - tangled-up-in-unicode==0.2.0
  - tomli==2.0.1
  - tornado==6.3.3
  - tqdm==4.66.1
  - traitlets==5.10.0
  - typeguard==4.1.5
  - typer==0.9.0
  - typing-extensions==4.8.0
  - tzdata==2023.3
  - unicodedata2==15.0.0
  - urllib3==2.0.4
  - visions==0.7.5
  - watchdog==3.0.0
  - wcwidth==0.2.6
  - wheel==0.41.2
  - widgetsnbextension==4.0.9
  - wordcloud==1.9.2
  - yarl==1.9.2
  - ydata-profiling==4.5.1
  - zipp==3.16.2
prefix: C:\Users\rsilva\scoop\apps\mambaforge\current\envs\db
2 changes: 0 additions & 2 deletions extracao/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@
'licensee': 1.0,
'NumFistel': 1.0,
'NumEstacao': '$estacao.NumEstacao',
# flatten the nested fields with dot notation
'NomeMunicipio': '$srd_planobasico.NomeMunicipio',
'CodMunicipio': '$srd_planobasico.CodMunicipio',
'SiglaUF': '$srd_planobasico.SiglaUF',
Expand Down Expand Up @@ -359,7 +358,6 @@
{'frequency': {'$nin': [None, '', 0], '$type': 1.0}},
{'srd_planobasico.CodMunicipio': {'$nin': [None, '']}},
{'NumFistel': {'$nin': [None, '']}},
# {'habilitacao.DataValFreq': {'$nin': [None, '']}},
]
}

Expand Down
15 changes: 11 additions & 4 deletions extracao/datasources/aisweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _process_data(
df = self._check_ils_dme(df)
df = self._process_coords(df, airport_data)
df = df[COLUMNS]
df['Frequência'] = df.Frequência.apply(lambda x: ''.join(re.findall('\d|\.', x)))
df['Frequência'] = df.Frequência.apply(lambda x: ''.join(re.findall(r'\d|\.', x)))
df = df[~df['Frequência'].isin({'', '0'})].reset_index(drop=True)
df['Frequência'] = df.Frequência.str.extract(r'(^\d+\.?\d*)')
df['Frequência'] = df.Frequência.astype('float')
Expand All @@ -178,8 +178,15 @@ def request_stations(
icao_code: str, # Código ICAO identificando o aeroporto
) -> pd.DataFrame: # DataFrame com os dados de estações do aeroporto de código `icao_code`
"""Recebe o código do aeroporto `icao_code` e retorna as estações registradas nele"""
dict_data = self._get_request('&icaoCode=', icao_code)
return self._process_data(dict_data) if dict_data.get('aisweb') else pd.DataFrame()

from xml.parsers.expat import ExpatError

try:
dict_data = self._get_request('&icaoCode=', icao_code)
return self._process_data(dict_data) if dict_data.get('aisweb') else pd.DataFrame()
except ExpatError:
print(f'Error parsing XML for ICAO code: {icao_code}')
return pd.DataFrame()

@cached_property
def records(
Expand All @@ -190,7 +197,7 @@ def records(
self.request_stations,
self.airports.AeroCode,
threadpool=True,
n_workers=20,
n_workers=8,
pause=0.1,
progress=False,
)
Expand Down
2 changes: 2 additions & 0 deletions extracao/datasources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def _read(self, stem: str, backend: str = 'pyarrow') -> pd.DataFrame:

def _save(self, df: pd.DataFrame, folder: Union[str, Path], stem: str) -> pd.DataFrame:
"""Format, Save and return a dataframe"""
folder = Path(folder)
folder.mkdir(parents=True, exist_ok=True)
try:
file = Path(f'{folder}/{stem}.parquet.gzip')
df.astype('category').to_parquet(
Expand Down
4 changes: 2 additions & 2 deletions extracao/datasources/mosaico.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def split_designacao(
.str.split()
)
# Log
processing = 'Largura e Classe de Emissão individuais extraídas'
Base.register_log(df, processing, 'Designação_Emissão')
# processing = 'Largura e Classe de Emissão individuais extraídas'
# Base.register_log(df, processing, 'Designação_Emissão')

df = df.explode('Temp', ignore_index=True)

Expand Down
2 changes: 1 addition & 1 deletion extracao/datasources/sitarweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def extraction(self):
if self.read_cache:
df = self._read(f'{self.stem}_raw', 'numpy_nullable')
else:
df = pd.read_sql_query(self.query, self.connect(), dtype='string', copy=False)
df = pd.read_sql_query(self.query, self.connect(), dtype='string')
df['Log'] = '[]'
return df

Expand Down
8 changes: 4 additions & 4 deletions extracao/datasources/srd.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ def _format(
status = df.Status.str.contains('-C1$|-C2$|-C3$|-C4$|-C7|-C98$', na=False)

# Discard inactive statuses
discarded = df[~status].copy()
processing = 'Status não considerado para fins de monitoração'
Mosaico.register_log(discarded, processing, 'Status')
self.append2discarded(discarded)
# discarded = df[~status].copy()
# processing = 'Status não considerado para fins de monitoração'
# Mosaico.register_log(discarded, processing, 'Status')
# self.append2discarded(discarded)

df[status].reset_index(drop=True, inplace=True)

Expand Down
Loading

0 comments on commit d8cf854

Please sign in to comment.