From 582c2838bb3c8168ab7ecb8ea5f68451b7c90b98 Mon Sep 17 00:00:00 2001 From: EJ Date: Mon, 18 Dec 2023 13:34:54 +0100 Subject: [PATCH] Small bug fixes --- Dockerfile | 12 +- assets/.gitignore | 2 + assets/data/.gitignore | 1 + requirements.txt | 545 ++--------------------------------------- src/clean.py | 6 +- 5 files changed, 32 insertions(+), 534 deletions(-) create mode 100644 assets/.gitignore diff --git a/Dockerfile b/Dockerfile index 3a356f8..159322b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,23 @@ -FROM python:3.8 +FROM python:3.10 WORKDIR /usr/d2m RUN mkdir -p assets/data/raw -COPY src ./src -COPY dvc.yaml ./dvc.yaml -COPY params_default.yaml ./params.yaml -COPY params_default.yaml ./params_default.yaml COPY requirements.txt ./requirements.txt RUN pip3 install -r requirements.txt RUN pip3 install dvc RUN pip3 install flask flask-restful +COPY src ./src +COPY dvc.yaml ./dvc.yaml +COPY params_default.yaml ./params.yaml +COPY params_default.yaml ./params_default.yaml + EXPOSE 5000 +RUN git init RUN dvc init --no-scm CMD ["python3", "src/api.py"] diff --git a/assets/.gitignore b/assets/.gitignore new file mode 100644 index 0000000..dd8edd7 --- /dev/null +++ b/assets/.gitignore @@ -0,0 +1,2 @@ +/profile +/models diff --git a/assets/data/.gitignore b/assets/data/.gitignore index 47db83b..243f5a4 100644 --- a/assets/data/.gitignore +++ b/assets/data/.gitignore @@ -4,3 +4,4 @@ /scaled /sequentialized /combined +/cleaned diff --git a/requirements.txt b/requirements.txt index 19a8130..b3f9560 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,528 +1,17 @@ -absl-py==1.0.0 -aiohttp==3.8.1 -aiohttp-retry==2.4.6 -aiosignal==1.2.0 -aniso8601==9.0.1 -anyio==3.5.0 -appdirs==1.4.4 -argon2-cffi==21.3.0 -argon2-cffi-bindings==21.2.0 -arrow==1.2.3 -asciimatics==1.14.0 -asgiref==3.5.0 -asn1crypto==1.5.1 -astroid==2.11.6 -asttokens==2.0.5 -astunparse==1.6.3 -async-timeout==4.0.2 -asyncssh==2.8.1 -atpublic==3.0.1 -attr==0.3.1 -attrdict==2.0.1 -attrs==21.4.0 -auto-sklearn==0.14.7 -azure-core==1.23.1 -azure-storage-blob==12.11.0 -Babel==2.9.1 -backcall==0.2.0 -beautifulsoup4==4.11.1 -bitstruct==8.11.1 -black==22.3.0 -bleach==5.0.0 -blinker==1.4 -blis==0.7.9 -boto==2.49.0 -boto3==1.16.63 -botocore==1.19.63 -bottle==0.12.21 -boxing==0.1.4 -Brlapi==0.7.0 -Brotli==1.0.9 -cachetools==4.2.4 -caffeine==2.9.4 -carbontracker==1.1.7 -catalogue==2.0.8 -certifi==2021.10.8 -cffi==1.15.0 -chardet==3.0.4 -charset-normalizer==2.0.12 -chrome-gnome-shell==0.0.0 -click==8.0.4 -cloudpickle==2.0.0 -codecarbon==2.1.4 -coincurve==17.0.0 -colorama==0.4.4 -command-not-found==0.3 -commonmark==0.9.1 -confection==0.0.4 -configobj==5.0.6 -ConfigSpace==0.4.21 -coreapi==2.3.3 -coreschema==0.0.4 -cryptography==36.0.1 -cupshelpers==1.0 -cx-Freeze==6.11.1 -cyclemoid-pytorch==1.1.0 -cycler==0.11.0 -cymem==2.0.7 -Cython==0.29.28 -cytoolz==0.12.1 -dash==2.3.0 -dash-bootstrap-components==0.13.1 -dash-core-components==2.0.0 -dash-cytoscape==0.3.0 -dash-html-components==2.0.0 -dash-table==5.0.0 -dask==2022.9.0 -dbus-python==1.2.16 -debugpy==1.5.1 -decorator==5.1.1 -defer==1.0.6 -defusedxml==0.7.1 -Deprecated==1.2.13 -diagrams==0.23.3 -dictdiffer==0.9.0 -dill==0.3.4 -diskcache==5.4.0 -distributed==2022.9.0 -distro==1.7.0 -distro-info===0.23ubuntu1 -Django==3.2.14 -django-annoying==0.10.6 -django-cors-headers==3.6.0 -django-debug-toolbar==3.2.1 -django-extensions==3.1.0 -django-filter==2.4.0 -django-model-utils==4.1.1 -django-ranged-fileresponse==0.1.2 -django-rest-swagger==2.2.0 -django-rq==2.5.1 -django-user-agents==0.4.0 -djangorestframework==3.13.1 -dm-tree==0.1.6 -docker==6.0.1 -docker-pycreds==0.4.0 -dpath==2.0.6 -drf-dynamic-fields==0.3.0 -drf-flex-fields==0.9.5 -drf-generators==0.3.0 -drf-yasg==1.20.0 -dulwich==0.20.32 -dvc==2.9.5 -eciespy==0.3.13 -edward2==0.0.2 -Elixir==0.7.1 -emcee==3.1.2 -entrypoints==0.3 -essential-generators==1.0 -et-xmlfile==1.1.0 -eth-hash==0.5.1 -eth-keys==0.4.0 -eth-typing==3.2.0 -eth-utils==2.1.0 -ewmh==0.1.6 -executing==0.8.3 -expiringdict==1.1.4 -fairscale==0.4.6 -fastapi==0.92.0 -fastjsonschema==2.15.3 -fedml==0.7.502 -fire==0.5.0 -fit-tool==0.9.13 -Flask==2.0.3 -Flask-Babel==2.0.0 -Flask-Compress==1.11 -Flask-Login==0.6.1 -Flask-RESTful==0.3.9 -flaskwebgui==0.3.5 -flatbuffers==2.0 -flatten-dict==0.4.2 -flufl.lock==7.0 -flwr==1.3.0 -fonttools==4.29.1 -frozenlist==1.3.0 -fsspec==2022.2.0 -fst-pso==1.8.1 -ftfy==6.1.1 -funcy==1.17 -future==0.18.2 -FuzzyTM==2.0.5 -fuzzywuzzy==0.18.0 -gast==0.4.0 -gensim==4.3.0 -geocoder==1.38.1 -gevent==21.12.0 -geventhttpclient==2.0.2 -ghostscript==0.7 -gitdb==4.0.9 -GitPython==3.1.27 -google-api-core==2.10.0 -google-auth==2.11.0 -google-auth-oauthlib==0.4.6 -google-cloud-appengine-logging==1.1.0 -google-cloud-audit-log==0.2.0 -google-cloud-core==2.3.2 -google-cloud-logging==2.7.1 -google-cloud-storage==2.5.0 -google-crc32c==1.5.0 -google-pasta==0.2.0 -google-resumable-media==2.3.3 -googleapis-common-protos==1.56.4 -gprof2dot==2021.2.21 -grandalf==0.6 -graphviz==0.20.1 -greenlet==1.1.2 -grpc-google-iam-v1==0.12.3 -grpcio==1.53.0 -h11==0.14.0 -h5py==3.6.0 -HeapDict==1.0.1 -hopfield-layers==1.0.2 -hopular==1.0.0 -htmlmin==0.1.12 -httpcore==0.16.3 -httplib2==0.14.0 -httpx==0.23.3 -idna==3.3 -ImageHash==4.3.1 -imageio==2.16.1 -importlib-metadata==4.11.2 -importlib-resources==5.4.0 -inflection==0.5.1 -interpret==0.2.7 -interpret-core==0.2.7 -ipykernel==6.9.2 -ipython==8.1.1 -ipython-genutils==0.2.0 -ipywidgets==7.7.1 -isodate==0.6.1 -isort==5.10.1 -iterators==0.0.2 -itsdangerous==2.1.1 -itypes==1.2.0 -jax==0.4.8 -jdcal==1.4.1 -jedi==0.18.1 -Jinja2==3.0.3 -jmespath==0.10.0 -joblib==1.2.0 -json5==0.9.6 -jsonschema==3.2.0 -jupyter==1.0.0 -jupyter-client==7.1.2 -jupyter-console==6.4.4 -jupyter-core==4.9.2 -jupyter-server==1.16.0 -jupyterlab==3.3.4 -jupyterlab-pygments==0.2.2 -jupyterlab-server==2.12.0 -jupyterlab-vim==0.15.1 -jupyterlab-widgets==1.1.1 -kaleido==0.2.1 -keras==2.12.0 -keras-nlp==0.4.0 -Keras-Preprocessing==1.1.2 -keras-tuner==1.3.3 -keyring==18.0.1 -kiwisolver==1.3.2 -kt-legacy==1.0.4 -label-studio==1.6.0 -label-studio-converter==0.0.44 -label-studio-tools==0.0.1 -langcodes==3.3.0 -language-selector==0.1 -launchdarkly-server-sdk==7.3.0 -launchpadlib==1.10.13 -lazr.restfulclient==0.14.2 -lazr.uri==1.0.3 -lazy-object-proxy==1.7.1 -liac-arff==2.5.0 -libclang==13.0.0 -lightgbm==3.3.3 -lime==0.2.0.1 -llvmlite==0.36.0 -locket==1.0.0 -lockfile==0.12.2 -louis==3.12.0 -lxml==4.8.0 -macaroonbakery==1.3.1 -mailchecker==4.1.13 -Markdown==2.6.11 -markdown-server==0.1.4 -MarkupSafe==2.0.1 -matplotlib==3.5.1 -matplotlib-inline==0.1.3 -mccabe==0.7.0 -miniful==0.0.6 -minio==7.1.14 -missingno==0.5.1 -mistune==3.0.0a3 -ml-dtypes==0.0.4 -msrest==0.6.21 -multidict==6.0.2 -multimethod==1.7 -multiprocess==0.70.12.2 -murmurhash==1.0.9 -mutagen==1.44.0 -mypy-extensions==0.4.3 -names==0.3.0 -nanotime==0.5.2 -nbclassic==0.3.7 -nbclient==0.6.0 -nbconvert==6.5.0 -nbformat==5.3.0 -nest-asyncio==1.5.4 -netifaces==0.10.4 -networkx==2.7 -nltk==3.6.7 -nonconformist==2.1.0 -notebook==6.4.11 -notebook-shim==0.1.0 -ntplib==0.4.0 -Nuitka==0.9.6 -numba==0.53.0 -numexpr==2.8.3 -numpy==1.23.5 -nvidia-ml-py3==7.352.0 -oauthlib==3.2.0 -olefile==0.46 -openapi-codec==1.3.2 -opencv-python==4.6.0.66 -openml==0.13.1 -openpyxl==2.5.12 -opt-einsum==3.3.0 -ordered-set==4.0.2 -orjson==3.7.6 -packaging==21.3 -paho-mqtt==1.6.1 -pandas==1.3.5 -pandas-profiling==3.6.6 -pandocfilters==1.5.0 -parso==0.8.3 -partd==1.3.0 -patchelf==0.15.0.0 -pathos==0.2.8 -pathspec==0.9.0 -pathtools==0.1.2 -pathy==0.10.1 -patsy==0.5.3 -pbr==5.9.0 -pdfminer.six==20220524 -pdftopng==0.2.3 -pexpect==4.6.0 -phik==0.12.0 -phonenumbers==8.12.44 -pickleshare==0.7.5 -Pillow==9.0.1 -platformdirs==2.5.2 -plotext==4.2.0 -plotly==5.6.0 -ply==3.11 -polars==0.13.51 -pox==0.3.0 -ppft==1.6.6.4 -present==0.6.0 -preshed==3.0.8 -prometheus-client==0.14.1 -promise==2.3 -prompt-toolkit==3.0.28 -proto-plus==1.20.3 -protobuf==3.20.3 -psutil==5.9.0 -psycopg2-binary==2.9.1 -ptyprocess==0.7.0 -pure-eval==0.2.2 -py-cpuinfo==9.0.0 -py-gfm==1.0.2 -pyarrow==9.0.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pycairo==1.16.2 -pycatch22==0.4.2 -pycodestyle==2.8.0 -pycparser==2.21 -pycryptodome==3.17 -pycryptodomex==3.6.1 -pycups==1.9.73 -pydantic==1.10.5 -pyDeprecate==0.3.2 -pydot==1.4.2 -pyfiglet==0.8.post1 -pyFUME==0.2.25 -pygit2==1.9.0 -Pygments==2.11.2 -PyGObject==3.36.0 -pygtrie==2.4.2 -PyJWT==1.7.1 -pylint==2.14.1 -pymacaroons==0.13.0 -PyNaCl==1.3.0 -pynisher==0.6.4 -pynvml==11.5.0 -pyparsing==3.0.7 -PyPDF2==2.4.2 -pyRFC3339==1.1 -pyrfr==0.8.3 -pyrsistent==0.18.1 -python-apt==2.0.1 -python-benedict==0.25.0 -python-dateutil==2.8.1 -python-debian===0.1.36ubuntu1 -python-fsutil==0.6.0 -python-json-logger==2.0.4 -python-pptx==0.6.21 -python-rapidjson==1.9 -python-slugify==6.1.1 -python-xlib==0.23 -pytorch-lightning==1.6.4 -pytz==2019.3 -PyWavelets==1.2.0 -pyxattr==0.6.1 -pyxdg==0.26 -PyYAML==6.0 -pyzmq==22.3.0 -qtconsole==5.3.1 -QtPy==2.1.0 -quantus==0.3.1 -ratelim==0.1.6 -redis==4.2.2 -regex==2022.3.15 -reportlab==3.5.34 -requests==2.27.1 -requests-oauthlib==1.3.1 -requests-unixsocket==0.2.0 -rfc3986==1.5.0 -rich==11.2.0 -rq==1.10.1 -rsa==4.8 -ruamel.yaml==0.17.21 -ruamel.yaml.clib==0.2.6 -rules==2.2 -s3transfer==0.3.7 -SALib==1.4.5 -scikit-image==0.19.2 -scikit-learn==1.2.0 -scipy==1.8.0 -scmrepo==0.0.7 -screen-resolution-extra==0.0.0 -seaborn==0.11.2 -SecretStorage==2.3.1 -semver==2.13.0 -Send2Trash==1.8.0 -sentry-sdk==1.5.8 -setproctitle==1.3.2 -shap==0.41.0 -shortuuid==1.0.8 -shtab==1.5.3 -simpful==2.10.0 -simplejson==3.16.0 -six==1.16.0 -sklearn==0.0 -skope-rules==1.0.1 -slicer==0.0.7 -smac==1.2 -smart-open==5.2.1 -smmap==5.0.0 -sniffio==1.2.0 -sortedcontainers==2.4.0 -soupsieve==2.3.2.post1 -spacy==3.5.0 -spacy-legacy==3.0.12 -spacy-loggers==1.0.4 -SQLAlchemy==0.7.10 -sqlalchemy-migrate==0.13.0 -sqlparse==0.4.2 -srsly==2.4.6 -srt==3.5.2 -stack-data==0.2.0 -starlette==0.25.0 -statsmodels==0.13.5 -stumpy==1.10.2 -substra==0.42.0 -substrafl==0.34.0 -substratools==0.20.0 -systemd-python==234 -tables==3.7.0 -tabula-py==2.4.0 -tabulate==0.8.9 -tangled-up-in-unicode==0.1.0 -tblib==1.7.0 -Tempita==0.5.2 -tenacity==8.0.1 -tensorboard==2.12.0 -tensorboard-data-server==0.7.0 -tensorboard-plugin-wit==1.8.1 -tensorflow==2.12.0 -tensorflow-estimator==2.12.0 -tensorflow-hub==0.12.0 -tensorflow-io-gcs-filesystem==0.24.0 -tensorflow-probability==0.16.0 -tensorflow-text==2.11.0 -termcolor==1.1.0 -terminado==0.13.3 -text-unidecode==1.3 -tf-estimator-nightly==2.8.0.dev2021122109 -tflite-runtime==2.8.0 -thinc==8.1.7 -threadpoolctl==3.1.0 -tifffile==2022.3.16 -tinycss2==1.1.1 -toml==0.10.2 -tomli==2.0.1 -tomlkit==0.11.0 -toolz==0.12.0 -torch==1.11.0 -torchmetrics==0.9.1 -torchvision==0.14.1 -tornado==6.1 -tqdm==4.63.0 -traitlets==5.1.1 -treeinterpreter==0.2.3 -tritonclient==2.31.0 -tsfresh==0.20.0 -typed-ast==1.5.4 -typeguard==2.13.3 -typer==0.7.0 -typing_extensions==4.4.0 -ua-parser==0.10.0 -ubuntu-advantage-tools==8001 -ubuntu-drivers-common==0.0.0 -ufw==0.36 -ujson==5.2.0 -unattended-upgrades==0.1 -uritemplate==4.1.1 -urllib3==1.26.8 -user-agents==2.2.0 -uvicorn==0.20.0 -visidata==1.5.2 -visions==0.7.5 -voluptuous==0.12.2 -vosk==0.3.37 -wadllib==1.3.3 -wandb==0.13.2 -wasabi==1.1.1 -wcwidth==0.2.5 -webencodings==0.5.1 -websocket-client==1.3.2 -websockets==8.1 -Werkzeug==2.0.3 -wget==3.2 -whichcraft==0.6.1 -widgetsnbextension==3.6.1 -wrapt==1.13.3 -xgboost==1.5.2 -xkit==0.0.0 -xlrd==0.7.1 -XlsxWriter==3.0.3 -xlwt==0.7.2 -xmljson==0.2.0 -xmltodict==0.12.0 -yarl==1.7.2 -ydata-profiling==4.1.1 -youtube-dl==2020.3.24 -yt-dlp==2022.3.8.1 -zc.lockfile==2.0 -zict==2.2.0 -zipp==3.7.0 -zope.event==4.5.0 -zope.interface==5.4.0 +numpy +matplotlib +plotly +pyyaml +tensorflow +scikit-learn +flask +lime +shap +codecarbon +pandas +scipy +ydata-profiling +pydantic +keras-tuner +xgboost +lightgbm diff --git a/src/clean.py b/src/clean.py index ad6c303..a7ca2f9 100644 --- a/src/clean.py +++ b/src/clean.py @@ -211,7 +211,11 @@ def parse_profile_warnings(self): messages = profile_json["alerts"] variables = list(profile_json["variables"].keys()) - correlations = profile_json["correlations"]["auto"] + + try: + correlations = profile_json["correlations"]["auto"] + except: + correlations = None removable_features = []