Skip to content

Commit

Permalink
Merge pull request #3 from janvandermeulen/linter
Browse files Browse the repository at this point in the history
Linter
  • Loading branch information
Remi-Lejeune authored May 6, 2024
2 parents deb2f5d + 9fd9a9e commit 0c48d6b
Show file tree
Hide file tree
Showing 18 changed files with 1,011 additions and 161 deletions.
5 changes: 4 additions & 1 deletion .dvc/config
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[core]
remote = s3-read
remote = gdrive
['remote "aws"']
url = s3://dvc-remla/dvc
version_aware = true
['remote "s3-read"']
url = https://dvc-remla.s3.amazonaws.com/dvc
['remote "gdrive"']
url = gdrive://13Kk88ZJAhU16WcaSnSiHBtWO4CcrURVL
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ In this assignment we will be transferring a small kaggle model to a professiona
- poetry for dependency management.

### How to run
To run this code you need to have poetry installed. You can install the packages by running the following command:
To run this code you need to have poetry installed. You can install the packages by running the following commands:
- ```poetry install```
- ```dvc pull```
- ```poetry shell```
To retrieve the data and run the pipeline:
- ```dvc pull```
- ```dvc repro```
To run the code quality metrics:
- ```pylint ./phishing_detection```
- ```bandit ./ -r```
4 changes: 4 additions & 0 deletions data/test.txt.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ outs:
size: 32724891
hash: md5
path: test.txt
cloud:
aws:
etag: a1cab28ad8554fd9269d31e5708469d0
version_id: 'null'
4 changes: 4 additions & 0 deletions data/train.txt.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ outs:
size: 93921671
hash: md5
path: train.txt
cloud:
aws:
etag: 13fd241a41370119612d5b00b0433009
version_id: 'null'
4 changes: 4 additions & 0 deletions data/val.txt.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ outs:
size: 46258330
hash: md5
path: val.txt
cloud:
aws:
etag: 7b1e28b1e91582c6f36e85517c3cc6ee
version_id: 'null'
138 changes: 79 additions & 59 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
schema: '2.0'
stages:
prepare:
cmd: python .\phishing-detection\phishing_detection\get_data.py data
cmd: python ./phishing-detection/phishing_detection/get_data.py data
deps:
- path: data/test.txt
hash: md5
Expand All @@ -15,108 +15,128 @@ stages:
hash: md5
md5: 7b1e28b1e91582c6f36e85517c3cc6ee
size: 46258330
- path: phishing-detection\phishing_detection\get_data.py
- path: phishing-detection/phishing_detection/get_data.py
hash: md5
md5: 6708263ffcff004f1762661aaf786abe
size: 1539
md5: bdf8f521e5c27faa0d177946a2ceac54
size: 1848
outs:
- path: data/raw/X_test.txt
hash: md5
md5: 3fea8da6e4997dd0ede997235a10ca51
size: 29407820
md5: 1563282b483d43739f43f425200044c5
size: 29407738
- path: data/raw/X_train.txt
hash: md5
md5: e30e89161e21482d56537c6d0889db94
size: 84396770
- path: data/raw/X_val.txt
hash: md5
md5: 53d02f5e8a3a4de5fbb99be60fffea4f
size: 41567062
md5: 7d004257a712e5265a5bd985aee42979
size: 41567033
- path: data/raw/y_test.txt
hash: md5
md5: f04d1da8fd322e8e8d7b40ca2cff6cd9
size: 4045462
md5: 2e3c9d7f5cfbbbaa9ab11e95747c6b88
size: 4045450
- path: data/raw/y_train.txt
hash: md5
md5: a2628fb8ca50c89fe1250772b8a50206
size: 11616282
- path: data/raw/y_val.txt
hash: md5
md5: b63122d1f1df50d4e75507b729ab6c03
size: 5721428
md5: 3c330979d7cb08758fcd9d6feaf6aa04
size: 5721418
preprocess:
cmd: python .\phishing-detection\phishing_detection\preprocess.py data
cmd: python ./phishing-detection/phishing_detection/preprocess.py data
deps:
- path: data/raw/X_test.txt
hash: md5
md5: 3fea8da6e4997dd0ede997235a10ca51
size: 29407820
md5: 1563282b483d43739f43f425200044c5
size: 29407738
- path: data/raw/X_train.txt
hash: md5
md5: e30e89161e21482d56537c6d0889db94
size: 84396770
- path: data/raw/X_val.txt
hash: md5
md5: 53d02f5e8a3a4de5fbb99be60fffea4f
size: 41567062
md5: 7d004257a712e5265a5bd985aee42979
size: 41567033
- path: data/raw/y_test.txt
hash: md5
md5: f04d1da8fd322e8e8d7b40ca2cff6cd9
size: 4045462
md5: 2e3c9d7f5cfbbbaa9ab11e95747c6b88
size: 4045450
- path: data/raw/y_train.txt
hash: md5
md5: a2628fb8ca50c89fe1250772b8a50206
size: 11616282
- path: data/raw/y_val.txt
hash: md5
md5: b63122d1f1df50d4e75507b729ab6c03
size: 5721428
- path: phishing-detection\phishing_detection\preprocess.py
md5: 3c330979d7cb08758fcd9d6feaf6aa04
size: 5721418
- path: phishing-detection/phishing_detection/preprocess.py
hash: md5
md5: 6061605426b00694090cdd7d6fc04a46
size: 3298
md5: 755b39e473c43e878554eb91c365223b
size: 3490
outs:
- path: data/preprocess/X_test.npy
hash: md5
md5: 93675aba9e6e2c9b80277481652f0f7d
size: 291359328
md5: 23fa98d19a833016d86142c36cecda27
size: 291358528
cloud:
aws:
etag: 421dedecdf7fecc1b1125f6fbd5ad28e-6
version_id: 'null'
- path: data/preprocess/X_train.npy
hash: md5
md5: d81af1c35a77cdeac4d8ccee76cc939e
size: 836619328
cloud:
aws:
etag: 745db7209662781c75018ec2bbe59b67-16
version_id: 'null'
- path: data/preprocess/X_val.npy
hash: md5
md5: 6cf90ab489d52d04b2b739d1648378c9
size: 412064128
md5: 22128e9681506e90a2b437d45da5f8cf
size: 412063328
cloud:
aws:
etag: 876339ed4410440941887103644018ee-8
version_id: 'null'
- path: data/preprocess/char_index.json
hash: md5
md5: fe87b0a484b3b60b50fd58cd538aaaae
size: 639
cloud:
aws:
etag: fe87b0a484b3b60b50fd58cd538aaaae
version_id: 'null'
- path: data/preprocess/y_test.npy
hash: md5
md5: f75fc025e787c7c33a7a8c50d3b901d0
size: 1456924
md5: 311ff57eefa708370ff251eabff39a65
size: 1456920
- path: data/preprocess/y_train.npy
hash: md5
md5: 9b660634af0db95a6e2932208bc73b91
size: 8366320
cloud:
aws:
etag: 9b660634af0db95a6e2932208bc73b91
version_id: 'null'
- path: data/preprocess/y_val.npy
hash: md5
md5: b2b6fd1c6458b139988973e159e5fff1
size: 2060448
md5: 1d5fa5d29393e97dbba3a5cd32813e14
size: 2060444
train:
cmd:
- python .\phishing-detection\phishing_detection\model_definition.py data
- python .\phishing-detection\phishing_detection\train.py data
- python ./phishing-detection/phishing_detection/model_definition.py data
- python ./phishing-detection/phishing_detection/train.py data
deps:
- path: data/preprocess/X_train.npy
hash: md5
md5: d81af1c35a77cdeac4d8ccee76cc939e
size: 836619328
- path: data/preprocess/X_val.npy
hash: md5
md5: 6cf90ab489d52d04b2b739d1648378c9
size: 412064128
md5: 22128e9681506e90a2b437d45da5f8cf
size: 412063328
- path: data/preprocess/char_index.json
hash: md5
md5: fe87b0a484b3b60b50fd58cd538aaaae
Expand All @@ -127,49 +147,49 @@ stages:
size: 8366320
- path: data/preprocess/y_val.npy
hash: md5
md5: b2b6fd1c6458b139988973e159e5fff1
size: 2060448
- path: phishing-detection\phishing_detection\model_definition.py
md5: 1d5fa5d29393e97dbba3a5cd32813e14
size: 2060444
- path: phishing-detection/phishing_detection/model_definition.py
hash: md5
md5: 2fc8e16119ddd926da3552c06bc4ed17
size: 2167
- path: phishing-detection\phishing_detection\train.py
md5: a9147652d192df0b0c96b81a17835a28
size: 2391
- path: phishing-detection/phishing_detection/train.py
hash: md5
md5: 3a7679d044d5e20a9aca3e3bfef7a098
size: 1645
md5: de203b5cebd4d6fb83cd5dd841f5e822
size: 1827
outs:
- path: data/model/initial_model.keras
hash: md5
md5: 5f40cb6e2ba4e900651c8257a4e38565
md5: 8309bed8a584bb33e377a1183c811a21
size: 49737
- path: data/model/trained_model.keras
hash: md5
md5: d8e606f49bff4b99f52a4bcec3fcb8f4
size: 5472087
md5: 2b91e95957947219e7cf6edf3b1493e3
size: 5472435
test:
cmd: python .\phishing-detection\phishing_detection\predict.py data
cmd: python ./phishing-detection/phishing_detection/predict.py data
deps:
- path: data/model/trained_model.keras
hash: md5
md5: d8e606f49bff4b99f52a4bcec3fcb8f4
size: 5472087
md5: 2b91e95957947219e7cf6edf3b1493e3
size: 5472435
- path: data/preprocess/X_test.npy
hash: md5
md5: 93675aba9e6e2c9b80277481652f0f7d
size: 291359328
md5: 23fa98d19a833016d86142c36cecda27
size: 291358528
- path: data/preprocess/y_test.npy
hash: md5
md5: f75fc025e787c7c33a7a8c50d3b901d0
size: 1456924
- path: phishing-detection\phishing_detection\predict.py
md5: 311ff57eefa708370ff251eabff39a65
size: 1456920
- path: phishing-detection/phishing_detection/predict.py
hash: md5
md5: 5f8eb7428d2ad8f48b30f9b1bf4a244c
size: 3092
md5: 4ccca662abbfa46adaf5d758052f70db
size: 3148
outs:
- path: data/results/confusion_matrix.pdf
hash: md5
md5: 03a14f4eab2ef7da906cbdaf25cd1f8a
size: 13214
md5: 814e380c2f919555d2990c700f3d4537
size: 13666
- path: data/results/results.txt
hash: md5
md5: 4c24ed3cfccea4b4e08cc753e8a82f51
Expand Down
20 changes: 10 additions & 10 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
stages:
prepare:
cmd: python .\phishing-detection\phishing_detection\get_data.py data
cmd: python ./phishing-detection/phishing_detection/get_data.py data
deps:
- phishing-detection\phishing_detection\get_data.py
- phishing-detection/phishing_detection/get_data.py
- data/train.txt
- data/test.txt
- data/val.txt
Expand All @@ -14,9 +14,9 @@ stages:
- data/raw/X_test.txt
- data/raw/y_test.txt
preprocess:
cmd : python .\phishing-detection\phishing_detection\preprocess.py data
cmd : python ./phishing-detection/phishing_detection/preprocess.py data
deps:
- phishing-detection\phishing_detection\preprocess.py
- phishing-detection/phishing_detection/preprocess.py
- data/raw/X_train.txt
- data/raw/y_train.txt
- data/raw/X_val.txt
Expand All @@ -33,11 +33,11 @@ stages:
- data/preprocess/char_index.json
train:
cmd:
- python .\phishing-detection\phishing_detection\model_definition.py data
- python .\phishing-detection\phishing_detection\train.py data
- python ./phishing-detection/phishing_detection/model_definition.py data
- python ./phishing-detection/phishing_detection/train.py data
deps:
- phishing-detection\phishing_detection\model_definition.py
- phishing-detection\phishing_detection\train.py
- phishing-detection/phishing_detection/model_definition.py
- phishing-detection/phishing_detection/train.py
- data/preprocess/X_train.npy
- data/preprocess/y_train.npy
- data/preprocess/X_val.npy
Expand All @@ -47,9 +47,9 @@ stages:
- data/model/initial_model.keras
- data/model/trained_model.keras
test:
cmd: python .\phishing-detection\phishing_detection\predict.py data
cmd: python ./phishing-detection/phishing_detection/predict.py data
deps:
- phishing-detection\phishing_detection\predict.py
- phishing-detection/phishing_detection/predict.py
- data/model/trained_model.keras
- data/preprocess/X_test.npy
- data/preprocess/y_test.npy
Expand Down
2 changes: 2 additions & 0 deletions phishing-detection/.bandit
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[bandit]
skips = B106
Loading

0 comments on commit 0c48d6b

Please sign in to comment.