diff --git a/README.md b/README.md index 02d2317..e10b42e 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,32 @@ MammalBase is a database of recent mammals. The main focus of this database is t # Documentation -[Instructions](documentation/instructions.md) +Documentation is collected in the `documentation` directory. It explains the structure of the code, the Django model objects used in the code, and, for example, the logic of the import and export operations. Various scripts related to the use of MammalBase are also described in their own documents. -[Architecture](documentation/architecture.md) +Below are a few direct links to key instructions. -[Data Architecture](documentation/data-architecture.md) +[Instructions](documentation/common/instructions.md) +- Setting up the development environment and other important aspects of development work + +[Architecture](documentation/mammalbase/arch/architecture.md) +- Overview of the software architecture + +[Data Architecture](documentation/mammalbase/arch/data-architecture.md) +- Overview of the data architecture [Master Habitats Script](documentation/scripts/master_habitat_scripts.md) +- Retrieval of Master Habitats using a script + +[Features](documentation/mammalbase/features/) +- Current features of MammalBase (e.g. importing and exporting TSV files) + +[Models](documentation/mammalbase/models/) +- Models used in MammalBase ## Testing -[Testing Guide](documentation/testing.md) +[Testing Guide](documentation/common/testing.md) +- Instructions for testing the software ## Using Celery to run tasks in the background -[Celery instructions](documentation/celery.md) +[Celery instructions](documentation/common/celery.md) +- Background tasks in the software are executed with the Celery library.
diff --git a/app/exports/query_sets/base_query.py b/app/exports/query_sets/base_query.py index d9013b3..04eea1d 100644 --- a/app/exports/query_sets/base_query.py +++ b/app/exports/query_sets/base_query.py @@ -23,8 +23,11 @@ def base_query(measurement_choices): | Q(source_unit__master_unit__is_active=False) ) #filtteröi onko tiettyjen kysymysten arvot active vai non-active - # measurement_choices is a list of user choices made in forms.py/views.py. - query_filter_list = [Q(source_attribute__master_attribute__attributegrouprelation__group__name=value) for value in measurement_choices] + # measurement_choices is a list of user choices made in forms.py/views.py. + query_filter_list = [ + Q( + source_attribute__master_attribute__attributegrouprelation__group__name=value + ) for value in measurement_choices] measurement_choice_filter = Q() for query_filter in query_filter_list: measurement_choice_filter |= query_filter @@ -65,4 +68,4 @@ def base_query(measurement_choices): | Q(source_attribute__reference__status=3) ).filter(measurement_choice_filter) - return query_filter \ No newline at end of file + return query_filter diff --git a/app/exports/query_sets/measurement_or_fact_query.py b/app/exports/query_sets/measurement_or_fact_query.py index f322012..9554469 100644 --- a/app/exports/query_sets/measurement_or_fact_query.py +++ b/app/exports/query_sets/measurement_or_fact_query.py @@ -37,6 +37,20 @@ def measurement_or_fact_query( | Q(source_entity__reference__is_active=False) | Q(source_entity__reference__master_reference__is_active=False) | Q(source_statistic__is_active=False) + + ) + + nominal_non_active = ( + Q(source_entity__master_entity__entity__is_active=False) + | Q(source_entity__reference__is_active=False) + | Q(source_entity__reference__master_reference__is_active=False) + | Q(source_choiceset_option__source_attribute__master_attribute__id=None) + | Q(source_choiceset_option__source_attribute__master_attribute__name='- Checked, Unlinked -') + | 
Q(source_choiceset_option__source_attribute__master_attribute__name__exact='') + | Q(source_entity__master_entity__name__exact='') + | Q(source_entity__master_entity__id__isnull=True) + | Q(source_choiceset_option__source_attribute__reference__status=1) + | Q(source_choiceset_option__source_attribute__reference__status=3) ) now = datetime.now(tz=timezone(timedelta(hours=2))) @@ -69,9 +83,9 @@ def measurement_or_fact_query( default = mb_reference ) - nominal_query = SourceChoiceSetOptionValue.objects.annotate( + nominal_query = SourceChoiceSetOptionValue.objects.exclude(nominal_non_active).annotate( entity_id=Concat( - Value('http://localhost:8000/sav/'), + Value('https://www.mammalbase.net/sav/'), 'id', Value('/'), output_field=CharField() @@ -80,6 +94,10 @@ def measurement_or_fact_query( basis_of_record_description=F('source_entity__reference__master_reference__type'), references=references, measurement_resolution=Case( + When( + source_entity__master_entity__entity__name__exact=None, + then=Value('NA') + ), When( source_entity__master_entity__entity__name__iendswith='species', then=Value('NA') @@ -94,6 +112,10 @@ def measurement_or_fact_query( source_choiceset_option__source_attribute__method__name__exact=None, then=Value('NA') ), + When( + source_choiceset_option__source_attribute__method__name__exact='nan', + then=Value('NA') + ), default='source_choiceset_option__source_attribute__method__name', output_field=CharField() ), @@ -104,7 +126,9 @@ def measurement_or_fact_query( individual_count=Value('NA'), dispersion=Value('NA'), measurement_value_min=Value('NA'), - measurement_value_max=Value('NA') + measurement_value_max=Value('NA'), + measurement_accuracy=Value('NA'), + statistical_method=Value('NA') ) @@ -140,9 +164,16 @@ def measurement_or_fact_query( measurement_remarks=Case( When( remarks__exact=None, - then=Value('NA') + then=Concat( + Value('Data quality score '), + 'data_quality_score', + Value('/10')) ), - default='remarks', + default=Concat( + 
Value('Data quality score '), + 'data_quality_score', + Value("/10 | "), + 'remarks'), output_field=CharField() ), aggregate_measure=Case( @@ -181,7 +212,7 @@ def measurement_or_fact_query( if measurement_choice == "Nominal traits": # TODO: Find fields in query and export to spreadsheet here fields = [ - ('entity_id','traitID'), + ('entity_id','measurementID'), ('basis_of_record', 'basisOfRecord'), ('source_entity__reference__master_reference__type', 'basisOfRecordDescription'), ('references', 'references'), @@ -194,7 +225,9 @@ def measurement_or_fact_query( ('individual_count', 'individualCount'), ('dispersion', 'dispersion'), ('measurement_value_min', 'measurementValue_min'), - ('measurement_value_max', 'measurementValue_max') + ('measurement_value_max', 'measurementValue_max'), + ('measurement_accuracy', 'measurementAccuracy'), + ('statistical_method', 'statisticalMethod') ] query = nominal_query elif measurement_choice in ('External measurements', 'Cranial measurements'): diff --git a/app/exports/query_sets/metadata_query.py b/app/exports/query_sets/metadata_query.py index 4e1d15e..f06769e 100644 --- a/app/exports/query_sets/metadata_query.py +++ b/app/exports/query_sets/metadata_query.py @@ -27,12 +27,10 @@ def metadata_query(measurement_choices): query = base.exclude(non_active).annotate( dataset_id=Value('https://urn.fi/urn:nbn:fi:att:8dce459f-1401-4c6a-b2bb-c831bd8d3d6f'), - dataset_name=Value( - 'MammalBase — Dataset 03: Trait Data in Ecological Trait-data Standard (ETS) format'), + dataset_name=Value('MammalBase — Dataset 03: Trait Data in Ecological Trait-data Standard (ETS) format'), dataset_description=Value( 'MammalBase - www.mammalbase.net: ' - 'Trait dataset output in Ecological Trait-data Standard (ETS)' - ), + 'Trait dataset output in Ecological Trait-data Standard (ETS)'), orcid_uid=Subquery( SocialAccount.objects.filter( user_id=OuterRef('created_by__id') @@ -54,9 +52,8 @@ def metadata_query(measurement_choices): 'URL: 
https://doi.org/10.5281/zenodo.1485739') ), rights_holder=Value( - 'Lintulaakso, ', - 'Kari;https://orcid.org/0000-0001-9627-8821;Finnish Museum of Natural History LUOMUS' - ), + 'Lintulaakso, ' + 'Kari;https://orcid.org/0000-0001-9627-8821;Finnish Museum of Natural History LUOMUS'), rights=Value('Attribution 4.0 International (CC BY 4.0)'), licence=Value('CC BY 4.0') ).annotate( @@ -69,12 +66,10 @@ def metadata_query(measurement_choices): nominal_query = SourceChoiceSetOptionValue.objects.exclude(non_active).annotate( dataset_id=Value('https://urn.fi/urn:nbn:fi:att:8dce459f-1401-4c6a-b2bb-c831bd8d3d6f'), - dataset_name=Value( - 'MammalBase — Dataset 03: Trait Data in Ecological Trait-data Standard (ETS) format'), + dataset_name=Value('MammalBase — Dataset 03: Trait Data in Ecological Trait-data Standard (ETS) format'), dataset_description=Value( 'MammalBase - www.mammalbase.net: ' - 'Trait dataset output in Ecological Trait-data Standard (ETS)' - ), + 'Trait dataset output in Ecological Trait-data Standard (ETS)'), orcid_uid=Subquery( SocialAccount.objects.filter( user_id=OuterRef('created_by__id') @@ -97,8 +92,7 @@ def metadata_query(measurement_choices): ), rights_holder=Value( 'Lintulaakso, ' - 'Kari;https://orcid.org/0000-0001-9627-8821;Finnish Museum of Natural History LUOMUS' - ), + 'Kari;https://orcid.org/0000-0001-9627-8821;Finnish Museum of Natural History LUOMUS'), rights=Value('Attribution 4.0 International (CC BY 4.0)'), licence=Value('CC BY 4.0') ).annotate( @@ -110,7 +104,6 @@ def metadata_query(measurement_choices): ).order_by('author').distinct() fields = [ - ('id', 'hmmm'), ('dataset_id', 'datasetID'), ('dataset_name', 'datasetName'), ('dataset_description', 'datasetDescription'), @@ -128,10 +121,7 @@ def metadata_query(measurement_choices): if "Nominal traits" in measurement_choices: queries.append((nominal_query, fields)) - if ("Cranial measurements" in - measurement_choices or - "External measurements" in - measurement_choices): + if "Cranial 
measurements" in measurement_choices or "External measurements" in measurement_choices: queries.append((query, fields)) return queries diff --git a/app/exports/query_sets/occurrence_query.py b/app/exports/query_sets/occurrence_query.py index 8fad152..cca5395 100644 --- a/app/exports/query_sets/occurrence_query.py +++ b/app/exports/query_sets/occurrence_query.py @@ -1,4 +1,4 @@ -from django.db.models import Value, Case, When, CharField, Q +from django.db.models import Value, Q from mb.models import SourceChoiceSetOptionValue from .base_query import base_query diff --git a/app/exports/query_sets/taxon_query.py b/app/exports/query_sets/taxon_query.py index 5c04dff..ceedf81 100644 --- a/app/exports/query_sets/taxon_query.py +++ b/app/exports/query_sets/taxon_query.py @@ -16,6 +16,7 @@ def taxon_query(measurement_choices): non_active = ( Q(source_entity__master_entity__entity__is_active=False) + | Q(source_entity__master_entity__id=None) ) query = base.exclude(non_active).annotate( diff --git a/app/exports/query_sets/traitdata_query.py b/app/exports/query_sets/traitdata_query.py index e1b82c7..7f2179d 100644 --- a/app/exports/query_sets/traitdata_query.py +++ b/app/exports/query_sets/traitdata_query.py @@ -19,6 +19,23 @@ def traitdata_query(measurement_choices): Q(source_attribute__master_attribute__unit__is_active=False) ) + nominal_non_active = ( + Q(source_choiceset_option__source_attribute__master_attribute__unit__is_active=False) + | Q(source_choiceset_option__source_attribute__master_attribute__id=None) + | Q(source_choiceset_option__source_attribute__master_attribute__name='- Checked, Unlinked -') + | Q(source_choiceset_option__source_attribute__master_attribute__name__exact='') + | Q(source_entity__master_entity__name__exact='') + | Q(source_entity__master_entity__id__isnull=True) + | Q(source_choiceset_option__source_attribute__reference__status=1) + | Q(source_choiceset_option__source_attribute__reference__status=3) + | 
Q(source_choiceset_option__master_choiceset_option__name=None) + ) + master_attribute_filter = ( + Q(source_choiceset_option__master_choiceset_option__master_attribute_id = + F('source_choiceset_option__source_attribute__master_attribute__id') + ) + ) + query = base.exclude(non_active).annotate( trait_id=Concat( Value('https://www.mammalbase.net/ma/'), @@ -64,7 +81,8 @@ def traitdata_query(measurement_choices): 'source_attribute__master_attribute__name' ) - nominal_query = SourceChoiceSetOptionValue.objects.annotate( + nominal_query = SourceChoiceSetOptionValue.objects.filter( + master_attribute_filter).exclude(nominal_non_active).annotate( trait_id=Concat( Value('https://www.mammalbase.net/ma/'), 'source_choiceset_option__source_attribute__master_attribute__id', @@ -81,7 +99,12 @@ def traitdata_query(measurement_choices): Value('/'), output_field=CharField() ), - measurement_id=Value('NA'), + measurement_id=Concat( + Value('https://www.mammalbase.net/sav/'), + 'id', + Value('/'), + output_field=CharField() + ), occurrence_id=Value('NA'), warnings=Value('NA'), ).order_by( diff --git a/app/exports/query_sets/traitlist_query.py b/app/exports/query_sets/traitlist_query.py index 5a4e6dd..893b212 100644 --- a/app/exports/query_sets/traitlist_query.py +++ b/app/exports/query_sets/traitlist_query.py @@ -1,7 +1,7 @@ -from django.db.models import Value, CharField, Q +from django.db.models import Value, CharField, Q, Case, When from django.db.models.functions import Concat, Replace -from mb.models import SourceChoiceSetOptionValue +from mb.models import MasterAttribute from .base_query import base_query @@ -14,11 +14,13 @@ def traitlist_query(measurement_choices): """ base = base_query(measurement_choices) + non_active = ( Q(source_attribute__master_attribute__unit__is_active=False) | Q(source_attribute__master_attribute__reference__is_active=False) ) + query = base.exclude(non_active).annotate( identifier=Concat( Value('https://www.mammalbase.net/ma/'), @@ -39,30 +41,44 @@ 
def traitlist_query(measurement_choices): 'source_attribute__master_attribute__attributegrouprelation__display_order' ).distinct() - nominal_query = SourceChoiceSetOptionValue.objects.annotate( + attributelink = {} + attributes = MasterAttribute.objects.filter(groups__name='Nominal traits') + for i in attributes: + for j in i.masterchoicesetoption_set.all(): + try: + attributelink[i.name].append(j.name) + except: + attributelink[i.name]=[j.name] + + nominal_query = MasterAttribute.objects.prefetch_related( + 'master_choiceset_option_set').filter(groups__name='Nominal traits').annotate( identifier=Concat( Value('https://www.mammalbase.net/ma/'), - 'source_choiceset_option__source_attribute__master_attribute__id', + 'id', Value('/'), output_field=CharField() ), trait=Replace( - 'source_choiceset_option__source_attribute__master_attribute__name', + 'name', Value(' '), Value('_') ), narrowerTerm=Value('NA'), relatedTerm=Value('NA'), + factorLevels=Case( + *[ + When(name=attribute_name, then=Value(', '.join(attributelink[attribute_name]))) for attribute_name in attributelink + ],output_field=CharField() + ), broaderTerm=Value('NA'), expectedUnit=Value('NA'), - max_allowed_value=Value('NA'), - min_allowed_value=Value('NA'), comments=Value('NA'), ).order_by( - 'source_choiceset_option__source_attribute__master_attribute__groups__name', - 'source_choiceset_option__source_attribute__master_attribute__attributegrouprelation__display_order' + 'groups__name', + 'attributegrouprelation__display_order' ).distinct() + fields = [ ('identifier', 'identifier'), ('trait', 'trait'), @@ -79,20 +95,21 @@ def traitlist_query(measurement_choices): ('source_attribute__master_attribute__reference__citation', 'source') ] + nominal_fields = [ ('identifier', 'identifier'), ('trait', 'trait'), ('broaderTerm', 'broaderTerm'), ('narrowerTerm', 'narrowerTerm'), ('relatedTerm', 'relatedTerm'), - ('source_choiceset_option__source_attribute__master_attribute__value_type', 'valueType'), + 
('value_type', 'valueType'), ('expectedUnit', 'expectedUnit'), - ('source_choiceset_option__master_choiceset_option__name', 'factorLevels'), + ('factorLevels', 'factorLevels'), ('max_allowed_value', 'maxAllowedValue'), ('min_allowed_value', 'minAllowedValue'), - ('source_choiceset_option__source_attribute__master_attribute__description', 'traitDescription'), + ('description', 'traitDescription'), ('comments', 'comments'), - ('source_choiceset_option__source_attribute__master_attribute__reference__citation', 'source') + ('reference__citation', 'source') ] queries = [] diff --git a/app/exports/tasks.py b/app/exports/tasks.py index c93c08f..10e0d9f 100644 --- a/app/exports/tasks.py +++ b/app/exports/tasks.py @@ -148,6 +148,7 @@ def ets_export_query_set( export_list = [] for choice in measurement_choices: + print(choice) export_list.append({ 'file_name': ('measurement_or_fact_' f'{choice.split()[0].lower()}'), @@ -160,9 +161,12 @@ def ets_export_query_set( 'taxon': taxon_query, 'occurrence': occurrence_query, 'metadata': metadata_query }.items(): - export_list.append({ - 'file_name': file_name, - 'queries_and_fields': query_function(measurement_choices) }) + if ('External measurements' or 'Cranial measurements') not in measurement_choices and file_name == "occurrence": + pass + else: + export_list.append({ + 'file_name': file_name, + 'queries_and_fields': query_function(measurement_choices) }) export_zip_file( email_recipient=email_recipient, diff --git a/documentation/architecture.md b/documentation/architecture.md deleted file mode 100644 index d62f6e7..0000000 --- a/documentation/architecture.md +++ /dev/null @@ -1,80 +0,0 @@ -## Directory Structure - -The project directory structure is as follows: - -- `.github/`: Contains GitHub-specific files, such as workflows for GitHub Actions. -- `app/`: The main application directory. It contains several subdirectories: - - `config/`: Configuration files for the application. - - `exports/`: Files related to exporting data. 
- - `imports/`: Files related to importing data. - - `itis/`: ITIS (Integrated Taxonomic Information System) related files. - - `main/`: User authentication, user profiles, and other general application features. - - `mb/`: Main mammalbase application files. - - `scripts/`: Scripts used in the application. - - `tdwg/`: TDWG (Taxonomic Databases Working Group) related files. - - `tests/`: Unit tests for the application. - - `urls/`: URL handling is centralized here. paths, subpath includes, etc -- `documentation/`: Contains markdown files with documentation about the application, its architecture, testing, environment variables, etc. -- `nginx/`: Configuration files for the Nginx server. - -## App - -The `app` directory contains the main django application files. The application is divided into several subapplications, each of which contains files related to a specific feature of the application. - -### Config - -The `config` directory contains configuration files for the application. The `settings.py` file contains the main settings for the application, such as database settings, installed apps, middleware, etc. - -### Exports - -Documentation for the exports directory can be found in the [exports documentation](documentation/exports.md). - - -### Imports - -Documentation for the imports directory can be found in the [imports documentation](documentation/imports.md). - -### Itis - -Itis is a directory that contains files related to the ITIS (Integrated Taxonomic Information System) API. - -### Main - -The `main` directory contains the main application files related to user authentication, user profiles, and other general application features. It contains templates to login with socian account orcId. - -### Mb (MammalBase) - -The `mb` directory contains main files related to main MammalBase application. All the `models` related to MammalBase are defined in this directory. Also all the `views` and `urls` related to MammalBase are defined in this directory. 
- -### Scripts - -The `scripts` directory contains scripts used in the deployment of the application. - -`entrypoint.prod.sh` is the entrypoint script for the production server. - -`entrypoint.sh` is the entrypoint script for the development server. - -### Tdwg - -The `tdwg` directory contains Taxon model that follows TDWG (Taxonomic Databases Working Group) standards. - -### Tests - -The `tests` directory contains unit tests for the application. The tests are divided into several folders, each of which contains tests for a specific part of the application. - -### Urls - -This subpackage contains all URL related information. All the urls and their -view counterparts are listed in the submodules starting from `__init__.py`. - -## Documentation - -The `documentation` directory contains markdown files with documentation about the application. - -## Nginx - -The `nginx` directory contains configuration files for the Nginx server. - -## .github - -The `.github` directory contains GitHub-specific files, such as workflows for GitHub Actions. diff --git a/documentation/celery.md b/documentation/common/celery.md similarity index 100% rename from documentation/celery.md rename to documentation/common/celery.md diff --git a/documentation/deploy.md b/documentation/common/deploy.md similarity index 100% rename from documentation/deploy.md rename to documentation/common/deploy.md diff --git a/documentation/environment_variables.md b/documentation/common/environment_variables.md similarity index 100% rename from documentation/environment_variables.md rename to documentation/common/environment_variables.md diff --git a/documentation/common/example.env b/documentation/common/example.env new file mode 100644 index 0000000..cd2a403 --- /dev/null +++ b/documentation/common/example.env @@ -0,0 +1,53 @@ +# Example enviroment file. +# Fill out proper values before building dev environment. 
+# Please refer to /documentation/common/environment_variables.md for details + + +# Django configuration +# These are read mainly in /app/config/settings.py and +# /app/scripts/initialize.py. + +#SECRET_KEY=development_key +DEBUG=1 +ALLOWED_HOSTS=* +SITE_DOMAIN=localhost:8000 +SITE_NAME=MammalBase +EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend +# EMAIL_USER & EMAIL_PASS are not needed for backends.console.EmailBackend +#EMAIL_USER= +#EMAIL_PASS= + +# Django Admin, please change the password. + +DJANGO_SUPERUSER_USERNAME=admin +DJANGO_SUPERUSER_PASSWORD= +DJANGO_SUPERUSER_EMAIL=admin@example.com + + +# ORCID client id and secret you can get from Kari + +#ORCID_CLIENT_ID= +#ORCID_SECRET= + + +# phpMyAdmin + +PMA_UPLOAD_LIMIT=10M + + +# MySQL related variables. (works as is for development) + +DB_HOST=db +DB_PORT=3306 +DB_NAME=mb_dev +DB_USER=mb_dev +DB_PASS=password +DB_ROOT_PASS=rootpassword + + +# Docker +# On a normal Linux distro run `echo $UID` or `id -u` to figure out the current +# user's ID. Should be commented out on Windows machines. +# Refer to /documentation/common/environment_variables.md for more details. + +UID= diff --git a/documentation/common/instructions.md b/documentation/common/instructions.md new file mode 100644 index 0000000..8439021 --- /dev/null +++ b/documentation/common/instructions.md @@ -0,0 +1,186 @@ +# Development Environment + +Installing the development environment takes a lot of time; there are many +steps to follow and many steps that can go wrong. If you are not familiar +with Docker and the other techniques used, you can expect to spend about 3 +hours installing. We really hope that this documentation clears things up at +least a bit. Happy installing. + + +## Install Docker + +First, you need to have Docker and Docker Compose installed on your system.
+ +- [Docker](https://docs.docker.com/get-docker/) +- [Docker Compose](https://docs.docker.com/compose/install/) + + +### Getting Docker from your distribution + +You most probably want to install Docker directly from your distribution. +This project certainly doesn't need the latest version of Docker, so you may +be better off with a more tested and stable one. Installation is also simpler +and less risky. + +Debian and Ubuntu (the only command needed, run as root): +``` +apt-get install docker-compose +``` + +Note that older versions of Docker make use of `docker-compose` instead of +`docker compose`. Functionality is pretty much the same and they are +interchangeable in the examples of this documentation. + +## Setting up the environment + +### Clone + +Just clone the repository and navigate your terminal to the root of it. + + +### Environment variables + +Environment variables must be set correctly in the `/.env` file before building +the Docker containers. + +A good starting point is to copy [`example.env`](example.env) to the +root of the project (next to `README.md`): +``` +cp documentation/common/example.env .env +``` +Then just open the `.env` file and carefully examine and modify the file +contents according to the instructions and the +[Environment variable docs](environment_variables.md). + + +### Build the containers +After Docker is installed and the environment variables are set correctly, it's +time to build the containers. Note that the first build will take a +considerable amount of time, as container images need to be fetched from the +internet and prepared. A reasonable estimate is around 10-20 min. + +``` +docker compose build +``` +It's a good idea to run this periodically, as container images get upgraded. +Also, after making changes to `Dockerfile` or `docker-compose.yml`, containers +should be rebuilt. + + +## Running the app + +If everything went well so far you can start the containers.
Please note that +the first time this will take quite a long time, as all migrations need to be +pushed to the database. + +### Start containers up: +``` +docker compose up +``` +Append the `-d` switch for detached mode, meaning that execution returns to the +terminal and the containers keep running in the background. + +Append the `--build` switch to build the containers before starting them. This +is essentially the same as running `docker compose build` just before. + + +After a while the app should be accessible with a browser. At the address +[localhost:8000](http://localhost:8000), you should see the MammalBase app +running. You can also visit phpMyAdmin at +[localhost:8001](http://localhost:8001) to see and modify the databases. + +If the website doesn't show up, wait some time or check the logs. Most likely +the service just hasn't started yet. Startup can take something like 5-30 sec. + +You can make changes to the Django app in real time when the containers are +running. The [`/app`](../../app) directory has been bound to the web +container so that all the changes to the host machine's `/app` +directory are also made in the container. + +### Shutdown containers: +``` +docker compose down +``` + + +## Basic commands + +### View all logs: +``` +docker compose logs +``` +See `docker compose logs --help` for details. + + +### Follow logs as they appear: +``` +docker compose logs -f +``` + + +### Follow log of individual container: +``` +docker compose logs -f [container] +``` + + +### Shutdown containers and remove the volumes +``` +docker compose down -v +``` +Use this when you also want to remove the volumes (meaning that +the **database will be reset**). + + +## Other useful commands + +### List running containers: + +``` +docker ps +``` +Knowing the name or ID lets you run actions on a certain container. It's not +guaranteed that names are the same in different setups. + + +### Execute commands inside the container: + +``` +docker exec [container] [command] ... +``` +Runs `[command] ...` inside the `[container]`.
The container needs to be running +for this to work. See `docker exec --help` for details. + + +### Check out local environment inside container: +``` +docker exec mammalbase_web_1 sh -c export +``` + + +### Open shell inside container: +``` +docker exec -it mammalbase_web_1 bash +``` +Note the `-it` switch. It's needed for interactive commands. + + +### Generate migrations inside Django container: +``` +docker exec mammalbase_web_1 python manage.py makemigrations +``` + + +### Push migrations to the database: +``` +docker exec mammalbase_web_1 python manage.py migrate +``` + +Note that changes in the source tree cause the development server to restart. +See more in [Django docs](https://docs.djangoproject.com/en/3.2/). + +Please note that currently the Django application makes migrations and +migrates the database every time the Django container is started. If this +proves to be cumbersome the lines can be commented out in +[`entrypoint.sh`](../../app/scripts/entrypoint.sh) and the commands above run +manually. diff --git a/documentation/testing.md b/documentation/common/testing.md similarity index 55% rename from documentation/testing.md rename to documentation/common/testing.md index 190c61a..fb1b6e7 100644 --- a/documentation/testing.md +++ b/documentation/common/testing.md @@ -6,66 +6,71 @@ To run all the existing unit tests, run: -```bash -docker exec mammalbase-web-1 bash -c "python manage.py test" +``` +docker exec mammalbase_web_1 python manage.py test ``` Run a specific test file by adding the path to the file: -```bash -docker exec mammalbase-web-1 bash -c "python manage.py test tests.[folder].[test_file]" +``` +docker exec mammalbase_web_1 python manage.py test tests.[folder].[test_file] ``` Replace [folder] and [test_file] with the correct path to the test file. -The tests may not work locally before the database privileges are set for Django.
Once docker containers are running, run this command: +The tests may not work locally before the database privileges are set for +Django. Once docker containers are running, run this command: -```bash -docker compose exec db mysql -u root -p +``` +docker exec -it mammalbase_db_1 mysql -u root -p ``` -Enter the DB_ROOT_PASS from env-file +Enter the DB_ROOT_PASS from `.env` -file Once in MySQL, run this command to grant privileges: -```bash +``` GRANT ALL PRIVILEGES ON *.* TO 'mb_dev'@'%'; ``` -- NOTE: Database name (mb_dev in this case) should be the same as in the .env file. +- NOTE: Database name (mb_dev in this case) should be the same as in the .env file. -Type ```QUIT``` to exit MySQL. Tests should work now. +Hit Ctrl+D or give `QUIT` -command to exit MySQL. +Tests should work now. ### Creating and/or modifying tests -Test files are located in app/tests. +Test files are located in `/app/tests`. ### Test coverage You can create test coverage report by first installing coverage.py to the container: -```bash -docker exec mammalbase-web-1 bash -c "pip install coverage" +``` +docker exec mammalbase_web_1 pip install coverage ``` Run tests with: -```bash -docker exec mammalbase-web-1 bash -c "coverage run --source='.' manage.py test" +``` +docker exec mammalbase_web_1 coverage run --source='.' manage.py test ``` To see the report run: -```bash -docker exec mammalbase-web-1 bash -c "coverage report" +``` +docker exec mammalbase_web_1 coverage report ``` For html report run: -```bash -docker exec mammalbase-web-1 bash -c "coverage html" +``` +docker exec mammalbase_web_1 coverage html ``` ### Pylint Pylint can be run in the containers by using the following command: ``` -docker exec scripts/pylint.sh +docker exec mammalbase_web_1 scripts/pylint.sh ``` This tests whole source tree. By appending paths to the end of command line -you can test only chosen directories. 
+you can test only chosen directories: +``` +docker exec mammalbase_web_1 scripts/pylint.sh urls mb/models +``` The script will install pylint and pylint_django if not yet installed. diff --git a/documentation/data-architecture.md b/documentation/data-architecture.md deleted file mode 100644 index 98517ab..0000000 --- a/documentation/data-architecture.md +++ /dev/null @@ -1,13 +0,0 @@ -# Data architecture - -The application uses a `MariaDB` database to store data. The database tables are managed by Django's ORM. The application is divided into several subapplications, each of which contains files related to a specific feature of the application. - -Most of the models are defined in the `mb` directory. The `mb` directory contains main files related to the main MammalBase application. All the `models` related to MammalBase are defined in this directory. - -The `tdwg` directory contains `Taxon` model that follows TDWG (Taxonomic Databases Working Group) standards. - -All the models in mb inherit from base_model which is defined in `models` directory. The `models` directory contains files for different types of models by category. All model must be imported to `models/__init__.py` -file. - -Project also uses `simple_history` extension to keep track of changes in the database. - diff --git a/documentation/example.env b/documentation/example.env deleted file mode 100644 index b48e425..0000000 --- a/documentation/example.env +++ /dev/null @@ -1,24 +0,0 @@ -ALLOWED_HOSTS=* -DB_HOST=db -DB_PORT=3306 -DB_NAME=mb_dev -DB_USER=mb_dev -DB_PASS=password -DB_ROOT_PASS=rootpassword -EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend -EMAIL_USER= -EMAIL_PASS= -ORCID_CLIENT_ID= -ORCID_SECRET= -DJANGO_SUPERUSER_USERNAME=admin -DJANGO_SUPERUSER_PASSWORD= -DJANGO_SUPERUSER_EMAIL=admin@example.com -SITE_NAME=MammalBase -SITE_DOMAIN=localhost:8000 -PMA_UPLOAD_LIMIT=10M -UID= #Should be commented out on windows machines. 
-ALLOWED_HOSTS=* -DEBUG=1 - -#SECRET_KEY=development_key - diff --git a/documentation/instructions.md b/documentation/instructions.md deleted file mode 100644 index d5710ad..0000000 --- a/documentation/instructions.md +++ /dev/null @@ -1,76 +0,0 @@ -# Development Environment - -## Install Docker - -First, you need to have Docker and Docker Compose installed on your system. - -- [Docker](https://docs.docker.com/get-docker/) -- [Docker Compose](https://docs.docker.com/compose/install/) - -Note that on some distributions there is separate `docker-compose` command -which can be used in place of `docker compose`. - -## Environment variables - -Create a `.env` file in the root of the repository and write needed -environment variables to it. You can take the [Example .env](./example.env) -and fill in couple of the variables according to the -[Environment variable docs](./environment_variables.md). - -## Running the environment - -To start the environment, you have to run the following command in the root -the repository. -``` -docker compose up -d --build -``` -- NOTE: Running this command without the `-d` tag will show logs in the same - terminal. -- NOTE 2: `--build` will create a new container and is unnecessary unless - something has been changed in the Dockerfile or `docker-compose.yml` file. - -Now if you go to [localhost:8000](localhost:8000), you should see the app -running. You can also go to [localhost:8001](localhost:8001) to see or modify -the created database. If the website doesn't show up, check the logs. Most -likely the service just hasn't started yet. - -You can make changes to the django app in real time when the containers are -running. The [App](./../app) directory has been binded to the web container -so that all the changes to the host machine's [App](./../app) directory are -also made in the container. - -To see logs you can run this command. You can also specify a container if you -only want to see specific logs `docker compose logs -f `. 
-``` -docker compose logs -f -``` -If you want to shutdown the containers, you can run this command. -``` -docker compose down -``` -In the case of wanting to also remove the volumes (meaning that the database -will be reset), you can run `docker compose down -v`. - -### Other useful commands - -You can list running container ids by following command (This is usefull if -you need for example execute some command inside certain container): -``` -docker ps -``` - -You can execute commands inside the container by running: -``` -docker compose exec -``` -For example, if you need to make migrations inside django, you can run -`docker compose exec web python manage.py makemigrations`. Then to migrate -the database you can run `docker compose exec web python manage.py migrate`. -These commands should usually been run when the developer has made changes -to the models or created a new app inside django. See more in -[Django docs](https://docs.djangoproject.com/en/3.2/). - -Please note, that currently the django application makes migrations and -migrates the database every time the django container is started. If this -proves to be cumbersome the lines can be commented out in -[entrypoint.sh](./../app/scripts/entrypoint.sh). diff --git a/documentation/mammalbase/arch/architecture.md b/documentation/mammalbase/arch/architecture.md new file mode 100644 index 0000000..3c60c56 --- /dev/null +++ b/documentation/mammalbase/arch/architecture.md @@ -0,0 +1,120 @@ +## Directory Structure + +The project directory structure is as follows: + +- [`/.github/`](../../../.github/): + Contains GitHub-specific files, such as workflows for GitHub Actions. +- [`/app/`](../../../app/): + The main application directory. It contains several subdirectories: + - [`config/`](../../../app/config/): + Configuration files for the application. + - [`exports/`](../../../app/exports/): + Files related to exporting data. + - [`imports/`](../../../app/imports/): + Files related to importing data. 
+ - [`itis/`](../../../app/itis/): + ITIS (Integrated Taxonomic Information System) related files. + - [`main/`](../../../app/main/): + User authentication, user profiles, and other general application features. + - [`mb/`](../../../app/mb/): + Main MammalBase application files. + - [`scripts/`](../../../app/scripts/): + Scripts used in the application. + - [`tdwg/`](../../../app/tdwg/): + TDWG (Taxonomic Databases Working Group) related files. + - [`tests/`](../../../app/tests/): + Unit tests for the application. + - [`urls/`](../../../app/urls/): + URL handling is centralized here: paths, subpath includes, etc. +- [`/documentation/`](../../../documentation/): + Contains markdown files with documentation about the application, its architecture, testing, environment variables, etc. + - [`common/`](../../../documentation/common/): + Common documentation. More or less a development how-to. + - [`mammalbase/`](../../../documentation/mammalbase/): + Documentation about the project itself. + - [`arch/`](../../../documentation/mammalbase/arch/): + Architecture, hierarchy, how the project is arranged. + - [`features/`](../../../documentation/mammalbase/features/): + Detailed description of different functionalities of the app. + - [`models/`](../../../documentation/mammalbase/models/): + Mermaid charts of models and relations. +- [`/nginx/`](../../../nginx/): + Configuration files for the Nginx server. + + +### Subpackages for models, views etc + +In place of many single modules, subpackages are used. See [Organizing models in a package, Django Documentation](https://docs.djangoproject.com/en/5.0/topics/db/models/#organizing-models-in-a-package). For example, `mb.models` has its own subpackage with an `__init__.py` file so all models can be arranged neatly while being imported correctly. +- `/__init__.py`: Subpackage initialization module. It should collect all objects for the subpackage via imports. +- `/category.py`: Contains objects of the named category. Easy to find.
Nice and neat. +- `/unsorted.py`: File that has unsorted objects. These files are temporary and objects should be rearranged into the corresponding categories. + + +## Detailed description of some directories + +### /app/ + +The `app` directory contains the main Django application files. The application is divided into several subapplications, each of which contains files related to a specific feature of the application. + + +### /app/config/ + +The `config` directory contains configuration files for the application. The `settings.py` file contains the main settings for the application, such as database settings, installed apps, middleware, etc. + + +### /app/exports/ + +Documentation for the exports directory can be found in the [exports documentation](../features/exports.md). + + +### /app/imports/ + +Documentation for the imports directory can be found in the [imports documentation](../features/imports.md). + + +### /app/itis/ + +The `itis` directory contains files related to the ITIS (Integrated Taxonomic Information System) API. + + +### /app/main/ + +The `main` directory contains the main application files related to user authentication, user profiles, and other general application features. It contains templates for logging in with an ORCID social account. + + +### /app/mb/ + +As you may guess, "mb" stands for MammalBase. The `mb` directory contains the main files related to the main MammalBase application. All the `models` related to MammalBase are defined in this directory. Also all the `views` related to MammalBase are defined in this directory. + + +### /app/scripts/ + +The `scripts` directory contains scripts used in the deployment of the application. + +`entrypoint.prod.sh` is the entrypoint script for the production server. + +`entrypoint.sh` is the entrypoint script for the development server. + +`pylint.sh` is a script to launch pylint against the source.
See +[Testing Guide](../../common/testing.md#pylint) for details. + + +### /app/tdwg/ + +The `tdwg` directory contains the `Taxon` model that follows TDWG (Taxonomic Databases Working Group) standards. + +### /app/tests/ +The `tests` directory contains unit tests for the application. The tests are divided into several folders, each of which contains tests for a specific part of the application. More about testing can be read from [Testing Documentation](../../common/testing.md). + +### /app/urls/ +This subpackage contains all URL-related information. All the URLs and their +view counterparts are listed in the submodules starting from `__init__.py`. + +### /documentation/ +This is the directory tree this document lives in. The `documentation` directory contains markdown files with documentation about the application. + +### /nginx/ +The `nginx` directory contains configuration files for the Nginx server. + +### /.github/ +The `.github` directory contains GitHub-specific files, such as workflows for GitHub Actions. diff --git a/documentation/mammalbase/arch/data-architecture.md b/documentation/mammalbase/arch/data-architecture.md new file mode 100644 index 0000000..d7d6e4b --- /dev/null +++ b/documentation/mammalbase/arch/data-architecture.md @@ -0,0 +1,43 @@ +# Data architecture + +## Database + +The application uses a `MariaDB` database to store data. The database tables +are managed by Django's ORM. The application is divided into several +subapplications, each of which contains files related to a specific feature +of the application. + + +## Models + +### MammalBase + +Most of the MammalBase related models are defined in the `mb` subpackage in the +[`/app/mb/models`](../../../app/mb/models) directory. + +The directory contains files for different types of models by category. +Order is kept by the subpackage `mb.models`, meaning that all models are +imported in `__init__.py`. This is how models can be imported elsewhere directly +from the subpackage like `from mb.models import ...`.
Failing to do so may +result in duplicates of models, failing migrations +and **hard to track down errors**. + +All the models in `mb` inherit from `BaseModel` which is defined in the +[`/app/mb/models/base_model.py`](../../../app/mb/models/base_model.py) file. +`BaseModel` also makes use of the +[`simple_history`](https://django-simple-history.readthedocs.io/en/latest/) +extension to keep track of changes in the database. + + +### TDWG (Taxonomic Databases Working Group) + +The [`/app/tdwg`](../../../app/tdwg) directory contains the `Taxon` model that +follows [TDWG (Taxonomic Databases Working Group)](https://www.tdwg.org/) +standards. + + +### More models + +There are even more models defined in: +- [`/app/itis/models.py`](../../../app/itis/models.py) +- [`/app/exports/models.py`](../../../app/exports/models.py) diff --git a/documentation/exports.md b/documentation/mammalbase/features/exports.md similarity index 82% rename from documentation/exports.md rename to documentation/mammalbase/features/exports.md index 42e5c39..2f580ce 100644 --- a/documentation/exports.md +++ b/documentation/mammalbase/features/exports.md @@ -1,5 +1,16 @@ # Exports app +Update: The architecture of the exports app has changed somewhat. The basic principles +and functionality are the same, but now there can be multiple "queries and +fields" tuples, so multiple queries can be written to the same file. The fields must be +identical, because all the queries output to a single file. It turned out +to be too hard to write a single massive query that also includes nominal data. +Maybe a single-query solution is possible, but there might be a lack of +people who can get their heads around all this Django ORM madness combined with +quite wide database queries. For the current state of exports, please read the +comments and code in [/app/exports/tasks.py](../../../app/exports/tasks.py). + + The data export feature has been defined in the `exports` app.
This document aims to give a brief explanation on the different parts of the app that makes the data export possible by the standards given by the owner of the product. diff --git a/documentation/imports.md b/documentation/mammalbase/features/imports.md similarity index 100% rename from documentation/imports.md rename to documentation/mammalbase/features/imports.md diff --git a/documentation/mammalbase/models/habitat.md b/documentation/mammalbase/models/habitat.md new file mode 100644 index 0000000..2076541 --- /dev/null +++ b/documentation/mammalbase/models/habitat.md @@ -0,0 +1,31 @@ +```mermaid +classDiagram + class SourceHabitat { + FK(SourceReference) source_reference + +String habitat_type + +String habitat_percentage + } + class MasterHabitat { + FK(MasterReference) master_reference + FK(self) parent_id + +String name + +Integer code + +String group + } + class HabitatRelation { + FK(SourceHabitat) source_habitat + FK(MasterHabitat) master_habitat + } + class SourceReference { + } + class MasterReference { + } + + SourceHabitat "1.." -- "1" SourceReference : source_reference_id + MasterHabitat "N" -- "N" MasterHabitat : parent_id + MasterHabitat "1.." -- "1" MasterReference : master_reference_id + HabitatRelation "1.." -- "1" SourceHabitat : source_habitat_id + HabitatRelation "1.." 
-- "1" MasterHabitat : master_habitat_id + + +``` \ No newline at end of file diff --git a/documentation/mammalbase/models/location.md b/documentation/mammalbase/models/location.md new file mode 100644 index 0000000..7e9788b --- /dev/null +++ b/documentation/mammalbase/models/location.md @@ -0,0 +1,51 @@ +```mermaid +classDiagram + class SourceLocation { + FK(SourceReference) source_reference + +String name + +String verbatim_elevation + +String verbatim_longitude + +String verbatim_latitude + +String verbatim_depth + +String verbatim_coordinate_system + +String verbatim_coordinates + +String verbatim_srs + + } + class MasterLocation { + FK(MasterReference) master_reference + FK(self) higherGeographyID + +String name + +String locationID + +String continent + +String country + +String countryCode + +String stateProvince + +String county + +String municipality + +String locality + +String minimumElevationInMeters + +String maximumElevationInMeters + +String locationAccordingTo + +String locationRemarks + +String decimalLatitude + +String decimalLongitude + +String geodeticDatum + } + class LocationRelation { + FK(SourceLocation) source_location + FK(MasterLocation) master_location + } + class SourceReference { + } + class MasterReference { + } + + SourceLocation "1.." -- "1" SourceReference : source_reference_id + MasterLocation "N" -- "N" MasterLocation : higher_geography_id + MasterLocation "1.." -- "1" MasterReference : master_reference_id + LocationRelation "1.." -- "1" SourceLocation : source_location_id + LocationRelation "1.." 
-- "1" MasterLocation : master_location_id + + +``` \ No newline at end of file diff --git a/documentation/mammalbase/models/occurrence.md b/documentation/mammalbase/models/occurrence.md new file mode 100644 index 0000000..f8fb571 --- /dev/null +++ b/documentation/mammalbase/models/occurrence.md @@ -0,0 +1,54 @@ +```mermaid +classDiagram + class Occurrence { + FK(SourceReference) source_reference + FK(Event) event + FK(SourceLocation) source_location + FK(SourceEntity) source_entity + +String organism_quantity + +String organism_quantity_type + +String gender + +String life_stage + +String occurrence_remarks + +String associated_references + } + class Event { + FK(SourceMethod) source_method + FK(SourceHabitat) source_habitat + +String verbatim_event_date + } + class SourceLocation { + FK(SourceReference) source_reference + +String name + +String verbatim_elevation + +String verbatim_depth + +float verbatim_latitude + +float verbatim_longitude + +String verbatim_coordinates + +String verbatim_coordinate_system + +String verbatim_srs + } + class SourceHabitat { + FK(SourceReference) source_reference + +String habitat_type + +String habitat_percentage + } + class SourceEntity { + } + class SourceReference { + } + class SourceMethod { + + } + + Occurrence "1.." -- "1" Event : event_id + Occurrence "1" -- "1" SourceLocation : source_location_id + Occurrence "1.." -- "1" SourceEntity : source_entity_id + Occurrence "1.." -- "1" SourceReference : source_reference_id + Event "1.." -- "1" SourceHabitat : source_habitat_id + Event "1.." -- "1" SourceMethod : source_method_id + SourceLocation "1.." -- "1" SourceReference : source_reference_id + SourceHabitat "1.." -- "1" SourceReference : source_reference_id + + +``` \ No newline at end of file