Pf6 changes #1

Open: wants to merge 3 commits into base: master
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
env
*.csv
output/**
output/pf_regions/data
45 changes: 21 additions & 24 deletions README.md
@@ -32,41 +32,27 @@ gsheetsCredentialsPath: PATH_AS_ABOVE_BUT_WITH_credentials.json
```jupyter notebook fetch_and_convert.ipynb```

Run all cells to produce 3 csv files.
(Visual Studio Code works quite nicely for this)

### Backup existing DB

``` pg_dump -h localhost -p 9999 -U outlandish observatory_outlandish -n observatory --no-owner --no-privileges > obs_dump```

### Upload to postgres
https://35.185.117.147/phppgadmin/redirect.php (Lee has login)

Click servers -> PostgreSQL -> login -> observatory_outlandish -> observatory -> samples -> empty

Then "import", choose `samples.csv`, with "Empty string/field" as the only checked option

Back in the list of tables, do the same for `sampletypes`, importing twice as the data is split across `sampletypes1/2.csv` (a single file is too big for the import process)

### Upload to postgres

```load_files.sh```

### Tunnel LDAP and postgres
35.185.117.147 is the postgres server

35.189.232.128 is the analytics staging server we use to tunnel

gcloud.pub is your google SSH key that you add to the analytics staging instance (see https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys)

```
ssh -N -i ~/.ssh/gcloud.pub -L 127.0.0.1:9999:35.185.117.147:5432 ben_jeffery_well@35.189.232.128
gcloud beta compute ssh --zone "us-east1-c" --project "ssdtest-141111" observatory-db -- -N -L 9999:localhost:5432
```
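Before pointing `psql` or the scripts at the forwarded port, it can help to confirm the tunnel's local forward is actually listening. A minimal sketch (hypothetical helper, not part of this repo), assuming the postgres forward on local port 9999 as above:

```python
import socket

def port_open(host: str, port: int, timeout: float = 1.0) -> bool:
    """Return True if a TCP connection to (host, port) succeeds,
    e.g. to confirm the ssh tunnel's local forward is up."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

# e.g. port_open("127.0.0.1", 9999) once the tunnel is running
```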

Tunnel to LDAP:

```
ssh -N -i ~/.ssh/gcloud.pub -L 127.0.0.1:7777:sso1.malariagen.net:636 ben_jeffery_well@35.189.232.128
```

LDAP checks the host matches what it expects, so we need to fake it by editing /etc/hosts:

```
127.0.0.1 sso1.malariagen.net
```
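To sanity-check the /etc/hosts override before running the fetch, something like this could be used (hypothetical helper, not part of the repo):

```python
def host_overridden(hosts_text: str, hostname: str, ip: str = "127.0.0.1") -> bool:
    """Return True if hosts_text maps hostname to ip (comments ignored),
    i.e. the /etc/hosts fake described above is in place."""
    for line in hosts_text.splitlines():
        fields = line.split("#")[0].split()
        if len(fields) >= 2 and fields[0] == ip and hostname in fields[1:]:
            return True
    return False

# e.g. host_overridden(open("/etc/hosts").read(), "sso1.malariagen.net")
```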
Ask a sysadmin to open a temporary firewall hole allowing connections to sso1 on port 636.


### Create CSV files:
@@ -75,10 +61,21 @@
```
python create_files.py
```

### Create postgres DB from the CSVs for export to outlandish
```
psql -v ON_ERROR_STOP=1 -d pf6 < schema.sql
```
```
./table-command.sh
```
Compare the output of `print-tables.sh` with `load_files.psql` to see whether any changes need to be made, then:
```
psql -v ON_ERROR_STOP=1 -d pf6 < load_files.psql
```
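The comparison between the generated table list and the load script can be mechanised. A sketch of the idea (hypothetical helper and names, assuming both sides reduce to a list of table names):

```python
def table_mismatches(printed_tables, load_script_tables):
    """Report tables present on one side but not the other, e.g.
    print-tables.sh output vs. the \\copy targets in load_files.psql."""
    printed, loaded = set(printed_tables), set(load_script_tables)
    return {
        "missing_from_load": sorted(printed - loaded),
        "missing_from_schema": sorted(loaded - printed),
    }
```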

Remove the extra columns that were introduced for the loading process
```
psql -v ON_ERROR_STOP=1 -d pf6 < tidy_up.psql
```
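For illustration only: given that this PR's schema.sql changes add helper `id` columns to `pf_samples` and `gene_diff`, a hypothetical `tidy_up.psql` (the real file is not shown in this diff) might look like:

```sql
-- Assumption: the loading helpers are the "id" columns added in schema.sql
ALTER TABLE "pf_samples" DROP COLUMN IF EXISTS "id";
ALTER TABLE "gene_diff" DROP COLUMN IF EXISTS "id";
```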
### Dump out the resulting DB for sending

8 changes: 7 additions & 1 deletion create_files.py
@@ -524,8 +524,14 @@ def getAlfStudyLdapPeople(ldapPeople, alfStudy, panoptesAlfStudyLdapPeopleGroups
    for p in alfStudyLdapPeople:
        if p['malariagenUID'] == malariagenUID:
            p['class'].append(group_type)
            if group_type == 'Contact':
                p['contact'] = '1'
else:
    person['class'] = [group_type]
    if group_type == 'Contact':
        person['contact'] = '1'
    else:
        person['contact'] = '0'
    study_people[malariagenUID] = person
    alfStudyLdapPeople.append(person)
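The hunk above sets a `contact` flag alongside the existing class bookkeeping. A self-contained sketch of the same pattern (simplified, with hypothetical minimal records):

```python
def add_group_membership(people, person, group_type):
    """Append group_type to an existing person's class list, flagging
    contact='1' when the group is 'Contact'; otherwise add a new record."""
    for p in people:
        if p["malariagenUID"] == person["malariagenUID"]:
            p["class"].append(group_type)
            if group_type == "Contact":
                p["contact"] = "1"
            return
    person["class"] = [group_type]
    person["contact"] = "1" if group_type == "Contact" else "0"
    people.append(person)
```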

@@ -568,4 +574,4 @@ def establishGSheetsCredentials(client_secret_path, credentials_path, auth_host_
return credentials


run()
run()
9 changes: 0 additions & 9 deletions output/pf_regions/data

This file was deleted.

19 changes: 3 additions & 16 deletions schema.sql
@@ -90,6 +90,7 @@ CREATE TABLE "study_ldap_people" (
"study" Text,
"twitterURL" Text,
"uid" Text,
"contact" Int,
PRIMARY KEY("uid", "study")
);

@@ -134,6 +135,7 @@ CREATE TABLE "study_publications" (
);

CREATE TABLE "pf_samples" (
"id" int,
"ARTresistant" Text,
"ASMQresistant" Text,
"CQresistant" Text,
@@ -188,6 +190,7 @@ CREATE TABLE "pf_samples" (
);

CREATE TABLE "gene_diff" (
"id" Int,
"gene_id" Text PRIMARY KEY,
"gene_name" Text,
"chrom" Text,
@@ -229,19 +232,3 @@ ALTER TABLE "pf_drug_gene" ADD FOREIGN KEY ("drug_id") REFERENCES "pf_drugs" ("d
ALTER TABLE "pf_drug_gene" ADD FOREIGN KEY ("gene_id") REFERENCES "pf_resgenes" ("gene_id");
ALTER TABLE "pf_resgenes" ADD FOREIGN KEY ("gene_id") REFERENCES "gene_diff" ("gene_id");


\COPY countries ("country_id", "lat", "lng", "name", "alpha_3_code", "geojson", "num_samples", "CQresistance", "PYRresistance", "SDXresistance", "MQresistance", "ARTresistance", "PPQresistance") FROM 'output/countries/data' DELIMITER E'\t' CSV HEADER;
\COPY gene_diff ("gene_id", "gene_name", "chrom", "start", "end", "global_differentiation_score", "local_differentiation_score", "distance_to_higher_local_diff_score") FROM 'output/gene_diff/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_drugs ("drug_id", "is_combination", "name", "short_description", "description") FROM 'output/pf_drugs/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_features ("feature_id", "name", "category", "description") FROM 'output/pf_features/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_featuretypes ("feature_id", "type_id", "description", "feature") FROM 'output/pf_featuretypes/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_regions ("region_id", "lat", "lng", "name", "description", "num_samples", "web_colour", "CQresistance", "PYRresistance", "SDXresistance", "MQresistance", "ARTresistance", "PPQresistance", "SPIPTpresistance", "ASMQresistance", "DHAPPQresistance", "HRP2deletion", "HRP3deletion", "HRP23deletion", "SPresistance", "geojson") FROM 'output/pf_regions/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_resgenes ("gene_id", "name", "long_name", "short_description", "description", "marker_name") FROM 'output/pf_resgenes/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_sites ("site_id", "lat", "lng", "name", "num_samples", "pf_region_id", "country_id", "CQresistance", "PYRresistance", "SDXresistance", "MQresistance", "ARTresistance", "PPQresistance", "SPIPTpresistance", "ASMQresistance", "DHAPPQresistance", "HRP2deletion", "HRP3deletion", "HRP23deletion", "SPresistance", "AnyHRPdeletion") FROM 'output/pf_sites/data' DELIMITER E'\t' CSV HEADER;
\COPY studies ("study", "study_number", "webTitle", "description") FROM 'output/studies/data' DELIMITER E'\t' CSV HEADER;
\COPY study_ldap_people ("study", "jobTitle1", "jobTitle2", "jobTitle3", "uid", "researchGateURL", "scholarURL", "twitterURL", "malariagenUID", "oProfile1", "oProfile2", "oProfile3", "ORCID", "sn", "mail", "givenName", "o1", "o2", "o3") FROM 'output/study_ldap_people/data' DELIMITER E'\t' CSV HEADER;
\COPY study_publications ("study", "doi", "name", "title", "citation", "pmid") FROM 'output/study_publications/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_drug_gene ("gene_id", "drug_id") FROM 'output/pf_drug_gene/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_drug_regions ("region_id", "drug_region_id", "text", "resistance", "drug_id") FROM 'output/pf_drug_regions/data' DELIMITER E'\t' CSV HEADER;
\COPY pf_samples ("sample_id", "PYRresistant", "MQresistant", "country_lat", "country_lng", "region_lng", "HRP23deletion", "ARTresistant", "site_id", "site_lng", "year", "country", "CQresistant", "pc_genome_callable", "ASMQresistant", "HRP2deletion", "qc_pass", "site_lat", "region_lat", "DHAPPQresistant", "SPresistant", "SPIPTpresistant", "mean_coverage", "HRP3deletion", "site", "PPQresistant", "region_id", "SDXresistant", "run_accessions", "region", "study_id", "country_id", "crt_76[K]", "crt_72-76[CVMNK]", "dhfr_51[N]", "dhfr_59[C]", "dhfr_108[S]", "dhfr_164[I]", "dhps_437[G]", "dhps_540[K]", "dhps_581[A]", "dhps_613[A]", "k13_class", "k13_alleles", "cn_mdr1", "cn_pm2", "cn_gch1", "breakpoint_mdr1", "breakpoint_pm2", "breakpoint_gch1", "Fws") FROM 'output/pf_samples/data' DELIMITER E'\t' CSV HEADER;

4 changes: 2 additions & 2 deletions settings_nosecrets
@@ -25,7 +25,7 @@ panoptesAlfStudyPublicationsTable: study_publications

### LDAP server (people)
# This setting is changed for tunneling as in the README
ldapServerURL: ldaps://sso1.malariagen.net:7777
ldapServerURL: ldaps://sso1.malariagen.net
ldapUserDN: cn=website,ou=users,ou=system,dc=malariagen,dc=net
ldapUserPass: PASS
ldapPeopleBaseDN: ou=people,dc=malariagen,dc=net
@@ -35,7 +35,7 @@ ldapPeopleFields: ['mail', 'jobTitle1', 'givenName', 'sn', 'jobTitle1', 'o1', 'j
### Cross-server relations
panoptesAlfStudyLdapPeopleTable: study_ldap_people
panoptesAlfStudyLdapPeopleGroups: ['Contact', 'Public']
panoptesAlfStudyLdapPeopleFields: ['jobTitle1', 'jobTitle2', 'jobTitle3', 'uid', 'researchGateURL', 'scholarURL', 'twitterURL', 'malariagenUID', 'oProfile1', 'oProfile2', 'oProfile3', 'ORCID', 'sn', 'mail', 'givenName', 'o1', 'o2', 'o3']
panoptesAlfStudyLdapPeopleFields: ['contact', 'jobTitle1', 'jobTitle2', 'jobTitle3', 'uid', 'researchGateURL', 'scholarURL', 'twitterURL', 'malariagenUID', 'oProfile1', 'oProfile2', 'oProfile3', 'ORCID', 'sn', 'mail', 'givenName', 'o1', 'o2', 'o3']

### Post-fetch processing
panoptesObsRegionsTable: pf_regions