-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheckPublishScratchDiffs.py
executable file
·64 lines (52 loc) · 1.38 KB
/
checkPublishScratchDiffs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Stephen Po-Chedley 9 May 2019
Script to compare paths with same metadata to see
if they contain the same files.
Currently used to compare CMIP6 scratch versus publish information
for disagreements.
@author: pochedls
"""
import sqlite3
import glob
import xdat
# Location of the sqlite database that holds the `paths` table.
sqlDB = '../xml.db'

# Select (path, keyId) for every keyId that has more than one active
# (retired = 0) CMIP6 path, ordered so rows sharing a keyId are adjacent.
q = ("select p.path, p.keyId from paths p join "
     "(select keyId, path, count(*) as c from paths "
     "where mip_era = 'CMIP6' and retired = 0 "
     "group by keyId having count(*) > 1) innerQuery "
     "on innerQuery.keyId = p.keyId order by p.keyId;")

conn = sqlite3.connect(sqlDB)  # connect to db
try:
    c = conn.cursor()
    # get the paths of every keyId that appears under more than one path
    c.execute(q)
    result = c.fetchall()
finally:
    # always release the database handle, even if the query fails
    conn.close()
def compareDirectories(files, files2):
    """
    Check whether two file listings contain the same file names.

    Only the last '/'-separated component of each entry is compared, so
    the two listings may come from different directories.

    Parameters
    ----------
    files, files2 : list of str
        File paths that use '/' as the separator.

    Returns
    -------
    bool
        True if both listings hold exactly the same base names (order is
        ignored), False otherwise.

    Notes
    -----
    The input lists are not modified. Base names are built into new lists
    because removing/appending on a list while iterating over it skips
    elements and would leave some entries as full paths.
    """
    # Different counts can never match; skip the name comparison.
    if len(files) != len(files2):
        return False
    names = sorted(fn.split('/')[-1] for fn in files)
    names2 = sorted(fn.split('/')[-1] for fn in files2)
    return names == names2
# Compare the directories that share a keyId and report every pair whose
# netCDF file listings differ.
bad = 0  # running count of mismatched pairs (also printed as their index)
pair1 = []  # first path of each mismatched pair
pair2 = []  # second path of each mismatched pair

# `result` holds (path, keyId) rows ordered by keyId. The query keeps keyIds
# with more than one path, so a keyId may own three or more rows; group the
# consecutive rows by keyId instead of assuming they arrive in strict pairs.
groups = []
lastKey = None
for path, keyId in result:
    if not groups or keyId != lastKey:
        groups.append([])
        lastKey = keyId
    groups[-1].append(path)

for paths in groups:
    # check every unordered pair of paths that share this keyId
    for i, p1 in enumerate(paths):
        for p2 in paths[i + 1:]:
            # fresh listings for each comparison
            files = glob.glob(p1 + '*.nc')
            files2 = glob.glob(p2 + '*.nc')
            if compareDirectories(files, files2):
                continue
            # print the esgf_publish path first when p1 is that path
            if 'esgf_publish' in p1:
                print(bad, p1, p2)
            else:
                print(bad, p2, p1)
            pair1.append(p1)
            pair2.append(p2)
            bad += 1