-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexportLogData.py
325 lines (208 loc) · 9.68 KB
/
exportLogData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# Module to export the collaboration network to different SNA tool formats
# First implementation to uciNet for WebKit SNA ACM SIG MIS paper - on this file
# Second implementation to GraphML (VISIONE) for OpenStack SNA - Open Journal special issue - using the exportgraphml form
from __future__ import absolute_import
from __future__ import print_function
import sys
import re
import csv
from datetime import *
import networkx as nx
import exportGraphMLformat
# Replace '@' and '.' by "AT" and "DOT"
# Loose "local@domain.tld" shape check, compiled once at import time.
# Raw string: the escapes are regex escapes, not Python string escapes.
_EMAIL_PATTERN = re.compile(r'([\w\-\.]+@(\w[\w\-]+\.)+[\w\-]+)')


def clearDotsAndAts(contribEmail):
    """Return *contribEmail* with '.' replaced by "DOT" and '@' by "AT".

    The input must contain something shaped like an e-mail address
    (checked with ``search``, so surrounding text is tolerated).

    Args:
        contribEmail: contributor e-mail string.

    Returns:
        The sanitised string, with no '.' or '@' characters left.

    Raises:
        SystemExit: with status 1 (after printing an error) when no
            e-mail-like substring is found.
    """
    if _EMAIL_PATTERN.search(contribEmail) is None:
        print("ERROR Contributor have an invalidName")
        # sys.exit is always available (the bare exit() builtin is injected
        # by the site module only); non-zero status signals the failure.
        sys.exit(1)
    # Plain literal substitution: str.replace is enough, no regex needed.
    return contribEmail.replace('.', 'DOT').replace('@', 'AT')
# Create a Network file (uciNet style) in raw text
def createNetworkFile(tuplesList, outFileName):
    """Write the network as a raw-text, uciNet-style file.

    Two '#' header lines are written first, then one line per connection
    holding every author of ``connection[0]`` (sanitised with
    clearDotsAndAts, each followed by a tab), and finally an ``END`` line.

    Args:
        tuplesList: iterable of connections; ``connection[0]`` must be an
            iterable of author e-mail strings.
        outFileName: path of the file to create (overwritten if present).

    Note:
        The header embeds ``sys.argv[1]``, so the function raises
        IndexError when the process was started without that argument.
    """
    print("")
    print("Writing network on file:[" + outFileName + "]")
    # The context manager closes the file even when an author e-mail is
    # rejected part-way through; the handle was previously never closed.
    with open(outFileName, 'w') as f:
        f.write('#File generated by scrapLog.py for research purposes \n')
        f.write('#Network connections by scrapping the changelog [' + sys.argv[1] + '] on ' + str(datetime.now()) + '\n')
        for connection in tuplesList:
            for author in connection[0]:
                f.write(clearDotsAndAts(author) + "\t")
            f.write('\n')
        f.write('END \n')
# Create an attributes file (uciNet style) in raw text
def createAtributesFile(logData, outFileName):
    """Write the node attributes as a raw-text, uciNet-style file.

    Two '#' header lines and a ``NAME<TAB>AFFILIATION`` line are written
    first, then one ``name<TAB>affiliation`` line per entry of *logData*,
    and finally an ``END`` line.

    Args:
        logData: iterable of changes; ``change[0][1]`` is written as the
            name (after clearDotsAndAts) and ``change[0][2]`` as the
            affiliation.
        outFileName: path of the file to create (overwritten if present).

    Note:
        The header embeds ``sys.argv[1]``, so the function raises
        IndexError when the process was started without that argument.
    """
    print("")
    print("Writing atributes on file:[" + outFileName + "]")
    # The context manager closes the file on every exit path; the handle
    # was previously never closed.
    with open(outFileName, 'w') as f:
        f.write('#File generated by scrapLog.py for research purposes \n')
        f.write('#Node atributes by scrapping the changelog [' + sys.argv[1] + '] on ' + str(datetime.now()) + '\n')
        f.write('NAME\tAFFILIATION\n')
        for change in logData:
            f.write(clearDotsAndAts(change[0][1]))
            f.write('\t')
            f.write(change[0][2])
            f.write('\n')
        f.write('END \n')
# Create a network file (uciNet style) as a CSV file for spreadsheet software
def createNetworkFileCSV(tuplesList, outFileName):
    """Write the network as a CSV file, one row per connection.

    The first row is the header ``NODE NODE``; each following row holds
    the authors of ``connection[0]`` sanitised with clearDotsAndAts.

    Args:
        tuplesList: sized iterable of connections; ``connection[0]`` must
            be an iterable of author e-mail strings.
        outFileName: path of the file to create (overwritten if present).
    """
    print("")
    print("Writing network on file (.CSV):[" + outFileName + "]")
    # newline='' is what the csv module requires for files it writes to;
    # without it the '\r\n' row terminator becomes '\r\r\n' on Windows.
    # The context manager also closes the file if a row fails.
    with open(outFileName, 'w', newline='') as csvfile:
        atribWriter = csv.writer(csvfile, dialect='excel', delimiter=' ')
        atribWriter.writerow(["NODE", "NODE"])
        for connection in tuplesList:
            atribWriter.writerow([clearDotsAndAts(author) for author in connection[0]])
    # Success
    print(str(len(tuplesList)) + " network relationships writen down :[" + outFileName + "]")
# Create an attributes file (uciNet style) as a CSV file for spreadsheet software
def createAtributesFileCSV(logData, outFileName):
    """Write the node attributes as a CSV file, one row per change.

    The first row is the header ``NAME AFFILIATION``; each following row
    holds ``change[0][1]`` (sanitised with clearDotsAndAts) and
    ``change[0][2]``.

    Args:
        logData: sized iterable of changes indexed as described above.
        outFileName: path of the file to create (overwritten if present).
    """
    print("")
    print("Writing atributes on file (.CSV):[" + outFileName + "]")
    # newline='' is what the csv module requires for files it writes to;
    # without it the '\r\n' row terminator becomes '\r\r\n' on Windows.
    # The context manager also closes the file if a row fails.
    with open(outFileName, 'w', newline='') as csvfile:
        atribWriter = csv.writer(csvfile, dialect='excel', delimiter=' ')
        atribWriter.writerow(['NAME', 'AFFILIATION'])
        for change in logData:
            atribWriter.writerow([clearDotsAndAts(change[0][1]), change[0][2]])
    # Success
    print(str(len(logData)) + " node atributes writen down :[" + outFileName + "]")
############### By core companies #############
# As in http://blog.bitergia.com/2013/02/06/report-on-the-activity-of-companies-in-the-webkit-project/
# All coreCompanies + bot + core
# Affiliation labels that are kept as-is when attributes are grouped by
# core company; any other affiliation is not treated as a core company.
coreCompanies = [
    'apple',
    'google',
    'nokia',
    'rim',
    'igalia',
    'intel',
    'samsung',
    'inf',
    'adobe',
    'torchmobile',
]
# Starts empty. NOTE(review): presumably meant to hold one colour per
# core company -- confirm against the code that fills or reads it.
coreCompaniesColor = list()
# Create a network file (grouped by core companies, uciNet style) as a CSV file for spreadsheet software
def createNetworkByCoreCompaniesFileCSV(tuplesList, outFileName):
    """Write the network as a CSV file for the core-company grouping.

    The rows are the same as in the ungrouped network export: a
    ``NODE NODE`` header, then one row per connection with the authors of
    ``connection[0]`` sanitised with clearDotsAndAts. Only the attributes
    file differs for the grouped export.

    Args:
        tuplesList: iterable of connections; ``connection[0]`` must be an
            iterable of author e-mail strings.
        outFileName: path of the file to create (overwritten if present).
    """
    print("")
    print("Writing network (grouped by core companies) on file (.CSV):[" + outFileName + "]")
    # newline='' is what the csv module requires for files it writes to;
    # without it the '\r\n' row terminator becomes '\r\r\n' on Windows.
    # The context manager also closes the file if a row fails.
    with open(outFileName, 'w', newline='') as csvfile:
        atribWriter = csv.writer(csvfile, dialect='excel', delimiter=' ')
        atribWriter.writerow(["NODE", "NODE"])
        for connection in tuplesList:
            atribWriter.writerow([clearDotsAndAts(author) for author in connection[0]])
# Create an attributes file, grouped by core companies (uciNet style), as a CSV file for spreadsheet software
def createAtributesByCoreFileCSV(logData, outFileName):
    """Write node attributes as CSV with affiliations grouped by core company.

    The first row is the header ``NAME AFFILIATION``. Each change then
    yields one row whose name is ``change[0][1]`` sanitised with
    clearDotsAndAts and whose group is chosen in this order:

    1. the affiliation itself when it is listed in ``coreCompanies``;
    2. ``'AutomatedBot'`` for the WebKit review bot address;
    3. ``'other'`` for everything else.

    Args:
        logData: iterable of changes; ``change[0][1]`` is the e-mail and
            ``change[0][2]`` the affiliation.
        outFileName: path of the file to create (overwritten if present).
    """
    print("")
    print("Writing atributes (grouped by core companies) on file (.CSV):[" + outFileName + "]")
    # newline='' is what the csv module requires for files it writes to;
    # the context manager also closes the file if a row fails.
    with open(outFileName, 'w', newline='') as csvfile:
        atribWriter = csv.writer(csvfile, dialect='excel', delimiter=' ')
        atribWriter.writerow(['NAME', 'AFFILIATION'])
        for change in logData:
            rawEmail = change[0][1]
            email = clearDotsAndAts(rawEmail)
            affiliation = change[0][2]
            if affiliation in coreCompanies:
                group = affiliation
            # Compare the RAW address: the sanitised one contains no '.'
            # or '@' any more, so it could never equal this literal and
            # the bot was always filed under 'other'.
            elif rawEmail == "webkit.review.bot@gmail.com":
                group = 'AutomatedBot'
            else:
                group = 'other'
            atribWriter.writerow([email, group])
# Export the graph nodes and edges to GraphML format
# Must be readable by Visione
def _abortGraphMLExport(*lines):
    """Print every item of *lines* on its own line, then exit with status 1."""
    for line in lines:
        print(line)
    sys.exit(1)


def _checkGraphForGraphML(graph):
    """Abort unless *graph* has >= 2 nodes, >= 1 edge and an affiliation on every node."""
    if graph.order() < 2:
        _abortGraphMLExport("\tERROR network have less than two nodes !!")
    if graph.size() < 1:
        _abortGraphMLExport("\tERROR network have less than one edge !!")
    for node, data in graph.nodes(data=True):
        # Test for the key first: indexing a missing key would raise
        # KeyError before the dedicated error message could be printed.
        if 'affiliation' not in data:
            _abortGraphMLExport("affiliation atribute is missing", node)
        if len(data['affiliation']) == 0:
            _abortGraphMLExport("invalid affiliation atribute", node, data['affiliation'])


def _checkOutFileNameForGraphML(outFileName):
    """Abort unless *outFileName* is a str of 5+ characters ending in '.graphML'."""
    if not isinstance(outFileName, str):
        _abortGraphMLExport("\tERROR outfilename must be a string")
    if len(outFileName) < 5:
        _abortGraphMLExport("\tERROR outfilename must be a long string. More than 5 caracters")
    if not outFileName.endswith(".graphML"):
        # The message now names the extension that is actually required.
        _abortGraphMLExport("\tERROR outfilename must finish with .graphML extenssion")


def createGraphML(network_with_affiliation_atributes: nx.Graph, outFileName: str):
    """Export the graph's nodes and edges to a GraphML file.

    The graph and the file name are validated first. Nodes are written
    with three data keys (0 "e-mail", 1 "color", 2 "affiliation"); every
    node currently gets the colour "turquoise". Edges are then written as
    "e0", "e1", ... referencing the numeric node ids.

    Side effect: every node of the graph receives an ``'id'`` attribute
    holding the numeric id used in the file.

    Args:
        network_with_affiliation_atributes: graph with at least two nodes
            and one edge, whose nodes all carry a non-empty
            ``'affiliation'`` attribute.
        outFileName: output path; must end with ``.graphML``.

    Raises:
        SystemExit: with status 1 after printing an error when the graph
            or the file name is invalid, or when an edge joins a node to
            itself.
    """
    graph = network_with_affiliation_atributes
    fmt = exportGraphMLformat

    print("")
    # str() keeps the banner from raising TypeError on a non-string name,
    # so the dedicated type check below can report the problem instead.
    print("\tExporting graph to file (.graphml):[" + str(outFileName) + "]")

    _checkGraphForGraphML(graph)
    _checkOutFileNameForGraphML(outFileName)

    print("")
    print("\tWriting grapthML file (for VISIONE SNA tool or other ) on file:[" + outFileName + "]")

    # The context manager closes the export file on every path, including
    # the self-loop abort below.
    with open(outFileName, 'w') as gfile:
        # XML header and node attribute keys
        gfile.writelines(fmt.graphml_header)
        gfile.writelines(fmt.setNodeAntributeKey(0, "e-mail", "string"))
        gfile.writelines(fmt.setNodeAntributeKey(1, "color", "string"))
        gfile.writelines(fmt.setNodeAntributeKey(2, "affiliation", "string"))
        gfile.writelines(fmt.graph_opener)

        print()
        print("\t\tWriting nodes in graphML file")
        for node_id, (node, data) in enumerate(graph.nodes(data=True)):
            # For now all colours are turquoise.
            gfile.writelines(fmt.addNode(node_id, [(0, node), (1, "turquoise"), (2, data['affiliation'])]))
            # Remember the numeric id on the node so edges can refer to it.
            graph.nodes[node]['id'] = node_id

        print()
        print("\t\tWriting edges in graphML file")
        for nTup, edge in enumerate(graph.edges()):
            nodeIdFrom = graph.nodes[edge[0]]['id']
            nodeIdTo = graph.nodes[edge[1]]['id']
            gfile.writelines(fmt.addEdge("e" + str(nTup), nodeIdFrom, nodeIdTo))
            if edge[0] == edge[1]:
                print("\t ERROR arc between the same mail/node/developer")
                print("\t edge=[" + str(edge) + "]")
                sys.exit(1)

        # Close the graph element, then the XML document.
        gfile.writelines(fmt.graph_closer)
        gfile.writelines(fmt.graphml_closer)