-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathreassemble.py
executable file
·52 lines (44 loc) · 1.64 KB
/
reassemble.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
#
import sys
import csv
from pprint import pprint
"""2018.10.28 John Dey
Consume CSV output from pwalk and reassemble the directory data.
File system data from pwalk is flattened and out of order.
Re-walk the tree data and create two new fields for each directory:
a tree sum of the file count and of the bytes at each directory (node),
covering all of its child nodes. The sums for the root
become the total file count and the total size of every file.
Notes: I wrote this in Python as a proof of concept.
"""
def usage():
    """Print the command-line usage message and exit.

    The program name is taken from ``sys.argv[0]``.

    Raises:
        SystemExit: always, with exit status 1.
    """
    # The conversion must be spelled "%s": a "%" followed by a space and the
    # "i" of "inputfile" is parsed as a space-flagged integer conversion and
    # raises TypeError when given the program-name string.
    print("usage: %s inputfile.csv" % sys.argv[0])
    sys.exit(1)
def main(argv=None):
    """Read a pwalk CSV file and print tree totals for the root directory.

    Only rows whose column 15 is non-negative are kept (the directory rows).
    For each one a record is stored under its inode (column 0) with:

    * ``parent`` - column 1, the parent directory's inode (0 marks the root)
    * ``depth``  - column 2
    * ``dircnt`` / ``dirsiz`` - columns 15 and 16, the directory's own
      file count and byte sum
    * ``sumcnt`` / ``sumsiz`` - running totals, seeded from ``dircnt`` /
      ``dirsiz`` and then increased by every descendant directory

    Rows may arrive in any order: the whole table is loaded first and the
    tree is walked afterwards.

    Args:
        argv: command-line vector ``[program, csv_path]``; defaults to
            ``sys.argv``.

    Raises:
        SystemExit: via ``usage()`` when the argument count is wrong, or
            with status 1 when the input contains no root directory.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        usage()

    dd = {}
    root = None
    # surrogateescape: the rows may carry text that is not valid in the
    # default encoding; none of the text columns are used here, so pass
    # such bytes through instead of aborting the whole run.
    with open(argv[1], newline='', errors='surrogateescape') as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:  # csv.reader yields [] for blank lines
                continue
            file_count = int(row[15])
            if file_count < 0:  # only store directories
                continue
            inode = int(row[0])
            parent = int(row[1])
            byte_sum = int(row[16])
            entry = {'parent': parent,
                     'depth': int(row[2]),
                     'dircnt': file_count,
                     'sumcnt': file_count,
                     'dirsiz': byte_sum,
                     'sumsiz': byte_sum}
            dd[inode] = entry
            if parent == 0:
                root = inode
                # NOTE(review): applied to the root only, on the assumption
                # that no parent row accounts for the root's own entry and
                # size (column 7) -- confirm against pwalk's column layout.
                entry['sumcnt'] += 1
                entry['sumsiz'] += int(row[7])

    print("Total directories: %d" % len(dd))

    # Reassemble the tree: push every directory's own counts up to each of
    # its ancestors. Only 'dircnt'/'dirsiz' are propagated, so the result
    # does not depend on the order in which directories are visited.
    orphans = 0
    for entry in dd.values():
        parent = entry['parent']
        while parent != 0:
            ancestor = dd.get(parent)
            if ancestor is None:
                # Parent row is missing from the input; stop here rather
                # than fail with KeyError.
                orphans += 1
                break
            ancestor['sumcnt'] += entry['dircnt']
            ancestor['sumsiz'] += entry['dirsiz']
            parent = ancestor['parent']
    if orphans:
        print("warning: %d directories have an ancestor missing from the "
              "input" % orphans, file=sys.stderr)

    if root is None:
        print("error: no root directory (parent inode 0) found in %s"
              % argv[1], file=sys.stderr)
        sys.exit(1)
    pprint(dd[root])


if __name__ == "__main__":
    main()