# delete_when_unzip_rar.py
# -*- coding: utf-8 -*-
import io
import os
import sys

import libarchive as libap
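
# Overall idea: stream-extract an archive with libarchive while repeatedly
# truncating the already-consumed head of the source file, so the source
# shrinks as the extracted files grow and peak disk usage stays low.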


def shift_then_truncate(f, chunk_size=1024):
    '''Shift the file content towards the head by chunk_size bytes and keep the part
    that was not overwritten (i.e. delete the first chunk_size bytes in place).'''
    pointer = chunk_size
    while True:
        f.seek(pointer)
        chunk = f.read(chunk_size)
        if not chunk:
            break
        new_pointer = f.tell()
        if new_pointer <= chunk_size:  # file smaller than chunk_size, nothing to shift
            break
        f.seek(pointer - chunk_size)
        f.write(chunk)          # write segment k into the space of segment k-1
        pointer = new_pointer   # points at the end of segment k
    f.seek(pointer - chunk_size)  # end of the shifted content
    f.truncate()
    f.seek(0)
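
# Illustrative sketch (not part of the original workflow; 'demo.bin' is a
# made-up path): what shift_then_truncate does to a small file.
def _demo_shift_then_truncate():
    with open('demo.bin', 'wb') as f:
        f.write(b'AAAABBBBCC')                # 10 bytes
    with open('demo.bin', 'rb+') as f:
        shift_then_truncate(f, chunk_size=4)  # drop the first 4 bytes in place
    with open('demo.bin', 'rb') as f:
        assert f.read() == b'BBBBCC'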


def chain_streams(streams, buffer_size=io.DEFAULT_BUFFER_SIZE):
    """
    Chain an iterable of streams together into a single buffered stream.
    Usage:
    ```
    def generate_open_file_streams():
        for file in filenames:
            yield open(file, 'rb')
    f = chain_streams(generate_open_file_streams())
    f.read()
    ```
    _From: https://stackoverflow.com/questions/24528278/stream-multiple-files-into-a-readable-object-in-python_
    """
    class ChainStream(io.RawIOBase):
        def __init__(self):
            self.leftover = b''
            self.stream_iter = iter(streams)
            try:
                self.stream = next(self.stream_iter)
            except StopIteration:
                self.stream = None

        def readable(self):
            return True

        def seekable(self):
            return False

        def _read_next_chunk(self, max_length):
            # Return 0 or more bytes from the current stream, first returning all
            # leftover bytes. If the stream is closed, return b''.
            if self.leftover:
                return self.leftover
            elif self.stream is not None:
                global first_read_flag
                if first_read_flag:
                    first_read_flag = False
                else:
                    shift_then_truncate(self.stream, max_length)  # drop the bytes consumed by the previous read
                return self.stream.read(max_length)
            else:
                return b''

        def readinto(self, b):
            buffer_length = len(b)
            chunk = self._read_next_chunk(buffer_length)
            while len(chunk) == 0:
                # move to next stream
                if self.stream is not None:
                    self.stream.close()
                try:
                    self.stream = next(self.stream_iter)
                    chunk = self._read_next_chunk(buffer_length)
                except StopIteration:
                    # No more streams to chain together
                    self.stream = None
                    return 0  # indicate EOF
            output = chunk[:buffer_length]
            self.leftover = chunk[buffer_length:]  # keep any surplus for the next read
            b[:len(output)] = output
            return len(output)

    return io.BufferedReader(ChainStream(), buffer_size=buffer_size)
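
# Illustrative sketch (assumed part-file paths; not in the original script):
# chain several on-disk parts into one readable stream. Every read after the
# first triggers shift_then_truncate, so the parts (opened 'rb+') are
# destructively consumed while being read.
def _demo_chain_streams(paths, read_size=1024 * 1024):
    global first_read_flag
    first_read_flag = True
    reader = chain_streams((open(p, 'rb+') for p in paths), buffer_size=read_size)
    while True:
        block = reader.read(read_size)
        if not block:
            break
        # ... hand `block` to a consumer here ...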


def unzip_buffer(e, file_root):
    '''Write the contents of already-decompressed archive entries to disk.'''
    for entry in e:
        rel_file_path = str(entry)  # path relative to the archive root
        file_path_name = os.path.join(file_root, rel_file_path)
        if rel_file_path.endswith('/') or entry.filetype == 16384:  # 16384 == AE_IFDIR: directory, no data to write
            os.makedirs(file_path_name, exist_ok=True)
        else:
            file_multi_path, basename = os.path.split(file_path_name)
            os.makedirs(file_multi_path, exist_ok=True)
            with open(file_path_name, 'wb') as f1:  # regular file
                for block in entry.get_blocks():
                    f1.write(block)
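
# Illustrative sketch (assumed paths; not the streaming path used below):
# extract a whole archive by passing libarchive's entry iterator straight
# to unzip_buffer.
def _demo_unzip_buffer(archive_path='archive.rar', dest_root='extracted'):
    with libap.file_reader(archive_path) as entries:
        unzip_buffer(entries, dest_root)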


def generate_open_file_streams(filenames):
    '''Turn a list of file names into an iterator of open file streams.'''
    count = 1
    for file in filenames:
        with open(file, 'rb+') as f:
            # if len(filenames) > 1 and count == 1:  # multi-volume archive
            #     f.seek(4)
            count += 1
            yield f


def main_unzip(file_path, chunk_size, password=None):
    '''Stream-extract an archive on disk, deleting the consumed data as it is extracted.'''
    file_oripath, basename = os.path.split(file_path)
    file_folder = os.path.splitext(basename)[0]
    if not os.path.exists(os.path.join(file_oripath, file_folder)):
        os.makedirs(os.path.join(file_oripath, file_folder))
    global first_read_flag
    first_read_flag = True
    fp = open(file_path, 'rb+')
    try:
        fs = chain_streams([fp], chunk_size)
        # if password is not None:
        #     password = password.encode()  # possibly required; untested
        with libap.stream_reader(fs, passphrase=password) as e:
            unzip_buffer(e, os.path.join(file_oripath, file_folder))
        fp.close()
        if not first_read_flag:  # at least one chunk was consumed, so the source can be removed
            os.remove(file_path)
    except Exception:
        fp.close()
        raise
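
# Command-line usage (matches the argument parsing below); archive name and
# password are placeholders:
#   python delete_when_unzip_rar.py archive.rar "1024*1024*64" mypassword
# Programmatic equivalent:
#   main_unzip('archive.rar', 1024 * 1024 * 64, password='mypassword')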


if __name__ == '__main__':
    if len(sys.argv) <= 1 or len(sys.argv) > 4:
        raise AttributeError('Usage: python delete_when_unzip_rar.py FILE_PATH [CHUNK_SIZE] [PASSWORD]')
    FILE_PATH = sys.argv[1]
    CHUNK_SIZE = 1024 * 1024 * 512  # default: 512 MB per chunk
    password = None
    if len(sys.argv) > 2:
        CHUNK_SIZE = eval(sys.argv[2])  # accepts expressions such as "1024*1024*64"
    if len(sys.argv) > 3:
        password = sys.argv[3]
    main_unzip(FILE_PATH, CHUNK_SIZE, password)
# file_chunk_generator = read_file_by_chunk(file_path)
# for chunk in file_chunk_generator:
# sys.stdout.write('UnRAR.exe x -si '+chunk.hex())
# with libap.file_reader(file_path) as ar:
# unzip_buffer(ar)
# libap.extract_file(file_path)