Skip to content

Commit 5d261c3

Browse files
committed
🐛 fixes cv2 issue
1 parent 562fd94 commit 5d261c3

6 files changed

+68
-36
lines changed

Duplication Detecting Scripts/detect_duplicate_multiple_core.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,19 @@
33
import glob
44
import time
55
import multiprocessing
6-
import threading
6+
from os import path
77

8+
# setting up path
9+
basepath = path.dirname(__file__)
10+
11+
# loading environment
12+
image_path = path.abspath(path.join(basepath, "../images_in", "duplicate.jpg")) # Image we are running these tests against.
813

914
"""
1015
Multiprocessing
1116
"""
1217

13-
14-
image_name = 'c2.jpeg' # origional image path goes here
15-
original = cv2.imread(image_name)
18+
original = cv2.imread(image_path)
1619

1720

1821

@@ -21,15 +24,14 @@ def find_duplicates(image_):
2124
try:
2225
image_to_compare = cv2.imread(image_)
2326
if original.shape == image_to_compare.shape:
24-
25-
difference = cv2.subtract(original, image_to_compare)
27+
difference = cv2.subtract(original, image_to_compare) # set threshold here
2628
b, g, r = cv2.split(difference)
2729

2830
if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) == 0:
2931
duplicates = image_
30-
31-
32-
sift = cv2.xfeatures2d.SIFT_create()
32+
33+
# The operations below are not needed for duplicate detection; they are added simply to consume some time so the results are easier to observe.
34+
sift = cv2.SIFT_create()
3335
kp_1, desc_1 = sift.detectAndCompute(original, None)
3436
kp_2, desc_2 = sift.detectAndCompute(image_to_compare, None)
3537

@@ -54,7 +56,7 @@ def find_duplicates(image_):
5456
return duplicates
5557

5658
except Exception as e:
57-
pass
59+
print('[!] {}'.format(e))
5860

5961

6062

Duplication Detecting Scripts/detect_duplicate_multiple_thread.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,21 @@
55
import multiprocessing
66
import threading
77

8+
from os import path
9+
10+
# setting up path
11+
basepath = path.dirname(__file__)
12+
13+
# loading environment
14+
image_path = path.abspath(path.join(basepath, "../images_in", "duplicate.jpg")) # Image we are running these tests against.
815

916

1017
"""
1118
Threading
1219
"""
1320

1421

15-
16-
image_name = 'c2.jpeg' # origional image path goes here
17-
original = cv2.imread(image_name)
22+
original = cv2.imread(image_path)
1823

1924

2025

@@ -28,9 +33,9 @@ def find_duplicates(image_=''):
2833
b, g, r = cv2.split(difference)
2934

3035
if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) == 0:
31-
print(f'Duplicates Found: {image_name} is Duplicate of {image_}')
36+
print(f'Duplicates Found: {image_path} is Duplicate of {image_}')
3237

33-
sift = cv2.xfeatures2d.SIFT_create()
38+
sift = cv2.SIFT_create()
3439
kp_1, desc_1 = sift.detectAndCompute(original, None)
3540
kp_2, desc_2 = sift.detectAndCompute(image_to_compare, None)
3641

@@ -54,7 +59,7 @@ def find_duplicates(image_=''):
5459

5560

5661
except Exception as e:
57-
pass
62+
print('[!] {}'.format(e))
5863

5964

6065

Duplication Detecting Scripts/detect_duplicate_single_core.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44
import time
55
import multiprocessing
66
import threading
7+
from os import path
8+
9+
# setting up path
10+
basepath = path.dirname(__file__)
11+
12+
# loading environment
13+
image_path = path.abspath(path.join(basepath, "../images_in", "duplicate.jpg")) # Image we are running these tests against.
14+
715

816
"""
917
Sequential on single core
@@ -21,8 +29,7 @@ def filebrowser(ext='', directory=''):
2129
## print(image_dir)
2230

2331

24-
image_name = 'c2.jpeg' # origional image path goes here
25-
original = cv2.imread(image_name)
32+
original = cv2.imread(image_path)
2633

2734

2835
start_time = time.time()
@@ -36,10 +43,10 @@ def filebrowser(ext='', directory=''):
3643
b, g, r = cv2.split(difference)
3744

3845
if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) == 0:
39-
print(f'Duplicates Found: {image_name} is Duplicate of {image_}')
46+
print(f'Duplicates Found: {image_path} is Duplicate of {image_}')
4047

4148

42-
sift = cv2.xfeatures2d.SIFT_create()
49+
sift = cv2.SIFT_create()
4350
kp_1, desc_1 = sift.detectAndCompute(original, None)
4451
kp_2, desc_2 = sift.detectAndCompute(image_to_compare, None)
4552

@@ -61,7 +68,7 @@ def filebrowser(ext='', directory=''):
6168
else:
6269
number_keypoints = len(kp_2)
6370
except Exception as e:
64-
pass
71+
print('[!] {}'.format(e))
6572

6673

6774

Duplication Detecting Scripts/detect_duplicates.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
import time
55
import multiprocessing
66
import threading
7+
from os import path
8+
9+
# setting up path
10+
basepath = path.dirname(__file__)
11+
12+
# loading environment
13+
image_path = path.abspath(path.join(basepath, "../images_in", "duplicate.jpg")) # Image we are running these tests against.
714

815
"""
916
Sequential on single core
@@ -21,8 +28,8 @@
2128
# ## print(image_dir)
2229

2330

24-
# image_name = 'c2.jpeg' # origional image path goes here
25-
# original = cv2.imread(image_name)
31+
# image_path = 'c2.jpeg' # origional image path goes here
32+
# original = cv2.imread(image_path)
2633

2734

2835
# start_time = time.time()
@@ -36,7 +43,7 @@
3643
# b, g, r = cv2.split(difference)
3744

3845
# if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) == 0:
39-
# print(f'Duplicates Found: {image_name} is Duplicate of {image_}')
46+
# print(f'Duplicates Found: {image_path} is Duplicate of {image_}')
4047

4148

4249
# sift = cv2.xfeatures2d.SIFT_create()
@@ -76,8 +83,8 @@
7683
"""
7784

7885

79-
# image_name = 'c2.jpeg' # origional image path goes here
80-
# original = cv2.imread(image_name)
86+
# image_path = 'c2.jpeg' # origional image path goes here
87+
# original = cv2.imread(image_path)
8188

8289

8390

@@ -156,10 +163,7 @@
156163
Threading
157164
"""
158165

159-
160-
161-
image_name = 'c2.jpeg' # origional image path goes here
162-
original = cv2.imread(image_name)
166+
original = cv2.imread(image_path)
163167

164168

165169

@@ -173,9 +177,9 @@ def find_duplicates(image_=''):
173177
b, g, r = cv2.split(difference)
174178

175179
if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) == 0:
176-
print(f'Duplicates Found: {image_name} is Duplicate of {image_}')
180+
print(f'Duplicates Found: {image_path} is Duplicate of {image_}')
177181

178-
sift = cv2.xfeatures2d.SIFT_create()
182+
sift = cv2.SIFT_create()
179183
kp_1, desc_1 = sift.detectAndCompute(original, None)
180184
kp_2, desc_2 = sift.detectAndCompute(image_to_compare, None)
181185

@@ -199,7 +203,7 @@ def find_duplicates(image_=''):
199203

200204

201205
except Exception as e:
202-
pass
206+
print('[!] {}'.format(e))
203207

204208

205209

README.md

+15-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,16 @@ PDC Project
4444
- directions to install
4545

4646
```bash
47-
git clone https://github.com/1UC1F3R616/Duplicate-Images-Detection-using-Parallel-Processing.git
47+
git clone https://github.com/1UC1F3R616/Duplicate-Images-Detection-using-Parallel-Processing.git # or download archive manually.
48+
cd Duplicate-Images-Detection-using-Parallel-Processing
49+
python3 -m venv env # create env.
50+
source ./env/bin/activate # activate env -> this is for Linux. On Windows, run env\Scripts\activate instead.
51+
pip install -r requirements.txt
52+
python ./Duplication\ Detecting\ Scripts/ddi.py ./images_in/duplicate.jpg # this will return similar images
53+
python ./Duplication\ Detecting\ Scripts/detect_duplicate_multiple_core.py ./images_in/duplicate.jpg # observe the time taken
54+
python ./Duplication\ Detecting\ Scripts/detect_duplicate_multiple_thread.py ./images_in/duplicate.jpg # observe the time taken
55+
python ./Duplication\ Detecting\ Scripts/detect_duplicate_single_core.py ./images_in/duplicate.jpg # observe the time taken
56+
python ./Duplication\ Detecting\ Scripts/detect_duplicates.py ./images_in/duplicate.jpg # observe the time taken
4857
```
4958

5059
- directions to use cli script
@@ -63,6 +72,11 @@ It's of no use to simple publish a package that's hard to use with imports.
6372
You can have fun with the CLI script. Relax and enjoy quarantine.
6473
```
6574

75+
```txt
76+
This doesn't find merely similar images, only exact duplicates. It's not an AI project, so bear with this, but you can set a threshold to find almost-similar images.
77+
By threshold I mean a simple if check where not only (image_pixels_A - image_pixels_B == 0) is considered similar, but also something like (image_pixels_A - image_pixels_B < 0.5).
78+
```
79+
6680
</br>
6781

6882
- Link to [Report](https://github.com/1UC1F3R616/Duplicate-Images-Detection-using-Parallel-Processing/blob/master/(J%20Component)%20Final%20Report.pdf)

requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
opencv-python==3.4.2.17
2-
opencv-contrib-python==3.4.2.17
1+
opencv-python
2+
opencv-contrib-python

0 commit comments

Comments
 (0)