-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
203 lines (166 loc) · 7.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import base64
import requests
import os
from PIL import Image, ImageDraw
import json
import xml.etree.ElementTree as ET
# OpenAI API Key
api_key = "YOUR_API_KEY"
# Function to encode the image
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
# Function to perform object detection and labeling
def detect_objects(image_path, labels):
label_list = ', '.join([f"'{label}'" for label in labels])
prompt_text = (
f"Identify and outline the entire body of any {label_list} in the image. "
"Provide bounding boxes that encompass the full extent of each object, "
"ensuring that all parts, such as the face, legs, and tail, are included within. "
"Minimize the inclusion of background space as much as possible. "
"Return the bounding box coordinates in a JSON array named 'results', "
"with 'label' and 'coordinates' for each object. "
"The 'coordinates' should detail 'x1', 'y1' (top-left corner), and 'x2', 'y2' "
"(bottom-right corner) following the PIL draw method's requirements."
)
base64_image = encode_image(image_path)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
payload = {
"model": "gpt-4-vision-preview",
"messages": [
{
"role": "system",
"content": prompt_text
},
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
}
],
"max_tokens": 700
}
try:
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
response.raise_for_status() # Raises an HTTPError if the HTTP request returned an unsuccessful status code
res_json = response.json()["choices"][0]["message"]["content"].split("```json")[1].split("```")[0]
print(res_json)
result = json.loads(res_json)
image = Image.open(image_path)
original_width, original_height= image.size
scaled_side = 768
if (min(original_height, original_width) > scaled_side):
combined_scale_factor = min(original_height, original_width) / scaled_side
else:
combined_scale_factor = 1
for item in result["results"]:
coords = item["coordinates"]
item["coordinates"]["x1"] = int(coords["x1"] * combined_scale_factor)
item["coordinates"]["y1"] = int(coords["y1"] * combined_scale_factor)
item["coordinates"]["x2"] = int(coords["x2"] * combined_scale_factor)
item["coordinates"]["y2"] = int(coords["y2"] * combined_scale_factor)
image.save(image_path+"_annotated.jpg")
print(result["results"])
return result["results"]
except requests.exceptions.RequestException as e:
print(f"Error during API request: {e}")
return []
# Function to save annotations in TensorFlow Object Detection format
def save_tf_annotations(image_path, objects, output_folder, image_width, image_height):
image_filename = os.path.basename(image_path)
annotation_filename = os.path.splitext(image_filename)[0] + ".xml"
annotation_full_path = os.path.join(output_folder, annotation_filename) # Full path including output_folder
# Create an XML annotation file
annotation = ET.Element("annotation")
folder = ET.SubElement(annotation, "folder")
folder.text = "images"
filename = ET.SubElement(annotation, "filename")
filename.text = image_filename
size = ET.SubElement(annotation, "size")
width = ET.SubElement(size, "width")
height = ET.SubElement(size, "height")
depth = ET.SubElement(size, "depth")
width.text = str(image_width)
height.text = str(image_height)
depth.text = "3"
for obj in objects:
obj_elem = ET.SubElement(annotation, "object")
name = ET.SubElement(obj_elem, "name")
name.text = obj['label']
bbox = ET.SubElement(obj_elem, "bndbox")
xmin = ET.SubElement(bbox, "xmin")
ymin = ET.SubElement(bbox, "ymin")
xmax = ET.SubElement(bbox, "xmax")
ymax = ET.SubElement(bbox, "ymax")
xmin.text = str(obj['coordinates']['x1'])
ymin.text = str(obj['coordinates']['y1'])
xmax.text = str(obj['coordinates']['x2'])
ymax.text = str(obj['coordinates']['y2'])
# Create and write the XML annotation file
tree = ET.ElementTree(annotation)
tree.write(annotation_full_path)
# Function to save annotations in JSON format
def save_json_annotations(image_path, objects, output_folder):
image_filename = os.path.basename(image_path)
annotation_filename = os.path.splitext(image_filename)[0] + ".json"
annotation_path = os.path.join(output_folder, annotation_filename)
# Create a dictionary to store JSON annotations
json_annotations = {
"filename": image_filename,
"objects": objects
}
with open(annotation_path, "w") as annotation_file:
json.dump(json_annotations, annotation_file, indent=4)
def process_images_in_folder(input_folder, output_format, output_folder,labels):
# Updated with error handling
labelMap = []
for filename in os.listdir(input_folder):
if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")): # Supporting multiple image formats
image_path = os.path.join(input_folder, filename)
try:
objects = detect_objects(image_path,labels)
image = Image.open(image_path)
if output_format == "tf":
#Extract the label from the object and add it to the labelMap if it is not already there with the id
for obj in objects:
label = obj["label"]
if label not in labelMap:
labelMap.append(label)
save_tf_annotations(image_path, objects, output_folder, image.width, image.height)
elif output_format == "json":
save_json_annotations(image_path, objects, output_folder)
else:
print(f"Invalid output format: {output_format}")
except Exception as e:
print(f"Error processing image {filename}: {e}")
if output_format == "tf":
out= os.path.join(output_folder, "labelmap.pbtxt")
labelMapFile = open(out, "w")
for i in range(len(labelMap)):
labelMapFile.write("item {\n")
labelMapFile.write(" id: " + str(i+1) + "\n")
labelMapFile.write(" name: '" + labelMap[i] + "'\n")
labelMapFile.write("}\n")
# Main function
def main():
input_folder = "./test"
output_format = "tf" # Choose "tf" or "json"
output_folder = "./test"
labels = ['cat', 'dog', 'bird']
try:
os.makedirs(output_folder, exist_ok=True)
except OSError as e:
print(f"Error creating output directory: {e}")
return
process_images_in_folder(input_folder, output_format, output_folder,labels)
if __name__ == "__main__":
main()