Skip to content

Commit 8bc4585

Browse files
committed
Enhance GitHub PR Fetching with Update and Merge Tracking
- Modify `fetch_and_save_prs` to track both new and updated PRs
- Add `total_prs_updated` counter to track existing PR updates
- Implement `update_or_create` method for more robust PR database management
- Improve logging to display total fetched, added, and updated PRs
- Refactor PR saving logic to handle merged and existing PRs more effectively
1 parent b379194 commit 8bc4585

File tree

1 file changed

+55
-40
lines changed

1 file changed

+55
-40
lines changed

website/management/commands/fetch_gsoc_prs.py

+55-40
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ def handle(self, *args, **options):
7878

7979
total_prs_fetched = 0
8080
total_prs_added = 0
81+
total_prs_updated = 0
8182

8283
for repo_full_name in all_repos:
8384
try:
@@ -93,12 +94,16 @@ def handle(self, *args, **options):
9394
self.stdout.write(f"Reset last_pr_page_processed for {repo_full_name}")
9495

9596
# Fetch closed PRs from the past specified days
96-
prs_fetched, prs_added = self.fetch_and_save_prs(repo, owner, repo_name, days, verbose)
97+
prs_fetched, prs_added, prs_updated = self.fetch_and_save_prs(repo, owner, repo_name, days, verbose)
9798

9899
total_prs_fetched += prs_fetched
99100
total_prs_added += prs_added
101+
total_prs_updated += prs_updated
100102

101-
self.stdout.write(f"Processed {repo_full_name}: Fetched {prs_fetched} PRs, Added {prs_added} new PRs")
103+
self.stdout.write(
104+
f"Processed {repo_full_name}: Fetched {prs_fetched} PRs, "
105+
f"Added {prs_added} new PRs, Updated {prs_updated} existing PRs"
106+
)
102107

103108
except Exception as e:
104109
logger.error(f"Error processing repository {repo_full_name}: {str(e)}", exc_info=True)
@@ -107,7 +112,9 @@ def handle(self, *args, **options):
107112
self.stdout.write(
108113
self.style.SUCCESS(
109114
f"Completed fetching PRs for GSoC repositories. "
110-
f"Total fetched: {total_prs_fetched}, Total added: {total_prs_added}"
115+
f"Total fetched: {total_prs_fetched}, "
116+
f"Total added: {total_prs_added}, "
117+
f"Total updated: {total_prs_updated}"
111118
)
112119
)
113120

@@ -158,10 +165,11 @@ def get_or_create_repo(self, owner, repo_name):
158165
def fetch_and_save_prs(self, repo, owner, repo_name, days, verbose=False):
159166
"""
160167
Fetch closed pull requests from GitHub API and save them to the database.
161-
Returns a tuple of (total_prs_fetched, total_prs_added).
168+
Returns a tuple of (total_prs_fetched, total_prs_added, total_prs_updated).
162169
"""
163170
total_prs_fetched = 0
164171
total_prs_added = 0
172+
total_prs_updated = 0
165173

166174
# Calculate date for filtering
167175
since_date = timezone.now() - timedelta(days=days)
@@ -211,9 +219,10 @@ def fetch_and_save_prs(self, repo, owner, repo_name, days, verbose=False):
211219
self.stdout.write(f"Found {merged_count} merged PRs on page {page}")
212220

213221
# Process this page of PRs
214-
prs_added = self.save_prs_to_db(repo, data, verbose)
222+
prs_added, prs_updated = self.save_prs_to_db(repo, data, verbose)
215223
total_prs_fetched += len(data)
216224
total_prs_added += prs_added
225+
total_prs_updated += prs_updated
217226

218227
# Update the repository's last processed page
219228
repo.last_pr_page_processed = page
@@ -243,26 +252,27 @@ def fetch_and_save_prs(self, repo, owner, repo_name, days, verbose=False):
243252
)
244253
self.stdout.write(f"Total merged PRs in database: {merged_prs}")
245254

246-
return total_prs_fetched, total_prs_added
255+
return total_prs_fetched, total_prs_added, total_prs_updated
247256

248257
@transaction.atomic
249258
def save_prs_to_db(self, repo, prs, verbose=False):
250259
"""
251260
Save pull requests to the database.
252-
Returns the number of new PRs added.
261+
Returns the number of new PRs added and updated.
253262
"""
254263
added_count = 0
264+
updated_count = 0
255265
skipped_count = 0
256266
skipped_not_merged = 0
257267

258268
self.stdout.write(f"Processing {len(prs)} PRs for {repo.name}")
259269

260270
for pr in prs:
261-
# Check if PR already exists in the database
262-
if GitHubIssue.objects.filter(issue_id=pr["id"]).exists():
263-
skipped_count += 1
271+
# Skip PRs that aren't merged
272+
if not pr.get("merged_at"):
273+
skipped_not_merged += 1
264274
if verbose:
265-
self.stdout.write(f"PR {pr['number']} already exists in the database")
275+
self.stdout.write(f"PR {pr['number']} is not merged, skipping")
266276
continue
267277

268278
# Parse dates
@@ -278,11 +288,6 @@ def save_prs_to_db(self, repo, prs, verbose=False):
278288
if pr["merged_at"]:
279289
merged_at = datetime.strptime(pr["merged_at"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.UTC)
280290
is_merged = True
281-
else:
282-
skipped_not_merged += 1
283-
if verbose:
284-
self.stdout.write(f"PR {pr['number']} is not merged, skipping")
285-
continue
286291

287292
# Try to find the user profile
288293
user_profile = None
@@ -291,31 +296,41 @@ def save_prs_to_db(self, repo, prs, verbose=False):
291296
if not user_profile and verbose:
292297
self.stdout.write(f"No user profile found for {pr['user']['html_url']}")
293298

294-
# Create the GitHubIssue
295-
github_issue = GitHubIssue(
296-
issue_id=pr["id"],
297-
title=pr["title"],
298-
body=pr["body"] or "",
299-
state=pr["state"],
300-
type="pull_request",
301-
created_at=created_at,
302-
updated_at=updated_at,
303-
closed_at=closed_at,
304-
merged_at=merged_at,
305-
is_merged=is_merged,
306-
url=pr["html_url"],
307-
repo=repo,
308-
user_profile=user_profile,
309-
)
310-
github_issue.save()
311-
312-
added_count += 1
313-
314-
if verbose:
315-
self.stdout.write(f"Added PR #{pr['number']}: {pr['title']}")
299+
# Prepare the data for the GitHubIssue
300+
issue_data = {
301+
"title": pr["title"],
302+
"body": pr["body"] or "",
303+
"state": pr["state"],
304+
"type": "pull_request",
305+
"created_at": created_at,
306+
"updated_at": updated_at,
307+
"closed_at": closed_at,
308+
"merged_at": merged_at,
309+
"is_merged": is_merged,
310+
"url": pr["html_url"],
311+
"repo": repo,
312+
"user_profile": user_profile,
313+
}
314+
315+
# Try to get the existing issue or create a new one
316+
try:
317+
github_issue, created = GitHubIssue.objects.update_or_create(issue_id=pr["id"], defaults=issue_data)
318+
319+
if created:
320+
added_count += 1
321+
if verbose:
322+
self.stdout.write(f"Added PR #{pr['number']}: {pr['title']}")
323+
else:
324+
updated_count += 1
325+
if verbose:
326+
self.stdout.write(f"Updated PR #{pr['number']}: {pr['title']}")
327+
except Exception as e:
328+
self.stdout.write(self.style.ERROR(f"Error saving PR #{pr['number']}: {str(e)}"))
329+
skipped_count += 1
316330

317-
self.stdout.write(f"Skipped {skipped_count} PRs that already exist in the database")
331+
self.stdout.write(f"Skipped {skipped_count} PRs due to errors")
318332
self.stdout.write(f"Skipped {skipped_not_merged} PRs that are not merged")
319333
self.stdout.write(f"Added {added_count} new PRs to the database")
334+
self.stdout.write(f"Updated {updated_count} existing PRs in the database")
320335

321-
return added_count
336+
return added_count, updated_count

0 commit comments

Comments
 (0)