Skip to content

Commit d23507f

Browse files
authored
Merge branch 'main' into contributors
2 parents f44ace0 + bd389c0 commit d23507f

File tree

7 files changed

+178
-59
lines changed

7 files changed

+178
-59
lines changed

website/admin.py

+31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from urllib.parse import urlparse
22

33
from django.contrib import admin
4+
from django.contrib.admin import SimpleListFilter
45
from django.contrib.auth.admin import UserAdmin
56
from django.contrib.auth.models import User
67
from django.template.defaultfilters import truncatechars
@@ -400,6 +401,31 @@ def unblock_user_agent(modeladmin, request, queryset):
400401
unblock_user_agent.short_description = "Unblock selected UserAgent"
401402

402403

404+
# Custom filter for IP address ranges
405+
class IPAddressRangeFilter(SimpleListFilter):
    """Admin list filter that narrows records by the textual form of ``address``.

    Every option is a plain string lookup on the ``address`` field; no CIDR
    arithmetic is performed.
    """

    title = "IP Address Range"
    parameter_name = "ip_range"

    # Option key -> (sidebar label, ORM lookup applied when that option is chosen).
    # These are prefix/substring matches, so "internal" also admits any address
    # whose text begins with "127.0.0.1" (for example 127.0.0.10).
    _RANGE_OPTIONS = {
        "internal": ("Internal (127.0.0.1)", {"address__startswith": "127.0.0.1"}),
        "local": ("Local (192.168.x.x)", {"address__startswith": "192.168."}),
        "vpn": ("VPN (10.x.x.x)", {"address__startswith": "10."}),
        "ipv6": ("IPv6", {"address__contains": ":"}),
    }

    def lookups(self, request, model_admin):
        """Return the ``(value, label)`` pairs offered in the filter sidebar."""
        return tuple((key, label) for key, (label, _lookup) in self._RANGE_OPTIONS.items())

    def queryset(self, request, queryset):
        """Filter ``queryset`` by the selected option, or return it untouched.

        When no option (or an unrecognised one) is selected the queryset is
        passed through unchanged.
        """
        chosen = self._RANGE_OPTIONS.get(self.value())
        if chosen is None:
            return queryset
        _label, lookup = chosen
        return queryset.filter(**lookup)
427+
428+
403429
class IPAdmin(admin.ModelAdmin):
404430
list_display = (
405431
"id",
@@ -414,6 +440,10 @@ class IPAdmin(admin.ModelAdmin):
414440
"referer",
415441
)
416442

443+
search_fields = ["address", "user", "agent", "path", "method", "referer"]
444+
list_filter = ["method", "created", IPAddressRangeFilter]
445+
date_hierarchy = "created"
446+
417447
actions = [block_ip, unblock_ip, block_user_agent, unblock_user_agent]
418448

419449

@@ -554,6 +584,7 @@ class GitHubIssueAdmin(admin.ModelAdmin):
554584
"is_merged",
555585
"user_profile",
556586
"sent_by_user",
587+
"repo",
557588
]
558589
search_fields = [
559590
"title",

website/management/commands/fetch_gsoc_prs.py

+108-49
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,18 @@ def add_arguments(self, parser):
4141
default=None,
4242
help="Comma-separated list of repositories to process (e.g., 'OWASP-BLT/BLT,OWASP-BLT/BLT-Flutter')",
4343
)
44+
parser.add_argument(
45+
"--reset",
46+
action="store_true",
47+
help="Reset the last_pr_page_processed counter and start from the beginning",
48+
)
4449

4550
def handle(self, *args, **options):
4651
days = options["days"]
4752
limit = options["limit"]
4853
verbose = True # Always use verbose mode for debugging
4954
repos_arg = options["repos"]
55+
reset = options["reset"]
5056

5157
self.stdout.write(f"Fetching closed PRs from the past {days} days for GSoC repositories")
5258

@@ -72,6 +78,7 @@ def handle(self, *args, **options):
7278

7379
total_prs_fetched = 0
7480
total_prs_added = 0
81+
total_prs_updated = 0
7582

7683
for repo_full_name in all_repos:
7784
try:
@@ -80,16 +87,23 @@ def handle(self, *args, **options):
8087
# Check if the repository exists in our database
8188
repo = self.get_or_create_repo(owner, repo_name)
8289

83-
# Fetch closed PRs from the past year
84-
prs = self.fetch_closed_prs(owner, repo_name, days, verbose)
90+
# Reset the last_pr_page_processed if requested
91+
if reset:
92+
repo.last_pr_page_processed = 0
93+
repo.save()
94+
self.stdout.write(f"Reset last_pr_page_processed for {repo_full_name}")
8595

86-
# Save PRs to the database
87-
added_count = self.save_prs_to_db(repo, prs, verbose)
96+
# Fetch closed PRs from the past specified days
97+
prs_fetched, prs_added, prs_updated = self.fetch_and_save_prs(repo, owner, repo_name, days, verbose)
8898

89-
total_prs_fetched += len(prs)
90-
total_prs_added += added_count
99+
total_prs_fetched += prs_fetched
100+
total_prs_added += prs_added
101+
total_prs_updated += prs_updated
91102

92-
self.stdout.write(f"Processed {repo_full_name}: Fetched {len(prs)} PRs, Added {added_count} new PRs")
103+
self.stdout.write(
104+
f"Processed {repo_full_name}: Fetched {prs_fetched} PRs, "
105+
f"Added {prs_added} new PRs, Updated {prs_updated} existing PRs"
106+
)
93107

94108
except Exception as e:
95109
logger.error(f"Error processing repository {repo_full_name}: {str(e)}", exc_info=True)
@@ -98,7 +112,9 @@ def handle(self, *args, **options):
98112
self.stdout.write(
99113
self.style.SUCCESS(
100114
f"Completed fetching PRs for GSoC repositories. "
101-
f"Total fetched: {total_prs_fetched}, Total added: {total_prs_added}"
115+
f"Total fetched: {total_prs_fetched}, "
116+
f"Total added: {total_prs_added}, "
117+
f"Total updated: {total_prs_updated}"
102118
)
103119
)
104120

@@ -140,35 +156,43 @@ def get_or_create_repo(self, owner, repo_name):
140156
watchers=repo_data.get("watchers_count", 0),
141157
primary_language=repo_data.get("language"),
142158
is_owasp_repo=owner.upper() == "OWASP",
159+
last_pr_page_processed=0,
143160
)
144161
repo.save()
145162

146163
return repo
147164

148-
def fetch_closed_prs(self, owner, repo_name, days, verbose=False):
165+
def fetch_and_save_prs(self, repo, owner, repo_name, days, verbose=False):
149166
"""
150-
Fetch closed pull requests from GitHub API.
151-
Only fetches PRs from the past specified number of days.
167+
Fetch closed pull requests from GitHub API and save them to the database.
168+
Returns a tuple of (total_prs_fetched, total_prs_added, total_prs_updated).
152169
"""
153-
prs = []
154-
page = 1
155-
per_page = 100
170+
total_prs_fetched = 0
171+
total_prs_added = 0
172+
total_prs_updated = 0
156173

157174
# Calculate date for filtering
158175
since_date = timezone.now() - timedelta(days=days)
159176
since_date_str = since_date.strftime("%Y-%m-%dT%H:%M:%SZ")
160177

161178
self.stdout.write(f"Fetching PRs since {since_date_str} for {owner}/{repo_name}")
162179
self.stdout.write(f"Current date: {timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ')}, Looking back {days} days")
180+
self.stdout.write(f"Starting from page {repo.last_pr_page_processed + 1}")
163181

182+
# Set up headers for GitHub API
164183
headers = {"Accept": "application/vnd.github.v3+json"}
165184
if settings.GITHUB_TOKEN:
166185
headers["Authorization"] = f"token {settings.GITHUB_TOKEN}"
167186
self.stdout.write("Using GitHub token for authentication")
168187
else:
169188
self.stdout.write("No GitHub token found, using unauthenticated requests (rate limits may apply)")
170189

171-
while True:
190+
# Start from the last processed page + 1
191+
page = repo.last_pr_page_processed + 1
192+
per_page = 100
193+
reached_end = False
194+
195+
while not reached_end:
172196
url = (
173197
f"https://api.github.com/repos/{owner}/{repo_name}/pulls"
174198
f"?state=closed&per_page={per_page}&page={page}&sort=updated&direction=desc"
@@ -185,6 +209,7 @@ def fetch_closed_prs(self, owner, repo_name, days, verbose=False):
185209
data = response.json()
186210
if not data:
187211
self.stdout.write(f"No more PRs found for {owner}/{repo_name} on page {page}")
212+
reached_end = True
188213
break
189214

190215
self.stdout.write(f"Fetched {len(data)} PRs from page {page}")
@@ -193,11 +218,21 @@ def fetch_closed_prs(self, owner, repo_name, days, verbose=False):
193218
merged_count = sum(1 for pr in data if pr.get("merged_at") is not None)
194219
self.stdout.write(f"Found {merged_count} merged PRs on page {page}")
195220

196-
prs.extend(data)
221+
# Process this page of PRs
222+
prs_added, prs_updated = self.save_prs_to_db(repo, data, verbose)
223+
total_prs_fetched += len(data)
224+
total_prs_added += prs_added
225+
total_prs_updated += prs_updated
226+
227+
# Update the repository's last processed page
228+
repo.last_pr_page_processed = page
229+
repo.last_pr_fetch_date = timezone.now()
230+
repo.save()
197231

198232
# Check if we've reached the last page
199233
if len(data) < per_page:
200234
self.stdout.write(f"Reached last page ({page}) for {owner}/{repo_name}")
235+
reached_end = True
201236
break
202237

203238
page += 1
@@ -208,27 +243,36 @@ def fetch_closed_prs(self, owner, repo_name, days, verbose=False):
208243
break
209244

210245
if verbose:
211-
self.stdout.write(f"Fetched {len(prs)} PRs for {owner}/{repo_name}")
212-
merged_prs = sum(1 for pr in prs if pr.get("merged_at") is not None)
213-
self.stdout.write(f"Of which {merged_prs} are merged PRs")
246+
self.stdout.write(f"Fetched {total_prs_fetched} PRs for {owner}/{repo_name}")
247+
merged_prs = sum(
248+
1
249+
for pr in GitHubIssue.objects.filter(
250+
repo=repo, type="pull_request", is_merged=True, created_at__gte=since_date
251+
)
252+
)
253+
self.stdout.write(f"Total merged PRs in database: {merged_prs}")
214254

215-
return prs
255+
return total_prs_fetched, total_prs_added, total_prs_updated
216256

217257
@transaction.atomic
218258
def save_prs_to_db(self, repo, prs, verbose=False):
219259
"""
220260
Save pull requests to the database.
221-
Returns the number of new PRs added.
261+
Returns the number of new PRs added and updated.
222262
"""
223263
added_count = 0
264+
updated_count = 0
224265
skipped_count = 0
266+
skipped_not_merged = 0
267+
268+
self.stdout.write(f"Processing {len(prs)} PRs for {repo.name}")
225269

226270
for pr in prs:
227-
# Check if PR already exists in the database
228-
if GitHubIssue.objects.filter(issue_id=pr["id"]).exists():
229-
skipped_count += 1
271+
# Skip PRs that aren't merged
272+
if not pr.get("merged_at"):
273+
skipped_not_merged += 1
230274
if verbose:
231-
self.stdout.write(f"PR {pr['number']} already exists in the database")
275+
self.stdout.write(f"PR {pr['number']} is not merged, skipping")
232276
continue
233277

234278
# Parse dates
@@ -249,29 +293,44 @@ def save_prs_to_db(self, repo, prs, verbose=False):
249293
user_profile = None
250294
if pr["user"] and pr["user"]["html_url"]:
251295
user_profile = UserProfile.objects.filter(github_url=pr["user"]["html_url"]).first()
296+
if not user_profile and verbose:
297+
self.stdout.write(f"No user profile found for {pr['user']['html_url']}")
298+
299+
# Prepare the data for the GitHubIssue
300+
issue_data = {
301+
"title": pr["title"],
302+
"body": pr["body"] or "",
303+
"state": pr["state"],
304+
"type": "pull_request",
305+
"created_at": created_at,
306+
"updated_at": updated_at,
307+
"closed_at": closed_at,
308+
"merged_at": merged_at,
309+
"is_merged": is_merged,
310+
"url": pr["html_url"],
311+
"repo": repo,
312+
"user_profile": user_profile,
313+
}
314+
315+
# Try to get the existing issue or create a new one
316+
try:
317+
github_issue, created = GitHubIssue.objects.update_or_create(issue_id=pr["id"], defaults=issue_data)
318+
319+
if created:
320+
added_count += 1
321+
if verbose:
322+
self.stdout.write(f"Added PR #{pr['number']}: {pr['title']}")
323+
else:
324+
updated_count += 1
325+
if verbose:
326+
self.stdout.write(f"Updated PR #{pr['number']}: {pr['title']}")
327+
except Exception as e:
328+
self.stdout.write(self.style.ERROR(f"Error saving PR #{pr['number']}: {str(e)}"))
329+
skipped_count += 1
252330

253-
# Create the GitHubIssue
254-
github_issue = GitHubIssue(
255-
issue_id=pr["id"],
256-
title=pr["title"],
257-
body=pr["body"] or "",
258-
state=pr["state"],
259-
type="pull_request",
260-
created_at=created_at,
261-
updated_at=updated_at,
262-
closed_at=closed_at,
263-
merged_at=merged_at,
264-
is_merged=is_merged,
265-
url=pr["html_url"],
266-
repo=repo,
267-
user_profile=user_profile,
268-
)
269-
github_issue.save()
270-
271-
added_count += 1
272-
273-
if verbose:
274-
self.stdout.write(f"Added PR #{pr['number']}: {pr['title']}")
331+
self.stdout.write(f"Skipped {skipped_count} PRs due to errors")
332+
self.stdout.write(f"Skipped {skipped_not_merged} PRs that are not merged")
333+
self.stdout.write(f"Added {added_count} new PRs to the database")
334+
self.stdout.write(f"Updated {updated_count} existing PRs in the database")
275335

276-
self.stdout.write(f"Skipped {skipped_count} PRs that already exist in the database")
277-
return added_count
336+
return added_count, updated_count
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 5.1.6 on 2025-03-10 21:04
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the two PR-fetch bookkeeping columns to the ``repo`` model."""

    # Must run after the hackathon migration that precedes it in the "website" app.
    dependencies = [("website", "0225_hackathon_hackathonsponsor_hackathonprize_and_more")]

    operations = [
        # Nullable timestamp: rows that were never fetched keep NULL.
        migrations.AddField(
            model_name="repo",
            name="last_pr_fetch_date",
            field=models.DateTimeField(
                blank=True,
                help_text="When PRs were last fetched",
                null=True,
            ),
        ),
        # Page counter; existing rows are populated with the default of 0.
        migrations.AddField(
            model_name="repo",
            name="last_pr_page_processed",
            field=models.IntegerField(
                default=0,
                help_text="Last page of PRs processed from GitHub API",
            ),
        ),
    ]

website/models.py

+2
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,8 @@ class Repo(models.Model):
14881488
ai_summary = models.TextField(null=True, blank=True)
14891489
created_at = models.DateTimeField(auto_now_add=True)
14901490
updated_at = models.DateTimeField(auto_now=True)
1491+
last_pr_page_processed = models.IntegerField(default=0, help_text="Last page of PRs processed from GitHub API")
1492+
last_pr_fetch_date = models.DateTimeField(null=True, blank=True, help_text="When PRs were last fetched")
14911493

14921494
def save(self, *args, **kwargs):
14931495
if not self.slug:

website/templates/gsoc.html

+2-4
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,8 @@ <h3 class="text-xl font-semibold text-red-700 mb-2">OWASP BLT Project Ideas</h3>
209209
</div>
210210
<!-- Top Contributors Leaderboard -->
211211
<div class="bg-white rounded-xl shadow-lg hover:shadow-xl transition-all duration-300 p-8 mb-12 border-l-4 border-red-500">
212-
<h2 class="text-3xl font-bold text-gray-900 mb-6">Top Contributors for GSoC 2025 Projects</h2>
213-
<p class="text-gray-700 mb-4">
214-
For transparency, we are implementing a Top contributors List for students this year. This demonstrates our commitment to working students who are continually contributing while also signaling our intent to other projects and organizations.
215-
</p>
212+
<h2 class="text-3xl font-bold text-gray-900 mb-6">Top OWASP GSoC 2025 Contributors and Projects</h2>
213+
<p class="text-gray-700 mb-4">A glimpse of the top contributors and projects for OWASP GSoC 2025.</p>
216214
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
217215
{% for project, data in projects.items %}
218216
<div class="bg-gray-50 rounded-lg p-6">

0 commit comments

Comments
 (0)