Skip to content

Commit 27aa94f

Browse files
Merge branch 'OWASP:main' into topics-link
2 parents fa30e97 + 77fb340 commit 27aa94f

23 files changed

+157
-134
lines changed

backend/apps/owasp/management/commands/owasp_scrape_chapters.py

-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@ def handle(self, *args, **options):
6363
else:
6464
logger.info("Skipped related URL %s", verified_url)
6565

66-
chapter.leaders_raw = scraper.get_leaders()
67-
6866
chapter.invalid_urls = sorted(invalid_urls)
6967
chapter.related_urls = sorted(related_urls)
7068

backend/apps/owasp/management/commands/owasp_scrape_committees.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def handle(self, *args, **options):
2222
active_committees = Committee.active_committees.order_by("-created_at")
2323
active_committees_count = active_committees.count()
2424
offset = options["offset"]
25-
chapters = []
25+
committees = []
2626
for idx, committee in enumerate(active_committees[offset:]):
2727
prefix = f"{idx + offset + 1} of {active_committees_count}"
2828
print(f"{prefix:<10} {committee.owasp_url}")
@@ -63,14 +63,12 @@ def handle(self, *args, **options):
6363
else:
6464
logger.info("Skipped related URL %s", verified_url)
6565

66-
committee.leaders_raw = scraper.get_leaders()
67-
6866
committee.invalid_urls = sorted(invalid_urls)
6967
committee.related_urls = sorted(related_urls)
7068

71-
chapters.append(committee)
69+
committees.append(committee)
7270

7371
time.sleep(0.5)
7472

7573
# Bulk save data.
76-
Committee.bulk_save(chapters)
74+
Committee.bulk_save(committees)

backend/apps/owasp/management/commands/owasp_scrape_projects.py

-2
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,6 @@ def handle(self, *args, **options):
7575
else:
7676
logger.info("Skipped related URL %s", verified_url)
7777

78-
project.leaders_raw = scraper.get_leaders()
79-
8078
project.invalid_urls = sorted(invalid_urls)
8179
project.related_urls = sorted(related_urls)
8280

backend/apps/owasp/models/common.py

+45-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import yaml
88
from django.db import models
99
from django.db.models import Sum
10+
from requests.exceptions import RequestException
1011

1112
from apps.common.open_ai import OpenAi
1213
from apps.github.constants import (
@@ -40,6 +41,9 @@ class Meta:
4041
)
4142
is_active = models.BooleanField(verbose_name="Is active", default=True)
4243

44+
leaders_raw = models.JSONField(
45+
verbose_name="Entity leaders list", default=list, blank=True, null=True
46+
)
4347
tags = models.JSONField(verbose_name="OWASP metadata tags", default=list)
4448
topics = models.JSONField(
4549
verbose_name="GitHub repository topics", default=list, blank=True, null=True
@@ -62,6 +66,16 @@ def github_url(self):
6266
"""Get GitHub URL."""
6367
return f"https://github.com/owasp/{self.key}"
6468

69+
@property
def leaders_md_raw_url(self):
    """Return the raw GitHub URL of the entity's leaders.md file.

    Returns None when the entity has no OWASP repository attached.
    """
    repository = self.owasp_repository
    if not repository:
        return None

    return (
        "https://raw.githubusercontent.com/OWASP/"
        f"{repository.key}/{repository.default_branch}/leaders.md"
    )
78+
6579
@property
6680
def owasp_name(self):
6781
"""Get OWASP name."""
@@ -79,6 +93,9 @@ def deactivate(self):
7993

8094
def from_github(self, field_mapping, repository):
8195
"""Update instance based on GitHub repository data."""
96+
# Get leaders.
97+
self.leaders_raw = self.get_leaders()
98+
8299
# Normalize tags.
83100
self.tags = (
84101
[tag.strip(", ") for tag in self.tags.split("," if "," in self.tags else " ")]
@@ -126,6 +143,34 @@ def get_index_md_raw_url(self, repository=None):
126143
else None
127144
)
128145

146+
def get_leaders(self):
    """Get leaders from leaders.md file on GitHub.

    Fetches the file at ``self.leaders_md_raw_url`` and collects the bracketed
    label of every Markdown list item shaped like ``* [Name](link)``; the
    ``(link)`` part is optional.

    Returns:
        list: Leader names with surrounding whitespace removed, sorted
        ascending. Empty when the fetch fails, the file has no content, or
        no line contains a matching list item.
    """
    # Read the property once so the fetch and every log record use the same value.
    url = self.leaders_md_raw_url

    try:
        content = get_repository_file_content(url)
    except (RequestException, ValueError) as e:
        logger.exception(
            "Failed to fetch leaders.md file",
            extra={"URL": url, "error": str(e)},
        )
        return []

    if not content:
        return []

    # Match both standard Markdown list items with links and variations.
    leader_item = re.compile(r"\*\s*\[([^\]]+)\](?:\([^)]*\))?")

    leaders = []
    try:
        # Matching stays per line so a pattern can never span two list items.
        for line in content.split("\n"):
            logger.debug("Processing line: %s", line)
            for raw_name in leader_item.findall(line):
                # "[ Name ]" should be stored (and sorted) as "Name";
                # a whitespace-only label is not a leader.
                name = raw_name.strip()
                if name:
                    leaders.append(name)
    except AttributeError:
        # Best effort: content without str methods yields whatever was
        # collected so far instead of aborting the caller.
        logger.exception("Unable to parse leaders.md content", extra={"URL": url})

    return sorted(leaders)
173+
129174
def get_top_contributors(self, repositories=()):
130175
"""Get top contributors."""
131176
return [
@@ -153,9 +198,6 @@ class GenericEntityModel(models.Model):
153198
class Meta:
154199
abstract = True
155200

156-
leaders_raw = models.JSONField(
157-
verbose_name="Entity leaders list", default=list, blank=True, null=True
158-
)
159201
related_urls = models.JSONField(
160202
verbose_name="Entity related URLs", default=list, blank=True, null=True
161203
)

backend/apps/owasp/scraper.py

-10
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,6 @@ def get_urls(self, domain=None):
5555
else self.page_tree.xpath("//div[@class='sidebar']//a/@href")
5656
)
5757

58-
def get_leaders(self):
59-
"""Get leaders."""
60-
leaders_header = self.page_tree.xpath("//div[@class='sidebar']//*[@id='leaders']")
61-
if leaders_header:
62-
leaders_ul = leaders_header[0].getnext()
63-
if leaders_ul is not None and leaders_ul.tag == "ul":
64-
return sorted(name.strip() for name in leaders_ul.xpath(".//li/a/text()"))
65-
66-
return []
67-
6858
def verify_url(self, url):
6959
"""Verify URL."""
7060
location = urlparse(url).netloc.lower()

backend/tests/owasp/management/commands/owasp_scrape_chapters_test.py

-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters):
4444
"https://invalid.com/repo3",
4545
]
4646
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
47-
mock_scraper.get_leaders.return_value = "Leaders data"
4847
mock_scraper.page_tree = True
4948

5049
mock_chapter.get_related_url.side_effect = lambda url, **_: url
@@ -91,4 +90,3 @@ def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters):
9190
expected_related_urls = ["https://example.com/repo1", "https://example.com/repo2"]
9291
assert chapter.invalid_urls == sorted(expected_invalid_urls)
9392
assert chapter.related_urls == sorted(expected_related_urls)
94-
assert chapter.leaders_raw == "Leaders data"

backend/tests/owasp/management/commands/owasp_scrape_committees_test.py

-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def test_handle(self, mock_bulk_save, command, mock_committee, offset, committee
4444
"https://invalid.com/repo3",
4545
]
4646
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
47-
mock_scraper.get_leaders.return_value = "Leaders data"
4847
mock_scraper.page_tree = True
4948

5049
mock_committee.get_related_url.side_effect = lambda url, **_: url
@@ -91,4 +90,3 @@ def test_handle(self, mock_bulk_save, command, mock_committee, offset, committee
9190
expected_related_urls = ["https://example.com/repo1", "https://example.com/repo2"]
9291
assert committee.invalid_urls == sorted(expected_invalid_urls)
9392
assert committee.related_urls == sorted(expected_related_urls)
94-
assert committee.leaders_raw == "Leaders data"

backend/tests/owasp/management/commands/owasp_scrape_projects_test.py

-2
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ def test_handle(self, mock_github, mock_bulk_save, command, mock_project, offset
4747
"https://invalid.com/repo3",
4848
]
4949
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
50-
mock_scraper.get_leaders.return_value = "Leaders data"
5150
mock_scraper.page_tree = True
5251

5352
mock_github_instance = mock.Mock()
@@ -107,4 +106,3 @@ def test_handle(self, mock_github, mock_bulk_save, command, mock_project, offset
107106
]
108107
assert project.invalid_urls == sorted(expected_invalid_urls)
109108
assert project.related_urls == sorted(expected_related_urls)
110-
assert project.leaders_raw == "Leaders data"

backend/tests/owasp/models/chapter_test.py

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pytest
44

55
from apps.github.models.repository import Repository
6+
from apps.github.models.user import User
67
from apps.owasp.models.chapter import Chapter, Prompt
78

89

@@ -161,6 +162,8 @@ def test_from_github(self):
161162
repository_mock.title = "Nest"
162163
repository_mock.pitch = "Nest Pitch"
163164
repository_mock.tags = ["react", "python"]
165+
repository_mock.leaders = ["Leader1", "Leader2"]
166+
repository_mock.owner = User(name="OWASP")
164167

165168
chapter = Chapter()
166169

backend/tests/owasp/models/committee_test.py

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from apps.common.index import IndexBase
66
from apps.github.models.repository import Repository
7+
from apps.github.models.user import User
78
from apps.owasp.models.committee import Committee
89

910

@@ -70,6 +71,8 @@ def test_from_github(self):
7071
repository_mock.title = "Nest"
7172
repository_mock.pitch = "Nest Pitch"
7273
repository_mock.tags = ["react", "python"]
74+
repository_mock.leaders = ["Leader1, Leader2"]
75+
repository_mock.owner = User(name="OWASP")
7376

7477
committee = Committee()
7578

backend/tests/owasp/models/common_test.py

+22
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,28 @@ class Meta:
1212

1313

1414
class TestRepositoryBasedEntityModel:
15+
@pytest.mark.parametrize(
    ("content", "expected_leaders"),
    [
        ("* [Leader1](https://example.com)", ["Leader1"]),
        (
            "* [Leader1](https://example.com)\n* [Leader2](https://example.com)",
            ["Leader1", "Leader2"],
        ),
        # The "(link)" part of a list item is optional in the pattern.
        ("* [Leader1]", ["Leader1"]),
        # The result is sorted regardless of the order in the file.
        (
            "* [Leader2](https://example.com)\n* [Leader1](https://example.com)",
            ["Leader1", "Leader2"],
        ),
        # Lines without a bracketed list item contribute nothing.
        ("### Leaders\n\nSome text", []),
        ("", []),
    ],
)
def test_get_leaders(self, content, expected_leaders):
    """Check that get_leaders extracts sorted names from leaders.md content.

    The file fetch is patched, so only the parsing of ``content`` is exercised.
    """
    model = EntityModel()
    repository = MagicMock()
    repository.name = "test-repo"
    # NOTE(review): get_leaders reads leaders_md_raw_url, which is built from
    # owasp_repository; nothing visible reads `repository` -- confirm whether
    # this setup is still needed.
    model.repository = repository

    with patch("apps.owasp.models.common.get_repository_file_content", return_value=content):
        leaders = model.get_leaders()

    assert leaders == expected_leaders
36+
1537
@pytest.mark.parametrize(
1638
("key", "expected_url"),
1739
[

backend/tests/owasp/models/project_test.py

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pytest
44

55
from apps.github.models.repository import Repository
6+
from apps.github.models.user import User
67
from apps.owasp.models.project import Project
78

89

@@ -105,6 +106,8 @@ def test_from_github(self):
105106
repository_mock.title = "Nest"
106107
repository_mock.pitch = "Nest Pitch"
107108
repository_mock.tags = "react, python"
109+
repository_mock.leaders = ["Leader1", "Leader2"]
110+
repository_mock.owner = User(name="OWASP")
108111

109112
project = Project()
110113

backend/tests/owasp/scraper_test.py

-10
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,6 @@ def test_initialization_not_found(self, mock_session):
108108

109109
assert scraper.page_tree is None
110110

111-
def test_get_leaders_no_leaders(self, mock_session):
112-
invalid_html = b"<div class='sidebar'><div id='leaders'></div></div>"
113-
mock_response = Mock()
114-
mock_response.content = invalid_html
115-
mock_session.get.return_value = mock_response
116-
117-
scraper = OwaspScraper("https://test.org")
118-
119-
assert scraper.get_leaders() == []
120-
121111
def test_verify_url_invalid_url(self, mock_session):
122112
response = Mock()
123113
response.status_code = codes.ok

frontend/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ test-frontend: \
2323

2424
test-frontend-e2e:
2525
@docker build -f frontend/docker/Dockerfile.e2e.test frontend -t nest-test-frontend-e2e
26-
@docker run --env-file frontend/.env.example nest-test-frontend-e2e npm run test:e2e
26+
@docker run --env-file frontend/.env.example nest-test-frontend-e2e pnpm run test:e2e
2727

2828
test-frontend-unit:
2929
@docker build -f frontend/docker/Dockerfile.unit.test frontend -t nest-test-frontend-unit
30-
@docker run --env-file frontend/.env.example nest-test-frontend-unit npm run test:unit
30+
@docker run --env-file frontend/.env.example nest-test-frontend-unit pnpm run test:unit
3131

3232
update-frontend-dependencies:
3333
@CMD="pnpm update" $(MAKE) exec-frontend-command-it

0 commit comments

Comments
 (0)