-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlpa_table_export.py
129 lines (115 loc) · 4.3 KB
/
lpa_table_export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import logging
import os
import tomllib
from pathlib import Path
import pandas as pd
from github import Github
from gitlab import Gitlab
from utils import (
get_codeberg_repository_data,
get_github_repository_data,
get_gitlab_repository_data,
get_gitlab_repository_data_with_webscraping,
get_sourcehunt_repository_data_with_webscraping,
)
# Emit INFO-and-above records formatted as "<LEVEL> :: <message>".
logging.basicConfig(format="%(levelname)s :: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# API clients keyed by the repository host they talk to. Tokens are read from
# the environment and are None when the variable is unset.
# NOTE(review): the "GILAB_*" variable names look like a misspelling of
# "GITLAB_*" -- confirm against the deployment environment before renaming.
REPOSITORY_APIS = {
    "github.com": Github(os.getenv("GITHUB_TOKEN")),
    "gitlab.com": Gitlab(private_token=os.getenv("GILAB_TOKEN")),
    "invent.kde.org": Gitlab(
        private_token=os.getenv("GILAB_KDE_TOKEN"),
        url="https://invent.kde.org",
    ),
}

# File names that are skipped when the app content files are scanned.
IGNORED_FILES = ["_index.md"]
def _extract_repository_name(repository: str, repository_domain: str) -> str:
    """Return the repository path of *repository*, cut to two segments at most.

    The URL scheme and everything up to and including ``<repository_domain>/``
    are dropped, surrounding slashes are stripped, and any path segments after
    the second one (for example a trailing ``tree/main``) are discarded.

    Args:
        repository: Repository URL, with or without a ``scheme://`` prefix.
        repository_domain: Host part of the URL that precedes the path.

    Returns:
        The remaining path, e.g. ``"foo/bar"`` for
        ``"https://github.com/foo/bar/tree/main"``.
    """
    without_scheme = repository.split("://")[-1]
    path = without_scheme.split(f"{repository_domain}/")[-1].strip("/")

    segments = path.split("/")
    if len(segments) > 2:
        # More than one "/" in the path: keep only the first two segments.
        return "/".join(segments[:2])
    return path
def _get_repository_stats(repository: str, repository_domain: str) -> dict[str, str]:
    """Fetch statistics for *repository* via the helper that handles its host.

    Args:
        repository: Full repository URL; a falsy value short-circuits to ``{}``.
        repository_domain: Host name of the URL, used to select the helper.

    Returns:
        Whatever the selected ``get_*_repository_data`` helper returns, or an
        empty dict when there is no repository or the host is not handled.
    """
    if not repository:
        return {}

    repository_name = _extract_repository_name(repository, repository_domain)

    if repository_domain == "github.com":
        return get_github_repository_data(
            REPOSITORY_APIS["github.com"], repository_name
        )
    if repository_domain == "codeberg.org":
        return get_codeberg_repository_data(repository_name)
    if repository_domain in ("gitlab.com", "invent.kde.org"):
        # Both hosts have an API client registered under their own domain.
        return get_gitlab_repository_data(
            REPOSITORY_APIS[repository_domain], repository_name
        )
    if repository_domain in ("gitlab.gnome.org", "source.puri.sm", "gitlab.manjaro.org"):
        # The web-scraping helpers receive the full URL, not the extracted name.
        return get_gitlab_repository_data_with_webscraping(repository)
    if repository_domain in ("sr.ht", "git.sr.ht"):
        return get_sourcehunt_repository_data_with_webscraping(repository)

    # Host without a dedicated helper.
    return {}
# Scan every app page, parse its TOML front matter, enrich it with repository
# statistics and write one CSV row per app to "export.csv".
list_files = Path("linuxphoneapps.frama.io/content/apps").glob("**/*.md")
list_data = []
for file in list_files:
    if file.name in IGNORED_FILES:
        continue
    logger.info(str(file))

    # The front matter is the text between the first two "+++" delimiters.
    # Read explicitly as UTF-8 so parsing does not depend on the locale.
    app_config = tomllib.loads(file.read_text(encoding="utf-8").split("+++")[1])
    extra = app_config.get("extra", {})
    taxonomies = app_config.get("taxonomies", {})

    # Pages without an app_id are not exported.
    app_id = extra.get("app_id")
    if not app_id:
        continue

    repository = extra.get("repository")
    # Take the last piece after "://" so a URL without a scheme does not raise
    # IndexError; this mirrors how _extract_repository_name strips the scheme.
    repository_domain = repository.split("://")[-1].split("/")[0] if repository else ""
    repository_stats = _get_repository_stats(repository, repository_domain)

    compatibility_values = taxonomies.get("mobile_compatibility", [])
    if len(compatibility_values) > 1:
        logger.warning("More than one item in mobile_compatibility")
    # Only the first entry is exported; fall back to an empty value instead of
    # raising IndexError when the taxonomy is missing or empty.
    mobile_compatibility = (
        compatibility_values[0].title() if compatibility_values else ""
    )

    frameworks = ", ".join(taxonomies.get("frameworks", []))
    packaged_in = ", ".join(word.title() for word in taxonomies.get("packaged_in", []))
    categories = ", ".join(word.title() for word in taxonomies.get("categories", []))

    # Key order defines the CSV column order. Keys in repository_stats are
    # inserted after "repository" and are overridden by any later key of the
    # same name.
    list_data.append(
        {
            "name": app_config.get("title", ""),
            "url": f"https://linuxphoneapps.org/apps/{app_id.lower()}",
            "repository": repository,
            **repository_stats,
            "repository_domain": repository_domain,
            "description": app_config.get("description"),
            "categories": categories,
            "compatibility": mobile_compatibility,
            "frameworks": frameworks,
            "distribution": packaged_in,
            "created": app_config.get("date"),
            "last_updated": app_config.get("updated"),
        }
    )

df = pd.DataFrame.from_records(list_data)
# The column exists only when at least one record carried that key; casting a
# missing column raises KeyError. The nullable "Int64" dtype keeps the column
# integer-typed even when some rows have no value.
if "repository_stars_count" in df.columns:
    df = df.astype({"repository_stars_count": "Int64"})
df = df.sort_values(by=["name"])
df.to_csv("export.csv", index=False)