-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from azuki774/develop-241225
add s3 upload in python
- Loading branch information
Showing
11 changed files
with
92 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,35 @@ | ||
FROM python:3.9-bookworm | ||
FROM python:3.13-bookworm | ||
|
||
# Required Packages | ||
RUN apt-get update && \ | ||
apt-get install -y curl unzip && \ | ||
apt-get install -y \ | ||
curl \ | ||
unzip \ | ||
wget \ | ||
unzip \ | ||
libglib2.0-0 \ | ||
libnss3 \ | ||
libgconf-2-4 \ | ||
libfontconfig1 && \ | ||
apt-get clean && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# AWS Setup | ||
RUN curl -o /var/tmp/awscli.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && \ | ||
unzip -d /usr/local/bin/ /var/tmp/awscli.zip | ||
# Install Chrome | ||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - | ||
RUN echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list | ||
RUN apt-get update && apt-get install -y google-chrome-stable && apt-get clean && rm -rf /var/lib/apt/lists/* | ||
|
||
# Install driver (Ref. https://sleepless-se.net/2024/03/19/python-selenium-docker/) | ||
RUN wget -O chrome.json https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json && \ | ||
LINUX_STABLE_URL=$(grep -oP '"url":".*?(?=")' chrome.json | grep 'linux64' | head -n 1 | cut -d'"' -f4) && \ | ||
wget -O chrome.zip $LINUX_STABLE_URL && \ | ||
unzip chrome.zip && \ | ||
rm chrome.zip chrome.json | ||
|
||
# Install Python module | ||
COPY /src/sbi/requirements.txt /tmp/ | ||
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt && mkdir -p /data | ||
COPY --chmod=755 build/sbi/main.sh /src/main.sh | ||
COPY src/sbi/ /src/ | ||
|
||
ENTRYPOINT ["/src/main.sh"] | ||
CMD [ "--s3-upload" ] | ||
ENTRYPOINT ["python3", "-u", "/src/main.py"] |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,4 @@ selenium==4.12.0 | |
webdriver-manager==4.0.2 | ||
python-json-logger>=2.0.7 | ||
pytest==8.3.4 | ||
boto3==1.35.87 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import boto3 | ||
import os | ||
|
||
def upload_file(filepath):
    """Upload one local file to the bucket configured through the environment.

    The object key is ``<BUCKET_DIR>/<basename of filepath>``.  When
    ``BUCKET_DIR`` is unset or empty the file is stored under its bare
    basename instead of raising ``TypeError`` on ``None + "/"``.

    Environment variables read:
        BUCKET_URL: passed to the client as ``endpoint_url``.
        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION: credentials
            and region passed to the client.
        BUCKET_NAME: destination bucket.
        BUCKET_DIR: key prefix inside the bucket (optional).

    Args:
        filepath: Path of the local file to upload.
    """
    client = boto3.client(
        "s3",
        endpoint_url=os.getenv("BUCKET_URL"),
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )
    basename = os.path.basename(filepath)

    # Strip trailing slashes so a prefix such as "dir/" does not produce the
    # key "dir//file"; an empty prefix must not produce a leading "/".
    prefix = (os.getenv("BUCKET_DIR") or "").rstrip("/")
    key = f"{prefix}/{basename}" if prefix else basename

    client.upload_file(filepath, os.getenv("BUCKET_NAME"), key)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,18 @@ | ||
from selenium import webdriver | ||
from selenium.webdriver.chrome.service import Service as ChromeService | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
import os | ||
|
||
def _chrome_options():
    """Build the Chrome options shared by both driver factories."""
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-gpu")
    options.add_argument("--lang=ja-JP")
    options.add_argument("--disable-dev-shm-usage")
    # options.add_experimental_option("prefs", {"download.default_directory": "/data/" })
    UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
    options.add_argument("--user-agent=" + UA)
    return options


def get_remote_driver():
    """Return a WebDriver connected to a remote browser.

    The remote endpoint is read from the ``chromeAddr`` environment
    variable and passed as ``command_executor``.

    Returns:
        A ``webdriver.Remote`` instance with a 10 second implicit wait.
    """
    driver = webdriver.Remote(
        command_executor=os.getenv("chromeAddr"),
        options=_chrome_options(),
    )
    driver.implicitly_wait(10)
    return driver


def get_driver():
    """Return a WebDriver that drives a locally launched Chrome.

    Returns:
        A ``webdriver.Chrome`` instance with a 10 second implicit wait.
    """
    # Only one session is created here; creating a Remote session first and
    # then rebinding the name would leave that session open with no handle.
    driver = webdriver.Chrome(options=_chrome_options())
    driver.implicitly_wait(10)
    return driver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ beautifulsoup4==4.12.2 | |
selenium==4.12.0 | ||
webdriver-manager==4.0.0 | ||
python-json-logger>=2.0.7 | ||
boto3==1.35.87 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import boto3 | ||
from pathlib import Path | ||
import os | ||
|
||
def upload_files(dir_path):
    """Upload every file under ``dir_path`` (recursively) to the bucket.

    Each object key is ``BUCKET_DIR`` joined with the file's path relative
    to ``dir_path``, so ``dir_path`` itself never appears in the key.  Keys
    always use ``/`` as the separator.  The relative path of each file is
    printed before it is uploaded.

    The process working directory is left untouched: relative paths are
    computed against the resolved ``dir_path`` rather than via ``os.chdir``.

    Environment variables read:
        BUCKET_URL: passed to the client as ``endpoint_url``.
        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION: credentials
            and region passed to the client.
        BUCKET_NAME: destination bucket.
        BUCKET_DIR: key prefix inside the bucket (optional).

    Args:
        dir_path: Directory whose files are uploaded; may be relative.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist.
        NotADirectoryError: If ``dir_path`` exists but is not a directory.
    """
    import posixpath  # S3 keys are "/"-separated regardless of the host OS

    # Validate first so a bad path fails before any client is created.
    base = Path(dir_path).resolve(strict=True)
    if not base.is_dir():
        raise NotADirectoryError(str(dir_path))

    client = boto3.client(
        "s3",
        endpoint_url=os.getenv("BUCKET_URL"),
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )
    bucket = os.getenv("BUCKET_NAME")
    prefix = os.getenv("BUCKET_DIR") or ""

    # Enumerate the files inside dir_path.
    for root, _dirs, files in os.walk(base):
        for name in files:
            fullpath = Path(root) / name
            # Drop the dir_path portion so it is not part of the S3 key.
            relpath = fullpath.relative_to(base)
            print(relpath)
            client.upload_file(
                str(fullpath),
                bucket,
                posixpath.join(prefix, relpath.as_posix()),
            )