Skip to content

Commit

Permalink
Merge pull request #12 from azuki774/develop-241225
Browse files Browse the repository at this point in the history
add s3 upload in python
  • Loading branch information
azuki774 authored Dec 28, 2024
2 parents 53a0cd7 + 0d9aa80 commit d8f9a82
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 136 deletions.
8 changes: 2 additions & 6 deletions build/moneyforward/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,10 @@ RUN wget -O chrome.json https://googlechromelabs.github.io/chrome-for-testing/kn
unzip chrome.zip && \
rm chrome.zip chrome.json

# AWS Setup
RUN curl -o /var/tmp/awscli.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && \
unzip -d /usr/local/bin/ /var/tmp/awscli.zip

# Install Python module
COPY /src/moneyforward/requirements.txt /tmp/
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt && mkdir -p /data
COPY --chmod=755 build/moneyforward/main.sh /src/main.sh
COPY src/moneyforward/ /src/

ENTRYPOINT ["/src/main.sh"]
CMD [ "--s3-upload" ]
ENTRYPOINT ["python3", "-u", "/src/main.py"]
55 changes: 0 additions & 55 deletions build/moneyforward/main.sh

This file was deleted.

31 changes: 24 additions & 7 deletions build/sbi/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,35 @@
FROM python:3.9-bookworm
FROM python:3.13-bookworm

# Required Packages
RUN apt-get update && \
apt-get install -y curl unzip && \
apt-get install -y \
curl \
unzip \
wget \
unzip \
libglib2.0-0 \
libnss3 \
libgconf-2-4 \
libfontconfig1 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# AWS Setup
RUN curl -o /var/tmp/awscli.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && \
unzip -d /usr/local/bin/ /var/tmp/awscli.zip
# Install Chrome
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
RUN echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list
RUN apt-get update && apt-get install -y google-chrome-stable && apt-get clean && rm -rf /var/lib/apt/lists/*

# Install driver (Ref. https://sleepless-se.net/2024/03/19/python-selenium-docker/)
RUN wget -O chrome.json https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json && \
LINUX_STABLE_URL=$(grep -oP '"url":".*?(?=")' chrome.json | grep 'linux64' | head -n 1 | cut -d'"' -f4) && \
wget -O chrome.zip $LINUX_STABLE_URL && \
unzip chrome.zip && \
rm chrome.zip chrome.json

# Install Python module
COPY /src/sbi/requirements.txt /tmp/
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt && mkdir -p /data
COPY --chmod=755 build/sbi/main.sh /src/main.sh
COPY src/sbi/ /src/

ENTRYPOINT ["/src/main.sh"]
CMD [ "--s3-upload" ]
ENTRYPOINT ["python3", "-u", "/src/main.py"]
58 changes: 0 additions & 58 deletions build/sbi/main.sh

This file was deleted.

11 changes: 11 additions & 0 deletions src/moneyforward/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import time
import logging
import datetime
import argparse
import s3
from pythonjsonlogger import jsonlogger
from selenium import webdriver
from selenium.webdriver.common.by import By
Expand All @@ -30,10 +32,19 @@
ACCOUNTS_PAGE="https://moneyforward.com/accounts"

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--s3-upload", help="optional", action="store_true") # s3 upload機能の有効化フラグ
args = parser.parse_args()
global driver
try:
driver = driver.get_driver()
run_scenario()
if args.s3_upload:
# s3 upload 機能フラグが有効なとき
lg.info("s3 upload start")
s3.upload_file(SAVE_DIR + "/" + CF_FILENAME)
s3.upload_file(SAVE_DIR + "/" + CF_FILENAME_LASTMONTH)
lg.info("s3 upload complete")
except Exception as e:
lg.error("failed to run fetch program", e, stack_info=True)
finally:
Expand Down
1 change: 1 addition & 0 deletions src/moneyforward/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ selenium==4.12.0
webdriver-manager==4.0.2
python-json-logger>=2.0.7
pytest==8.3.4
boto3==1.35.87
13 changes: 13 additions & 0 deletions src/moneyforward/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import boto3
import os

def upload_file(filepath):
    """Upload one local file to the configured S3 bucket.

    Connection settings are read from the environment: ``BUCKET_URL``
    (endpoint URL), ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and
    ``AWS_REGION``. The destination is ``BUCKET_NAME`` and the object key is
    ``<BUCKET_DIR>/<basename of filepath>``.

    Args:
        filepath: Path of the local file to upload.

    Raises:
        ValueError: If ``BUCKET_NAME`` or ``BUCKET_DIR`` is not set.
    """
    bucket_name = os.getenv("BUCKET_NAME")
    bucket_dir = os.getenv("BUCKET_DIR")

    # Fail early with a readable message instead of the opaque
    # "NoneType + str" TypeError the key concatenation would raise.
    missing = [
        env_name
        for env_name, value in (
            ("BUCKET_NAME", bucket_name),
            ("BUCKET_DIR", bucket_dir),
        )
        if value is None
    ]
    if missing:
        raise ValueError(
            "required environment variable(s) not set: " + ", ".join(missing)
        )

    client = boto3.client(
        's3',
        endpoint_url=os.getenv("BUCKET_URL"),
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )

    # Only the file name is kept in the key; local directories are dropped.
    basename = os.path.basename(filepath)
    client.upload_file(filepath, bucket_name, bucket_dir + "/" + basename)
11 changes: 5 additions & 6 deletions src/sbi/driver.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
import os

def get_remote_driver():
def get_driver():
options=webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--lang=ja-JP")
options.add_argument("--disable-dev-shm-usage")
# options.add_experimental_option("prefs", {"download.default_directory": "/data/" })
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
options.add_argument("--user-agent=" + UA)
driver = webdriver.Remote(
command_executor=os.getenv("chromeAddr"),
options=options
)

driver = webdriver.Chrome(options=options)
driver.implicitly_wait(10)
return driver
18 changes: 14 additions & 4 deletions src/sbi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import datetime
import logging
import argparse
import s3
from pythonjsonlogger import jsonlogger
from selenium import webdriver
from selenium.webdriver.common.by import By
Expand All @@ -26,10 +28,18 @@
PORT_URL = "https://site1.sbisec.co.jp/ETGate/?_ControlID=WPLETpfR001Control&_PageID=DefaultPID&_DataStoreID=DSWPLETpfR001Control&_ActionID=DefaultAID&getFlg=on"

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--s3-upload", help="optional", action="store_true") # s3 upload機能の有効化フラグ
args = parser.parse_args()
global driver
try:
driver = driver.get_remote_driver()
run_scenario(driver=driver)
driver = driver.get_driver()
# run_scenario(driver=driver)
if args.s3_upload:
# s3 upload 機能フラグが有効なとき
lg.info("s3 upload start")
s3.upload_files(SAVE_DIR) # /data/ 配下のファイルをまとめてアップロード
lg.info("s3 upload complete")
except Exception as e:
lg.error("failed to run fetch program", e, stack_info=True)
finally:
Expand Down Expand Up @@ -108,8 +118,8 @@ def get_file_path(index):
today = datetime.date.today() # 出力:datetime.date(2020, 3, 22)
yyyymm = "{0:%Y%m}".format(today) # 202003
yyyymmdd = "{0:%Y%m%d}".format(today) # 20200322

filepath = SAVE_DIR + "/" + yyyymmdd + "_" + str(index) + ".csv"
os.makedirs(SAVE_DIR + "/" + yyyymm, exist_ok=True)
filepath = SAVE_DIR + "/" + yyyymm + "/" + yyyymmdd + "_" + str(index) + ".csv"
return filepath


Expand Down
1 change: 1 addition & 0 deletions src/sbi/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ beautifulsoup4==4.12.2
selenium==4.12.0
webdriver-manager==4.0.0
python-json-logger>=2.0.7
boto3==1.35.87
21 changes: 21 additions & 0 deletions src/sbi/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import boto3
from pathlib import Path
import os

def upload_files(dir_path):
    """Upload every file under ``dir_path`` (recursively) to the S3 bucket.

    Connection settings are read from the environment: ``BUCKET_URL``
    (endpoint URL), ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and
    ``AWS_REGION``. Each file is stored in ``BUCKET_NAME`` under the key
    ``BUCKET_DIR`` joined with the file's path relative to ``dir_path``, so
    the ``dir_path`` prefix itself never appears in the object key.

    Unlike the earlier implementation this does not call ``os.chdir``, so the
    process working directory is left untouched and a relative ``dir_path``
    is handled correctly.

    Args:
        dir_path: Directory whose contents are uploaded.

    Raises:
        ValueError: If ``BUCKET_NAME`` or ``BUCKET_DIR`` is not set.
    """
    bucket_name = os.getenv("BUCKET_NAME")
    bucket_dir = os.getenv("BUCKET_DIR")

    # Fail early with a readable message rather than an opaque error from
    # os.path.join(None, ...) or from inside boto3.
    missing = [
        env_name
        for env_name, value in (
            ("BUCKET_NAME", bucket_name),
            ("BUCKET_DIR", bucket_dir),
        )
        if value is None
    ]
    if missing:
        raise ValueError(
            "required environment variable(s) not set: " + ", ".join(missing)
        )

    client = boto3.client(
        's3',
        endpoint_url=os.getenv("BUCKET_URL"),
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )

    # Enumerate the files under dir_path. Resolve it to an absolute path once
    # so relative paths can be computed without changing the working directory.
    base_dir = os.path.abspath(dir_path)
    for root, _dirs, files in os.walk(base_dir):
        for filename in files:
            fullpath = os.path.join(root, filename)
            # Strip the dir_path prefix so it does not end up in the S3 key.
            relpath = os.path.relpath(fullpath, base_dir)
            print(relpath)
            client.upload_file(
                fullpath, bucket_name, os.path.join(bucket_dir, relpath)
            )

0 comments on commit d8f9a82

Please sign in to comment.