diff --git a/build/moneyforward/Dockerfile b/build/moneyforward/Dockerfile
index 8528c3f..fd19e01 100644
--- a/build/moneyforward/Dockerfile
+++ b/build/moneyforward/Dockerfile
@@ -26,14 +26,10 @@ RUN wget -O chrome.json https://googlechromelabs.github.io/chrome-for-testing/kn
     unzip chrome.zip && \
     rm chrome.zip chrome.json
 
-# AWS Setup
-RUN curl -o /var/tmp/awscli.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && \
-    unzip -d /usr/local/bin/ /var/tmp/awscli.zip
-
 # Install Python module
 COPY /src/moneyforward/requirements.txt /tmp/
 RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt && mkdir -p /data
 
-COPY --chmod=755 build/moneyforward/main.sh /src/main.sh
 COPY src/moneyforward/ /src/
-ENTRYPOINT ["/src/main.sh"]
+CMD [ "--s3-upload" ]
+ENTRYPOINT ["python3", "-u", "/src/main.py"]
diff --git a/build/moneyforward/main.sh b/build/moneyforward/main.sh
deleted file mode 100755
index da158ca..0000000
--- a/build/moneyforward/main.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-set -e
-YYYYMM=`date '+%Y%m'`
-YYYYMMDD=`date '+%Y%m%d'`
-
-# BUCKET_URL            # from env (ex: "https://s3.ap-northeast-1.wasabisys.com")
-# BUCKET_NAME           # from env (ex: hoge-system-stg-bucket)
-# BUCKET_DIR            # from env (ex: fetcher/moneyforward)
-# AWS_REGION            # from env (ex: ap-northeast-1)
-# AWS_ACCESS_KEY_ID     # from env
-# AWS_SECRET_ACCESS_KEY # from env
-# user="xxxxxxxxx"      # moneyforward id, from env
-# pass="yyyyyyyyy"      # moneyforward pass, from env
-
-AWS_BIN="/usr/local/bin/aws/dist/aws"
-DATA_DIR="/data"
-REMOTE_DIR="${BUCKET_DIR}"
-
-function fetch () {
-  echo "fetcher start"
-  python3 -u /src/main.py
-  echo "fetcher complete"
-}
-
-function create_s3_credentials () {
-  echo "s3 credentials create start"
-  mkdir -p ~/.aws/
-
-  echo "[default]" >> ~/.aws/config
-  echo "region = ${AWS_REGION}" >> ~/.aws/config
-
-  echo "[default]" >> ~/.aws/credentials
-  echo "aws_access_key_id = ${AWS_ACCESS_KEY_ID}" >> ~/.aws/credentials
-  echo "aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY}" >> ~/.aws/credentials
-
-  chmod 400 ~/.aws/config
-  chmod 400 ~/.aws/credentials
-  ls -la ~/.aws/
-  echo "s3 credentials create complete"
-}
-
-function s3_upload () {
-  echo "s3 upload start"
-  ${AWS_BIN} s3 cp ${DATA_DIR}/ "s3://${BUCKET_NAME}/${REMOTE_DIR}/" --recursive --endpoint-url="${BUCKET_URL}"
-  echo "s3 upload complete"
-}
-
-fetch
-
-if [ -z $BUCKET_NAME ]; then
-  exit 0
-fi
-
-create_s3_credentials
-s3_upload
diff --git a/build/sbi/Dockerfile b/build/sbi/Dockerfile
index f1f16bb..bfd1ba8 100644
--- a/build/sbi/Dockerfile
+++ b/build/sbi/Dockerfile
@@ -1,18 +1,34 @@
-FROM python:3.9-bookworm
+FROM python:3.13-bookworm
 
 # Required Packages
 RUN apt-get update && \
-    apt-get install -y curl unzip && \
+    apt-get install -y \
+    curl \
+    unzip \
+    wget \
+    libglib2.0-0 \
+    libnss3 \
+    libgconf-2-4 \
+    libfontconfig1 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# AWS Setup
-RUN curl -o /var/tmp/awscli.zip https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && \
-    unzip -d /usr/local/bin/ /var/tmp/awscli.zip
+# Install Chrome
+RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
+RUN echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list
+RUN apt-get update && apt-get install -y google-chrome-stable && apt-get clean && rm -rf /var/lib/apt/lists/*
 
+# Install driver (Ref. https://sleepless-se.net/2024/03/19/python-selenium-docker/)
+RUN wget -O chrome.json https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json && \
+    LINUX_STABLE_URL=$(grep -oP '"url":".*?(?=")' chrome.json | grep 'linux64' | head -n 1 | cut -d'"' -f4) && \
+    wget -O chrome.zip $LINUX_STABLE_URL && \
+    unzip chrome.zip && \
+    rm chrome.zip chrome.json
+
+# Install Python module
 COPY /src/sbi/requirements.txt /tmp/
 RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt && mkdir -p /data
 
-COPY --chmod=755 build/sbi/main.sh /src/main.sh
 COPY src/sbi/ /src/
-ENTRYPOINT ["/src/main.sh"]
+CMD [ "--s3-upload" ]
+ENTRYPOINT ["python3", "-u", "/src/main.py"]
diff --git a/build/sbi/main.sh b/build/sbi/main.sh
deleted file mode 100644
index 81e6ebf..0000000
--- a/build/sbi/main.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-set -e
-YYYYMM=`date '+%Y%m'`
-YYYYMMDD=`date '+%Y%m%d'`
-
-# BUCKET_URL            # from env (ex: "https://s3.ap-northeast-1.wasabisys.com")
-# BUCKET_NAME           # from env (ex: hoge-system-stg-bucket)
-# BUCKET_DIR            # from env (ex: fetcher/sbi)
-# AWS_REGION            # from env (ex: ap-northeast-1)
-# AWS_ACCESS_KEY_ID     # from env
-# AWS_SECRET_ACCESS_KEY # from env
-
-SCRAPERS_BIN="/usr/local/bin/myscrapers"
-AWS_BIN="/usr/local/bin/aws/dist/aws"
-DATA_DIR="/data"
-
-REMOTE_DIR="${BUCKET_DIR}/${YYYYMM}"
-
-function fetch () {
-  echo "fetcher start"
-  python3 -u /src/main.py
-  echo "fetcher complete"
-}
-
-function create_s3_credentials {
-  echo "s3 credentials create start"
-  mkdir -p ~/.aws/
-
-  echo "[default]" >> ~/.aws/config
-  echo "region = ${AWS_REGION}" >> ~/.aws/config
-
-  echo "[default]" >> ~/.aws/credentials
-  echo "aws_access_key_id = ${AWS_ACCESS_KEY_ID}" >> ~/.aws/credentials
-  echo "aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY}" >> ~/.aws/credentials
-
-  chmod 400 ~/.aws/config
-  chmod 400 ~/.aws/credentials
-  ls -la ~/.aws/
-  echo "s3 credentials create complete"
-}
-
-function s3_upload () {
-  echo "s3 upload start"
-  mkdir -p ${DATA_DIR}/${YYYYMM}
-  cp -f ${DATA_DIR}/*.csv ${DATA_DIR}/${YYYYMM}/ # ex. $DATA_DIR/YYYYMMDD_1.csv -> $DATA_DIR/$YYYYMM/YYYYMMDD_1.csv
-  rm ${DATA_DIR}/*.csv
-  ${AWS_BIN} s3 cp ${DATA_DIR}/${YYYYMM}/ "s3://${BUCKET_NAME}/${REMOTE_DIR}" --recursive --endpoint-url="${BUCKET_URL}"
-  echo "s3 upload complete"
-}
-
-fetch
-
-if [ -z $BUCKET_NAME ]; then
-  exit 0
-fi
-
-create_s3_credentials
-s3_upload
diff --git a/src/moneyforward/main.py b/src/moneyforward/main.py
index 0454c6c..16f3941 100644
--- a/src/moneyforward/main.py
+++ b/src/moneyforward/main.py
@@ -4,6 +4,8 @@ import time
 import logging
 import datetime
+import argparse
+import s3
 
 from pythonjsonlogger import jsonlogger
 from selenium import webdriver
 from selenium.webdriver.common.by import By
@@ -30,10 +32,19 @@ ACCOUNTS_PAGE="https://moneyforward.com/accounts"
 
 
 def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--s3-upload", help="optional", action="store_true")  # flag that enables the S3 upload feature
+    args = parser.parse_args()
     global driver
     try:
         driver = driver.get_driver()
         run_scenario()
+        if args.s3_upload:
+            # the S3 upload feature flag is enabled
+            lg.info("s3 upload start")
+            s3.upload_file(SAVE_DIR + "/" + CF_FILENAME)
+            s3.upload_file(SAVE_DIR + "/" + CF_FILENAME_LASTMONTH)
+            lg.info("s3 upload complete")
     except Exception as e:
         lg.error("failed to run fetch program", e, stack_info=True)
     finally:
diff --git a/src/moneyforward/requirements.txt b/src/moneyforward/requirements.txt
index 6565cb8..f36d054 100644
--- a/src/moneyforward/requirements.txt
+++ b/src/moneyforward/requirements.txt
@@ -3,3 +3,4 @@ selenium==4.12.0
 webdriver-manager==4.0.2
 python-json-logger>=2.0.7
 pytest==8.3.4
+boto3==1.35.87
diff --git a/src/moneyforward/s3.py b/src/moneyforward/s3.py
new file mode 100644
index 0000000..d3e4211
--- /dev/null
+++ b/src/moneyforward/s3.py
@@ -0,0 +1,13 @@
+import boto3
+import os
+
+def upload_file(filepath):
+    client = boto3.client(
+        's3',
+        endpoint_url=os.getenv("BUCKET_URL"),
+        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+        region_name=os.getenv("AWS_REGION")
+    )
+    basename = os.path.basename(filepath)
+    client.upload_file(filepath, os.getenv("BUCKET_NAME"), os.getenv("BUCKET_DIR") + "/" + basename)
diff --git a/src/sbi/driver.py b/src/sbi/driver.py
index ed5556d..7b31e0d 100644
--- a/src/sbi/driver.py
+++ b/src/sbi/driver.py
@@ -1,19 +1,18 @@
 from selenium import webdriver
+from selenium.webdriver.chrome.service import Service as ChromeService
+from webdriver_manager.chrome import ChromeDriverManager
 import os
 
-def get_remote_driver():
+def get_driver():
     options=webdriver.ChromeOptions()
     options.add_argument("--headless")
     options.add_argument("--no-sandbox")
     options.add_argument("--disable-gpu")
     options.add_argument("--lang=ja-JP")
     options.add_argument("--disable-dev-shm-usage")
+    # options.add_experimental_option("prefs", {"download.default_directory": "/data/"})
     UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
     options.add_argument("--user-agent=" + UA)
-    driver = webdriver.Remote(
-        command_executor=os.getenv("chromeAddr"),
-        options=options
-    )
-
+    driver = webdriver.Chrome(options=options)
     driver.implicitly_wait(10)
     return driver
diff --git a/src/sbi/main.py b/src/sbi/main.py
index d4e59eb..95394ed 100644
--- a/src/sbi/main.py
+++ b/src/sbi/main.py
@@ -2,6 +2,8 @@ import os
 import datetime
 import logging
+import argparse
+import s3
 
 from pythonjsonlogger import jsonlogger
 from selenium import webdriver
 from selenium.webdriver.common.by import By
@@ -26,10 +28,18 @@ PORT_URL = "https://site1.sbisec.co.jp/ETGate/?_ControlID=WPLETpfR001Control&_PageID=DefaultPID&_DataStoreID=DSWPLETpfR001Control&_ActionID=DefaultAID&getFlg=on"
 
 
 def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--s3-upload", help="optional", action="store_true")  # flag that enables the S3 upload feature
+    args = parser.parse_args()
     global driver
     try:
-        driver = driver.get_remote_driver()
-        run_scenario(driver=driver)
+        driver = driver.get_driver()
+        run_scenario(driver=driver)
+        if args.s3_upload:
+            # the S3 upload feature flag is enabled
+            lg.info("s3 upload start")
+            s3.upload_files(SAVE_DIR)  # upload all files under /data/ at once
+            lg.info("s3 upload complete")
     except Exception as e:
         lg.error("failed to run fetch program", e, stack_info=True)
     finally:
@@ -108,8 +118,8 @@ def get_file_path(index):
     today = datetime.date.today()  # output: datetime.date(2020, 3, 22)
     yyyymm = "{0:%Y%m}".format(today)  # 202003
     yyyymmdd = "{0:%Y%m%d}".format(today)  # 20200322
-
-    filepath = SAVE_DIR + "/" + yyyymmdd + "_" + str(index) + ".csv"
+    os.makedirs(SAVE_DIR + "/" + yyyymm, exist_ok=True)
+    filepath = SAVE_DIR + "/" + yyyymm + "/" + yyyymmdd + "_" + str(index) + ".csv"
     return filepath
 
 
diff --git a/src/sbi/requirements.txt b/src/sbi/requirements.txt
index 6493585..8e23a34 100644
--- a/src/sbi/requirements.txt
+++ b/src/sbi/requirements.txt
@@ -2,3 +2,4 @@ beautifulsoup4==4.12.2
 selenium==4.12.0
 webdriver-manager==4.0.0
 python-json-logger>=2.0.7
+boto3==1.35.87
diff --git a/src/sbi/s3.py b/src/sbi/s3.py
new file mode 100644
index 0000000..6eacd77
--- /dev/null
+++ b/src/sbi/s3.py
@@ -0,0 +1,21 @@
+import boto3
+from pathlib import Path
+import os
+
+def upload_files(dir_path):
+    client = boto3.client(
+        's3',
+        endpoint_url=os.getenv("BUCKET_URL"),
+        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+        region_name=os.getenv("AWS_REGION")
+    )
+
+    # enumerate the files under dir_path
+    os.chdir(dir_path)
+    for root, dirs, files in os.walk(dir_path):
+        for f in files:  # f: file name without its directory part
+            fullpath = os.path.join(root, f)
+            relpath = Path(fullpath).relative_to(Path.cwd())  # the chdir above drops the dir_path prefix from the S3 key
+            print(relpath)
+            client.upload_file(relpath, os.getenv("BUCKET_NAME"), os.path.join(os.getenv("BUCKET_DIR"), relpath))
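A minimal local smoke test of the new entrypoints, as a sketch: the image tag moneyforward-fetcher and the credential placeholders are hypothetical, the environment variable names are the ones s3.py reads, and the sample bucket values mirror the examples documented in the deleted main.sh headers.

# Build from the repository root (the Dockerfile COPYs src/moneyforward/ into /src/).
docker build -f build/moneyforward/Dockerfile -t moneyforward-fetcher .

# ENTRYPOINT is ["python3", "-u", "/src/main.py"] and the default CMD appends
# --s3-upload, so a plain run fetches and then uploads.
docker run --rm \
  -e BUCKET_URL="https://s3.ap-northeast-1.wasabisys.com" \
  -e BUCKET_NAME="hoge-system-stg-bucket" \
  -e BUCKET_DIR="fetcher/moneyforward" \
  -e AWS_REGION="ap-northeast-1" \
  -e AWS_ACCESS_KEY_ID="..." \
  -e AWS_SECRET_ACCESS_KEY="..." \
  moneyforward-fetcher

To fetch without uploading, override the entrypoint arguments (e.g. docker run --rm --entrypoint python3 moneyforward-fetcher -u /src/main.py), since an empty CMD cannot be passed from the CLI; the old early-exit on a missing BUCKET_NAME in main.sh no longer applies.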