Skip to content

Commit

Permalink
feature: logging system (#11)
Browse files Browse the repository at this point in the history
* feature: init logging system

* fix: delete duplicate datetime

* fix: logging cmd format
  • Loading branch information
cnstark authored Dec 25, 2020
1 parent b269279 commit 678e120
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ __pycache__

running_log
private_key
server_log

*/migrations/*
!*/migrations/__init__.py
Expand Down
5 changes: 4 additions & 1 deletion gpu_info/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import subprocess
import json
import logging

from .models import GPUServer, GPUInfo

# Logger for GPU-info update messages; the 'django.task' logger and its
# handlers are declared in the LOGGING dict in gpu_tasker/settings.py.
task_logger = logging.getLogger('django.task')


def ssh_execute(host, user, exec_cmd, private_key_path=None):
if private_key_path is None:
Expand Down Expand Up @@ -88,6 +91,6 @@ def update_gpu_info(self):
gpu_info.processes = '\n'.join(map(lambda x: json.dumps(x), gpu['processes']))
gpu_info.save()
except subprocess.CalledProcessError:
print('Update ' + server.ip + ' failed')
task_logger.error('Update ' + server.ip + ' failed')
server.valid = False
server.save()
77 changes: 77 additions & 0 deletions gpu_tasker/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,83 @@
if not os.path.isdir(PRIVATE_KEY_DIR):
os.makedirs(PRIVATE_KEY_DIR)

# Directory that holds the rotating log files written by the handlers below.
SERVER_LOG_DIR = os.path.join(BASE_DIR, 'server_log')
# exist_ok=True replaces the former "isdir() then makedirs()" pair: that
# check-then-create sequence can raise FileExistsError when more than one
# process imports these settings at the same moment.
os.makedirs(SERVER_LOG_DIR, exist_ok=True)

# Logging configuration in logging.config.dictConfig format.
#
# Summary of the routing declared below:
#   * 'django'          -> request_info.log  (INFO and above)
#   * 'django.request'  -> request_error.log (ERROR and above)
#   * 'django.task'     -> task_info.log and the console (INFO and above)
#   * 'django.security.DisallowedHost' -> discarded (NullHandler)
# Every logger sets propagate=False, so records are not passed on to
# ancestor loggers.
LOGGING = {
    'version': 1,
    # NOTE(review): True disables loggers that already exist when this
    # configuration is applied and are not named below. Django's logging
    # documentation cautions that this is rarely what you want -- confirm
    # it is intended.
    'disable_existing_loggers': True,
    'formatters': {
        # Single line format shared by every handler that sets a formatter.
        'standard': {
            'format': '%(asctime)s [%(threadName)s:%(thread)d] [%(name)s:%(lineno)d] [%(module)s:%(funcName)s] [%(levelname)s]- %(message)s'},
    },
    # These filters are declared but not referenced by any handler below.
    'filters': {
        'require_debug_true': {
            '()': 'django.utils.log.RequireDebugTrue',
        },
        'require_debug_false': {
            '()': 'django.utils.log.RequireDebugFalse',
        }
    },
    'handlers': {
        # Swallows every record it receives.
        'null': {
            'level': 'DEBUG',
            'class': 'logging.NullHandler',
        },
        # Stream output (StreamHandler defaults to stderr).
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            'formatter': 'standard'
        },
        # The three file handlers below each rotate at 5 MiB and keep
        # 5 backup files.
        'req_err': {
            'level': 'ERROR',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(SERVER_LOG_DIR, 'request_error.log'),
            'maxBytes': 1024 * 1024 * 5,
            'backupCount': 5,
            'formatter': 'standard',
        },
        'req_info': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(SERVER_LOG_DIR, 'request_info.log'),
            'maxBytes': 1024 * 1024 * 5,
            'backupCount': 5,
            'formatter': 'standard',
        },
        'info_log': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(SERVER_LOG_DIR, 'task_info.log'),
            'maxBytes': 1024 * 1024 * 5,
            'backupCount': 5,
            'formatter': 'standard',
        },
    },
    'loggers': {  # logger definitions
        'django': {
            'handlers': ['req_info'],
            'level': 'INFO',
            'propagate': False
        },
        'django.request': {
            'handlers': ['req_err'],
            'level': 'ERROR',
            'propagate': False,
        },
        # Logger obtained via logging.getLogger('django.task') by the
        # task-running code.
        'django.task': {
            'handlers': ['info_log', 'console'],
            'level': 'INFO',
            'propagate': False,
        },
        # Drop DisallowedHost records instead of logging them.
        'django.security.DisallowedHost': {
            'handlers': ['null'],
            'propagate': False,
        },
    }
}

try:
from gpu_tasker.email_settings import *
EMAIL_NOTIFICATION = True
Expand Down
12 changes: 7 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
from datetime import datetime
import threading
import logging

import django

Expand All @@ -15,18 +16,19 @@
from task.models import GPUTask
from task.utils import run_task
from gpu_info.models import GPUServer
from gpu_info.utils import GPUInfoUpdater, add_hostname
from gpu_info.utils import GPUInfoUpdater

# Logger for the scheduler loop's status messages; the 'django.task' logger
# and its handlers are declared in the LOGGING dict in gpu_tasker/settings.py.
task_logger = logging.getLogger('django.task')


if __name__ == '__main__':
server_username, server_private_key_path, gpustat_path = get_admin_config()

gpu_updater = GPUInfoUpdater(server_username, gpustat_path, server_private_key_path)
while True:
print('{:s}, Running processes: {:d}'.format(
datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
threading.active_count() - 1)
)
task_logger.info('Running processes: {:d}'.format(
threading.active_count() - 1
))
start_time = time.time()
gpu_updater.update_gpu_info()
for task in GPUTask.objects.filter(status=0):
Expand Down
12 changes: 8 additions & 4 deletions task/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import time
import traceback
import logging

from gpu_tasker.settings import RUNNING_LOG_DIR
from .models import GPUTask, GPUTaskRunningLog
Expand All @@ -12,6 +13,9 @@
send_task_start_email, send_task_finish_email, send_task_fail_email


# Logger for task execution messages; the 'django.task' logger and its
# handlers are declared in the LOGGING dict in gpu_tasker/settings.py.
task_logger = logging.getLogger('django.task')


def generate_ssh_cmd(host, user, exec_cmd, private_key_path=None):
exec_cmd = exec_cmd.replace('$', '\\$')
if private_key_path is None:
Expand All @@ -24,7 +28,7 @@ def generate_ssh_cmd(host, user, exec_cmd, private_key_path=None):
class RemoteProcess:
def __init__(self, user, host, cmd, workspace="~", private_key_path=None, output_file=None):
self.cmd = generate_ssh_cmd(host, user, "cd {} && {}".format(workspace, cmd), private_key_path)
print(self.cmd)
task_logger.info('cmd:\n' + self.cmd)
if output_file is not None:
self.output_file = output_file
with open(self.output_file, "wb") as out:
Expand Down Expand Up @@ -82,7 +86,7 @@ def run_task(task, available_server):
log_file_path
)
pid = process.pid()
print('Task {:d}-{:s} is running, pid: {:d}'.format(task.id, task.name, pid))
task_logger.info('Task {:d}-{:s} is running, pid: {:d}'.format(task.id, task.name, pid))

# save process status
running_log.pid = pid
Expand All @@ -97,7 +101,7 @@ def run_task(task, available_server):

# wait for return
return_code = process.get_return_code()
print('Task {:d}-{:s} stopped, return_code: {:d}'.format(task.id, task.name, return_code))
task_logger.info('Task {:d}-{:s} stopped, return_code: {:d}'.format(task.id, task.name, return_code))

# save process status
running_log.status = 2 if return_code == 0 else -1
Expand All @@ -112,7 +116,7 @@ def run_task(task, available_server):
send_task_fail_email(running_log)
except Exception:
es = traceback.format_exc()
print(es)
task_logger.error(es)
running_log.status = -1
running_log.save()
task.status = -1
Expand Down

0 comments on commit 678e120

Please sign in to comment.