diff --git a/README.md b/README.md index 2f6a576..e7140e5 100644 --- a/README.md +++ b/README.md @@ -163,6 +163,13 @@ df = fdr.DataReader('^NYICDX') # ICE U.S. Dollar Index (^NYICDX) 달러인덱스 # FRED 데이터 여러 항목 한번에 df = fdr.DataReader('FRED:M2,HSN1F,NASDAQCOM') # M2 통화량, HSN1F 주택판매지수, NASDAQCOM 나스닥종합지수 +# KRX 지수 및 지수 구성종목 +df = fdr.SnapDataReader('KRX/INDEX/LIST') # KRX 전체 지수목록 + +df = fdr.SnapDataReader('KRX/INDEX/STOCK/1001') # KOSPI 지수구성종목 +df = fdr.SnapDataReader('KRX/INDEX/STOCK/1028') # 코스피 200 +df = fdr.SnapDataReader('KRX/INDEX/STOCK/5106') # KRX ESG Leaders 150 테마 지수 구성종목 + # 캔들차트 df = fdr.DataReader('005930', '2023-01-01', '2023-06-30') @@ -182,4 +189,4 @@ fdr.chart.plot(df) * [S&P500 가격 데이터 수집과 수익률 분석](https://nbviewer.jupyter.org/710b8f0a4bd9a8df91ae1be6c7e838b1) * [S&P500 팩터 데이터 수집과 분석](https://nbviewer.jupyter.org/35a1b0d5248bc9b09513e53be437ac42) -**2018-2023 [FinanceData.KR]()** +**2018-2024 [FinanceData.KR]()** diff --git a/__init__.py b/__init__.py index e62364f..6acc59e 100644 --- a/__init__.py +++ b/__init__.py @@ -2,9 +2,9 @@ from .data import (SnapDataReader) from .data import (StockListing) from .data import (EtfListing) -from . import (chart) +from .chart import (plot) -__version__ = '0.9.80' +__version__ = '0.9.90' __all__ = [ '__version__', diff --git a/back_chart.py b/back_chart.py new file mode 100644 index 0000000..69b4838 --- /dev/null +++ b/back_chart.py @@ -0,0 +1,254 @@ +#-*- coding: utf-8 -*- +# +# FinanceDataReader chart.py +# (c)2018-2024 FinanceData.KR + +from FinanceDataReader.chart import (plot, candle, line) + +# import numpy as np +# import pandas as pd +# from datetime import datetime, date +# import itertools + +# plotly_install_msg = f''' +# {'-' * 80} +# FinanceDataReader.chart.plot() depends on plotly +# plotly is not installed; please install it as follows + +# pip install plotly + +# FinanceDataReader.chart.plot()는 plotly에 의존성이 있습니다. 
+# 명령창에서 다음과 같이 plotly를 설치하세요 + +# pip install plotly +# ''' + +# try: +# import plotly.graph_objects as go +# from plotly.subplots import make_subplots +# except ModuleNotFoundError as e: +# raise ModuleNotFoundError(plotly_install_msg) + +# ## holiday Calendar +# holidays_url_base = 'https://raw.githubusercontent.com/FinanceData/FinanceDataReader/master/calendars' + +# holidays_krx,holidays_hyse = None, None + +# ## Chart plot +# def plot(df, tools=None, layout=None): +# ''' +# plot candle chart with DataFrame +# * df: OHLCV data(DataFrame) +# * updates: additional chart configurations +# ''' +# global holidays_krx, holidays_hyse + +# if holidays_krx is None: +# holidays_krx = pd.read_csv(f'{holidays_url_base}/holidays-krx.csv')['date'].values +# if holidays_hyse is None: +# holidays_hyse = pd.read_csv(f'{holidays_url_base}/holidays-nyse.csv')['date'].values + + +# tools = {'SMA': [10, 20, 60]} if not tools else tools +# layout = dict() if not layout else layout + +# x_ticks = df.index + +# change = df["Close"].pct_change() +# oc_ratio = (df["Close"]-df["Open"])/df["Open"] +# oh_ratio = (df["High"]-df["Open"])/df["Open"] +# hover_text = [f'DoD: {chg:.1%} OC: {oc:.1%}, OH: {oh:.1%}' for chg, oc, oh in zip(change, oc_ratio, oh_ratio)] + +# # OHLC candle chart +# candle = go.Candlestick( +# x=x_ticks, +# open=df["Open"], high=df["High"], low=df["Low"], close=df["Close"], +# name='', +# text = hover_text, +# increasing_fillcolor = 'red', +# decreasing_fillcolor = 'blue', +# increasing_line_color = 'red', +# decreasing_line_color = 'blue', +# increasing_line_width = 1.5, +# decreasing_line_width = 1.5, +# showlegend = False, +# opacity=0.75, +# ) + +# # volume bar chart +# vol_colors = np.where(df['Close'].shift(1) > df['Close'], 'blue', 'red') +# vol_bar = go.Bar( +# x=x_ticks, +# y=df['Volume'], +# showlegend=False, +# name='', +# opacity = 0.5, +# marker={'color': vol_colors}, +# ) + +# fig = make_subplots(rows=2, cols=1, +# shared_xaxes=True, +# 
vertical_spacing=0, +# row_width=[0.3, 0.7]) + +# fig.add_trace(candle, row=1, col=1) +# fig.add_trace(vol_bar, row=2, col=1) + +# # hide rangeslider +# fig.update_xaxes(rangeslider_visible=False) + +# # holidays +# holidays = holidays_krx +# if df.attrs.get('exchange') != 'KRX': +# holidays = holidays_hyse + +# # Remove non-business days +# fig.update_xaxes(rangebreaks = [ +# dict(bounds=['sat','mon']), # remove weekend +# dict(values=holidays), # remove non biz days +# # dict(bounds=[15.5, 9], pattern='hour'), # remove non biz hours +# ]) + +# # draw axes and grid +# fig.update_xaxes(showline=True, linewidth=1, linecolor='black', gridcolor='lightgray') +# fig.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='lightgray') + +# # x-axis tick format +# fig.update_xaxes(tickformat='%Y-%m-%d', row=2, col=1) +# fig.update_xaxes(tickangle=45) + +# # y-axis tick format +# fig.update_yaxes(tickformat=',', row='all', col=1) + +# # spikes +# fig.update_xaxes(showspikes=True, spikethickness=1, spikedash="dot", spikecolor="lightgray", spikemode="across", spikesnap='cursor') +# # fig.update_traces(xaxis="x2") # binding x-axis + +# # bgcolor +# fig.update_layout(plot_bgcolor='white') +# fig.update_layout(paper_bgcolor='white') + +# ## tools (tools: indicators and annotations) + +# # available_tools +# available_tools = ['SMA', 'EMA', 'HLINE', 'VLINE', 'VRECT'] + +# for key in tools: +# if key.upper() not in available_tools: +# raise ValueError(f"Unsupport tool: {key}") + +# tools = {key.upper(): tools[key] for key in tools} # keys to upper case + +# # default tools +# # default_ma_params = [10, 20, 60] # default moving averages params +# # if all(x not in tools.keys() for x in ['SMA', 'EMA']): +# # tools['SMA'] = default_ma_params + +# line_dashes = ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot'] +# line_colors = ['darkmagenta', 'gold', 'limegreen', 'maroon', 'chocolate', 'seagreen', 'coral'] +# line_style_cycler = 
itertools.cycle(itertools.product(line_dashes, line_colors)) + +# default_line_width = 0.75 + +# if 'SMA' in tools: # SMA: simple moving average +# args = tools.pop('SMA') +# for arg in args: +# line_dash, line_color = next(line_style_cycler) +# ma_args = dict() +# ma_args['line_width'] = default_line_width +# if type(arg) == int: +# window = arg +# ma_args['line_dash'] = line_dash +# ma_args['line_color'] = line_color +# ma_args['line_width'] = default_line_width +# elif type(arg) == dict: +# window = arg['window'] +# ma_args['line_dash'] = arg['line_dash'] if 'line_dash' in arg else line_dash +# ma_args['line_color'] = arg['line_color'] if 'line_color' in arg else line_color +# ma_args['line_width'] = arg['line_width'] if 'line_width' in arg else default_line_width +# ma_price = df['Close'].rolling(window).mean().round(0) +# ma_args['x'] = ma_price.index +# ma_args['y'] = ma_price +# ma_args['name'] = f'SMA_{window}' +# fig.add_trace(go.Scatter(**ma_args), row=1, col=1) + +# if 'EMA' in tools: # EMA: exponential moving average +# args = tools.pop('EMA') +# for arg in args: +# line_dash, line_color = next(line_style_cycler) +# ma_args = dict() +# ma_args['line_width'] = default_line_width +# if type(arg) == int: +# window = arg +# ma_args['line_dash'] = line_dash +# ma_args['line_color'] = line_color +# ma_args['line_width'] = default_line_width +# elif type(arg) == dict: +# window = arg['window'] +# ma_args.update(arg) +# ma_args['line_dash'] = arg['line_dash'] if 'line_dash' in arg else line_dash +# ma_args['line_color'] = arg['line_color'] if 'line_color' in arg else line_color +# ma_args['line_width'] = arg['line_width'] if 'line_width' in arg else default_line_width +# ma_price = df['Close'].ewm(span=window).mean() +# ma_args['x'] = ma_price.index +# ma_args['y'] = ma_price +# ma_args['name'] = f'EMA_{window}' +# fig.add_trace(go.Scatter(**ma_args), row=1, col=1) + +# if 'HLINE' in tools: # HLINE: Horizontal line +# hline_args = dict(line_width=1.5, 
line_dash="dot", line_color="tomato", layer="below") +# hline_value = tools.pop('HLINE') +# if hasattr(hline_value, '__iter__'): +# for hline in hline_value: +# if type(hline) in [int, float]: +# hline_args['y'] = hline +# elif type(hline) == dict: +# hline_args.update(hline) +# else: +# raise ValueError("'HLINE' must be list of str or list of dict") +# fig.add_hline(**hline_args) +# else: +# hline_args['y'] = hline_value +# fig.add_hline(**hline_args) # just one value + +# if 'VLINE' in tools: # VLINE: vertical line +# vline_args = dict(line_width=1.5, line_dash="dot", line_color="tomato", layer="below") +# vline_value = tools.pop('VLINE') +# if hasattr(vline_value, '__iter__'): +# for vline in vline_value: +# if type(vline) in [pd.Timestamp, str, datetime, date]: +# vline_args['x'] = pd.to_datetime(vline).timestamp() * 1000 +# elif type(vline) == dict: +# vline_args.update(vline) +# vline_args['x'] = pd.to_datetime(vline_args['x']).timestamp() * 1000 +# else: +# raise ValueError("'VLINE' must be list of str or list of dict") +# fig.add_vline(**vline_args) +# else: +# vline_args['y'] = vline_value +# fig.add_hline(**vline_args) # just one value + +# if 'VRECT' in tools: # VRECT: highlighting period +# vrect_list = tools.pop('VRECT') if 'VRECT' in tools else {} +# for vrect in vrect_list: +# vrect_args = dict(fillcolor="LightSalmon", opacity=0.3, layer="below", line_width=0) +# if type(vrect) == tuple: +# vrect_args['x0'] = str(vrect[0]) +# vrect_args['x1'] = str(vrect[1]) +# elif type(vrect) == dict: +# vrect_args.update(vrect) +# else: +# raise ValueError("'vrect' must be list of tuple or list of dict") +# fig.add_vrect(**vrect_args) + +# ## update_layout +# layout_defaults = { +# 'hovermode': 'x', # available hovermodes: 'closest', 'x', 'x unified', 'y', 'y unified' +# 'margin': go.layout.Margin(l=0, r=0, b=0, t=0), # margins +# 'width': 1280, +# 'height': 640, +# } +# layout.update(layout_defaults) +# fig.update_layout(layout) +# return fig + diff --git 
a/chart/__init__.py b/chart/__init__.py new file mode 100644 index 0000000..5c23863 --- /dev/null +++ b/chart/__init__.py @@ -0,0 +1,2 @@ +from .plot import plot +from .candle import candle \ No newline at end of file diff --git a/chart.py b/chart/candle.py similarity index 97% rename from chart.py rename to chart/candle.py index 84da40c..a8745f4 100644 --- a/chart.py +++ b/chart/candle.py @@ -8,8 +8,11 @@ from datetime import datetime, date import itertools -plotly_install_msg = f''' - {'-' * 80} +try: + import plotly.graph_objects as go + from plotly.subplots import make_subplots +except ModuleNotFoundError as e: + plotly_install_msg = f'''{'-' * 80} FinanceDataReade.chart.plot() dependen on plotly plotly not installed please install as follows @@ -20,20 +23,14 @@ pip install plotly ''' - -try: - import plotly.graph_objects as go - from plotly.subplots import make_subplots -except ModuleNotFoundError as e: raise ModuleNotFoundError(plotly_install_msg) ## holiday Calendar holidays_url_base = 'https://raw.githubusercontent.com/FinanceData/FinanceDataReader/master/calendars' - holidays_krx,holidays_hyse = None, None ## Chart plot -def plot(df, tools=None, layout=None): +def candle(df, tools=None, layout=None): ''' plot candle chart with DataFrame * df: OHLCV data(DataFrame) @@ -246,7 +243,8 @@ def plot(df, tools=None, layout=None): 'width': 1280, 'height': 640, } - layout.update(layout_defaults) - fig.update_layout(layout) + new_layout = {} + new_layout.update(layout_defaults) + new_layout.update(layout) + fig.update_layout(new_layout) return fig - diff --git a/chart/plot.py b/chart/plot.py new file mode 100644 index 0000000..ea7c77f --- /dev/null +++ b/chart/plot.py @@ -0,0 +1,75 @@ +#-*- coding: utf-8 -*- +# +# FinaceDataReader chart.py +# (c)2018-2023 FinaceData.KR + +import numpy as np + +try: + import plotly.graph_objects as go + from plotly.subplots import make_subplots +except ModuleNotFoundError as e: + plotly_install_msg = f'''{'-' * 80} + 
def plot(df, kind='line', x=None, y=None, secondary_y=None, title=None, layout=None):
    '''
    plot DataFrame columns as line and/or bar traces on a single figure
    * df: source data (DataFrame)
    * kind: 'line' or 'bar' (case-insensitive); a single string applies to
      every trace, a sequence supplies one kind per plotted column
    * x: x-axis data. None -> df.index, str -> df[x] for every trace,
      list/tuple -> one column name per plotted column,
      anything else (Series, Index, array) -> used as-is for every trace
    * y: column name or sequence of column names; None -> every numeric column
    * secondary_y: column name, or collection of column names, drawn against
      the right-hand y axis; True/False applies to every trace
    * title: figure title (optional)
    * layout: dict of plotly layout settings, applied last so it overrides
      the title (optional)

    returns the plotly figure. Raises ValueError for an unsupported kind, or
    when a per-trace `kind` / `x` sequence does not match the number of
    plotted columns (previously such traces were silently dropped).
    '''
    # Normalise `y` to a list of column names so that a single column goes
    # through exactly the same x / secondary_y handling as several columns.
    if y is None:
        cols = df.select_dtypes(include=np.number).columns.tolist()
    elif isinstance(y, str):
        cols = [y]
    else:
        cols = list(y)
    n_traces = len(cols)

    # One kind per trace, validated up front instead of being skipped later.
    kinds = [kind] * n_traces if isinstance(kind, str) else list(kind)
    if len(kinds) != n_traces:
        raise ValueError(f"'kind' has {len(kinds)} entries but {n_traces} columns are plotted")
    kinds = [k.lower() for k in kinds]
    for k in kinds:
        if k not in ('line', 'bar'):
            raise ValueError(f"Unsupported kind: {k}")

    # One x series per trace. `is None` matters here: `x == None` on a pandas
    # object is element-wise and cannot be used in an `if`.
    if x is None:
        xs = [df.index] * n_traces
    elif isinstance(x, str):
        xs = [df[x]] * n_traces
    elif isinstance(x, (list, tuple)):
        if len(x) != n_traces:
            raise ValueError(f"'x' has {len(x)} entries but {n_traces} columns are plotted")
        xs = [df[col] for col in x]
    else:
        xs = [x] * n_traces

    # Which traces go on the secondary (right-hand) y axis.
    if secondary_y is None:
        rys = [False] * n_traces
    elif isinstance(secondary_y, bool):
        rys = [secondary_y] * n_traces
    elif isinstance(secondary_y, str):
        rys = [col == secondary_y for col in cols]
    else:
        rys = [col in secondary_y for col in cols]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for trace_kind, trace_x, col, on_secondary in zip(kinds, xs, cols, rys):
        if trace_kind == 'line':
            trace = go.Scatter(x=trace_x, y=df[col], mode='lines', name=col, opacity=0.7)
        else:  # 'bar' -- the only other kind accepted above
            trace = go.Bar(x=trace_x, y=df[col], name=col, opacity=0.7)
        fig.add_trace(trace, secondary_y=on_secondary)

    ## update_layout
    new_layout = {}
    if title:
        new_layout['title'] = title
    if layout:
        new_layout.update(layout)  # caller settings win over the title default
    fig.update_layout(new_layout)
    return fig
def SnapDataReader(ticker: str) -> pd.DataFrame:
    '''
    read data snapshots from various finance data source
    * ticker: slash-separated snapshot path. Matching is case-insensitive and
      surrounding whitespace is ignored. The leading segment selects the reader:
      'KRX/' -> KrxSnapReader, 'ECOS/' -> EcosSnapReader, 'NAVER/' -> NaverSnapReader

    usage:
    - fdr.SnapDataReader('ECOS/KEYSTAT/LIST') # ECOS top-100 key statistics list
    - fdr.SnapDataReader('ECOS/SNAP/LIST') # catalogue of ECOS snapshot charts
    - fdr.SnapDataReader('KRX/INDEX/LIST') # KRX index list
    - fdr.SnapDataReader('KRX/INDEX/STOCK/1001') # index constituents (1001: KOSPI)
    - fdr.SnapDataReader('NAVER/STOCK/005930/FINSTATE') # financial statements
    - fdr.SnapDataReader('NAVER/STOCK/005930/FOREIGN') # foreign ownership ratio
    - fdr.SnapDataReader('NAVER/STOCK/005930/INVSTORS') # trading trend by investor type

    NOTE(review): the NAVER paths are carried over from the earlier usage notes;
    confirm them against NaverSnapReader.

    raises NotImplementedError for any other prefix (for example 'DART/...',
    which has no reader wired up in this dispatcher).
    '''
    ticker = ticker.strip().upper()
    # Keep this an if/elif chain: each reader class is only looked up when its
    # prefix actually matches.
    if ticker.startswith('KRX/'):
        return KrxSnapReader(ticker).read()
    elif ticker.startswith('ECOS/'):
        return EcosSnapReader(ticker).read()
    elif ticker.startswith('NAVER/'):
        return NaverSnapReader(ticker).read()
    else:
        msg = f'"{ticker}" is not implemented'
        raise NotImplementedError(msg)
_ECOS_URL = 'https://ecos.bok.or.kr/serviceEndpoint/httpService/request.json'
_ECOS_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever


def _ecos_stat(stat_search_ds_list, start, end, freq='D'):
    '''Fetch ECOS statistics and return them as a DataFrame.

    stat_search_ds_list (list of dict): series selectors, sent to the service
        unchanged as "statSrchDsList" (see _ecos_keystat for how they are built)
    start (datetime or str): first date of the requested range
    end (datetime or str): last date of the requested range
    freq (str): data frequency code sent to the service ('D', 'M', ...)

    Returns a DataFrame indexed by date with one numeric column per series.
    When the service reports an error, the detail messages are printed and an
    empty DataFrame is returned.
    '''
    import warnings

    start = pd.to_datetime(start)
    end = pd.to_datetime(end)
    # NOTE(review): every non-monthly frequency is sent as YYYYMMDD; confirm
    # that this is what the service expects for 'Q' and 'A'.
    fmt = "%Y%m" if freq.upper() == 'M' else "%Y%m%d"

    payload = {
        "header": {
            "guidSeq": 1,
            "trxCd": "OSUUA02R01", "scrId": "IECOSPCM02", "sysCd": "03", "fstChnCd": "WEB",
            "langDvsnCd": "KO", "envDvsnCd": "D",
            "sndRspnDvsnCd": "S", "sndDtm": "20400114",
            "ipAddr": "124.40.40.5", "usrId": "IECOSPC",
            "pageNum": 1,
            "pageCnt": 10000  # large enough to get everything in one page
        },
        "data": {
            "statSrchDsList": stat_search_ds_list,
            "statSrchFreqList": [
                {
                    "freq": freq,
                    "vlidStDtm": start.strftime(fmt),  # range start
                    "vlidEndDtm": end.strftime(fmt),  # range end
                }
            ],
            "statTyp": "M",
            "statDataCvsnCdList": [
                "00"
            ],
            "viewType": "01",
            "holidayYn": "Y"
        }
    }

    res = requests.post(_ECOS_URL, json.dumps(payload), timeout=_ECOS_TIMEOUT)
    jo = res.json()
    if jo['message']['msgRepNum']:  # the service returned an error message
        print(jo['message']['detailMsgs'])
        return pd.DataFrame()

    jo_list = json.loads(jo['data']['jsonCtnt'])
    df = pd.json_normalize(jo_list).T

    # Column names come from the first item-name row
    df.columns = df.loc['항목명1'].values

    # Drop the metadata rows so that only dated observations remain
    remove_indexes = ['통계표', 'StatisticalTable',
                      '코드(항목명1)', 'Code(ItemNames1)', '항목명1', 'ItemNames1',
                      '코드(항목명2)', 'Code(ItemNames2)', '항목명2', 'ItemNames2',
                      '코드(항목명3)', 'Code(ItemNames3)', '항목명3', 'ItemNames3',
                      '단위', 'Unit', '가중치', 'Wgt', '변환', 'Conversion', 'digit']
    df = df.drop(remove_indexes, errors='ignore')

    # Silence UserWarning only while converting, instead of installing a
    # permanent process-wide filter that would also hide callers' warnings.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=UserWarning)

        # Index: object -> DatetimeIndex
        fmt = "%Y%m" if freq.upper() == 'M' else None
        df.index = pd.to_datetime(df.index, format=fmt)
        df = df.sort_index()

        # Columns: object -> numeric (unparseable cells become NaN)
        cols = df.columns
        df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
    return df


def _ecos_keystat(keys, start, end, freq=None):
    '''Fetch ECOS data by key(s) from the "100 key statistics" list.

    keys (str or sequence of str): key100statId value(s), e.g. 'K051'
    start, end (datetime or str): requested date range
    freq (str, optional): frequency code; defaults to the base frequency of
        the first key

    Raises ValueError when `keys` is empty or contains an unknown key. All
    keys are validated before any request is sent.
    '''
    base_freq_map = {
        'K051': 'D', 'K052': 'D', 'K063': 'D', 'K053': 'D', 'K055': 'D', 'K056': 'D', 'K062': 'D', 'K057': 'D', 'K058': 'M', 'K059': 'M',
        'K005': 'M', 'K006': 'M', 'K007': 'Q', 'K008': 'M', 'K002': 'M', 'K003': 'M', 'K004': 'M', 'K011': 'M', 'K152': 'D', 'K153': 'D',
        'K154': 'D', 'K156': 'D', 'K101': 'D', 'K102': 'D', 'K103': 'M', 'K107': 'M', 'K104': 'M', 'K108': 'M', 'K258': 'Q', 'K259': 'Q',
        'K260': 'Q', 'K261': 'Q', 'K462': 'Q', 'K257': 'Q', 'K263': 'A', 'K264': 'Q', 'K265': 'Q', 'K266': 'A', 'K220': 'M', 'K201': 'M',
        'K202': 'M', 'K203': 'M', 'K204': 'M', 'K205': 'M', 'K207': 'M', 'K206': 'M', 'K210': 'M', 'K453': 'M', 'K212': 'M', 'K215': 'M',
        'K213': 'M', 'K216': 'M', 'K218': 'M', 'K217': 'M', 'K219': 'M', 'K253': 'M', 'K254': 'M', 'K252': 'M', 'K268': 'M', 'K269': 'M',
        'K267': 'A', 'K256': 'A', 'K255': 'A', 'K306': 'Q', 'K463': 'Q', 'K456': 'A', 'K464': 'A', 'K303': 'M', 'K304': 'M', 'K301': 'M',
        'K302': 'M', 'K307': 'Q', 'K305': 'Q', 'K308': 'Q', 'K451': 'A', 'K460': 'A', 'K461': 'A', 'K351': 'M', 'K356': 'M', 'K357': 'M',
        'K465': 'M', 'K466': 'M', 'K358': 'M', 'K359': 'M', 'K360': 'M', 'K467': 'M', 'K155': 'M', 'K353': 'Q', 'K468': 'Q', 'K401': 'M',
        'K405': 'M', 'K406': 'M', 'K402': 'M', 'K403': 'M', 'K404': 'M', 'K407': 'M', 'K408': 'M', 'K409': 'M', 'KN11': 'M', 'K469': 'M'}
    # Keys whose selector carries only the first item id/value pair
    first_item_only_keys = ('K258', 'K259', 'K260', 'K261', 'K462', 'K264', 'K265')

    key_list = [keys] if isinstance(keys, str) else list(keys)
    if not key_list:
        raise ValueError('keys must not be empty')
    # Validate before the frequency lookup and before any network traffic, so
    # a bad key is reported as ValueError rather than surfacing as KeyError.
    for key in key_list:
        if key not in base_freq_map:
            raise ValueError(f'invalid key: {key}')
    base_freq = freq if freq else base_freq_map[key_list[0]]

    stat_search_ds_list = []
    for key in key_list:
        payload = {
            "header": {
                "guidSeq": 1, "trxCd": "OSUSC04R01", "scrId": "IECOSPCM04", "sysCd": "03",
                "fstChnCd": "WEB", "langDvsnCd": "KO", "envDvsnCd": "D", "sndRspnDvsnCd": "S",
                "sndDtm": "20220822", "ipAddr": "124.50.40.5", "usrId": "IECOSPC", "pageNum": 1, "pageCnt": 1000
            },
            "data": {"key100statId": key}
        }
        res = requests.post(_ECOS_URL, json.dumps(payload), timeout=_ECOS_TIMEOUT)
        data = res.json()['data']

        if key in first_item_only_keys:
            fields = ('dsId', 'dsItmId1', 'dsItmVal1')
        else:
            fields = ('dsId',
                      'dsItmId1', 'dsItmId2', 'dsItmId3',
                      'dsItmVal1', 'dsItmVal2', 'dsItmVal3')
        stat_search_ds_list.append({field: data[field] for field in fields})
    return _ecos_stat(stat_search_ds_list, start, end, base_freq)


class EcosDataReader:
    '''Reader for a single ECOS series, e.g. ECOS:722Y001/0101000 (Bank of Korea base rate).'''

    def __init__(self, symbol, start=None, end=None):
        '''Store the symbol; the range defaults to 1990-01-01 .. today.'''
        self.symbol = symbol
        self.start = datetime(1990, 1, 1) if start is None else pd.to_datetime(start)
        self.end = datetime.today() if end is None else pd.to_datetime(end)

    def read(self):
        '''Fetch the series at daily frequency and tag the frame with ECOS attrs.'''
        # NOTE(review): the symbol is forwarded unchanged as the service's
        # "statSrchDsList", while _ecos_keystat passes a list of selector
        # dicts there -- confirm whether a symbol-to-selector conversion is
        # missing here.
        df = _ecos_stat(self.symbol, self.start, self.end, freq='D')
        df.attrs = {'exchange': 'ECOS', 'source': 'ECOS', 'data': 'SERIES'}
        return df


class EcosKeyStatDataReader:
    '''Reader for ECOS "100 key statistics" series, e.g. ECOS-KEYSTAT:K051.'''

    def __init__(self, symbol, start=None, end=None):
        '''Store the key(s); the range defaults to 1990-01-01 .. today.'''
        self.symbol = symbol
        self.start = datetime(1990, 1, 1) if start is None else pd.to_datetime(start)
        self.end = datetime.today() if end is None else pd.to_datetime(end)

    def read(self):
        '''Fetch the key statistic(s) and tag the frame with ECOS attrs.'''
        df = _ecos_keystat(self.symbol, self.start, self.end)
        df.attrs = {'exchange': 'ECOS', 'source': 'ECOS', 'data': 'SERIES'}
        return df
Rate(S.A.),% ,Q +K259,"민간소비증감률(실질, 계절조정 전기대비)","Private Consumption(S.A., % Change)",% ,Q +K260,"설비투자증감률(실질, 계절조정 전기대비)","Facilities Investment(S.A.,% Change)",% ,Q +K261,"건설투자증감률(실질, 계절조정 전기대비)","Construction Investment(S.A.,% Change)",% ,Q +K462,"재화의 수출 증감률(실질, 계절조정 전기대비)","Exports of Goods and Services(S.A., % Change)",% ,Q +K257,"GDP(명목, 계절조정)","GDP(S.A.,at Current Price)",십억원 ,Q +K263,1인당GNI,Per Capita GNI,달러 ,A +K264,총저축률,Gross Saving Ratio,% ,Q +K265,국내총투자율,Gross Dom. Investment Ratio,% ,Q +K266,수출입의 대 GNI 비율,Ratio of Exports and Imports to GNI,% ,A +K220,전산업생산지수(농림어업제외),"Index of all industry production(excluding Agriculture, Forestry and Fishing)",2020=100,M +K201,제조업생산지수,Manufacturing Production Index,2020=100,M +K202,제조업출하지수,Manufacturing Shipment Index,2020=100,M +K203,제조업재고지수,Manufacturing Inventory Index,2020=100,M +K204,제조업가동률지수,Manufacturing Operation Ratio Index,2020=100,M +K205,서비스업생산지수,Index of Service,2020=100,M +K207,도소매업지수,Wholesale and Retail Sales Index,2020=100,M +K206,소매판매액지수,Retail Business Sales Index ,2020=100,M +K210,개인신용카드사용액,Amount of Personal Credit Cards Use,백만원 ,M +K453,자동차판매액지수,Motor Vehicle Sales Index,2020=100,M +K212,설비투자지수,Estimated Index of Equipment Investment,2015=100,M +K215,기계류내수출하지수,Machinery Shipment Index for Dom. Market,2020=100,M +K213,국내수요기계수주액,Value of Dom. Machinery Orders Received,백만원 ,M +K216,건설기성액,Value of Construction Completed,백만원 ,M +K218,건축허가면적,Permits Authorized for Bldg. Construction,㎡,M +K217,건설수주액,Value of Construction Orders Received,백만원 ,M +K219,건축착공면적,Results of Construction Start,㎡,M +K253,경기동행지수순환변동치,Cyclical Component of Composite Coincident Index,,M +K254,경기선행지수순환변동치,Cyclical Component of Composite Leading Index,,M +K252,소비자심리지수,Composite Consumer Sentiment Index,,M +K268,제조업업황실적BSI,"BSI(Manufact. 
Business Con., Tendency)",,M +K269,경제심리지수,Economic Sentiment Index,,M +K267,제조업매출액증감률,Growth Rate of Sales in Manufacturing,% ,A +K256,제조업매출액세전순이익률,Ordinary Income to Sales in Manufacturing,% ,A +K255,제조업부채비율,Debt Ratio in Manufacturing,% ,A +K306,가구당월평균소득,Monthly Ave. Income of Households,원 ,Q +K463,평균소비성향,Average of Propensity to Consume,% ,Q +K456,지니계수,Gini's Coefficient,,A +K464,5분위배율,"Income of Highest Quintile/Income of Lowest Quintile(by quintile, ratio)",,A +K303,실업률,Unemployment Rate,% ,M +K304,고용률,Employment Rate,% ,M +K301,경제활동인구,Economically Active Pop.,천명 ,M +K302,취업자수,Employed Persons,천명 ,M +K307,시간당명목임금지수,Nominal Wage per Hour(% change),2020=100,Q +K305,노동생산성지수,Labor Productivity(% Change),2020=100,Q +K308,단위노동비용지수,Unit Labor Cost(% Change),2020=100,Q +K451,추계인구,Population Projected,명 ,A +K460,고령인구비율(65세 이상),Share of the aged population (65+),% ,A +K461,합계출산율,Total Fertility Rate,명 ,A +K351,경상수지,Current Account,백만달러,M +K356,직접투자(자산),"Direct Investment, Assets",백만달러,M +K357,직접투자(부채),"Direct Investment, Liabilities",백만달러,M +K465,증권투자(자산),"Portfolio Investment, Assets",백만달러,M +K466,증권투자(부채),"Portfolio Investment, Liabilities",백만달러,M +K358,수출금액지수,Export value index,2015=100,M +K359,수입금액지수,Import value index,2015=100,M +K360,순상품교역조건지수,Net Barter Terms of Trade Index,2015=100,M +K467,소득교역조건지수,Income Terms of Trade Index,2015=100,M +K155,외환보유액,International Reserves,천달러 ,M +K353,대외채무,External Debt,백만달러,Q +K468,대외채권,External Assets,백만달러,Q +K401,소비자물가지수,Consumer Price Index,2020=100,M +K405,농산물 및 석유류제외 소비자물가지수,CPI Excluding Agricultural Products & Oils,2020=100,M +K406,생활물가지수,CPI For Living Necessaries,2020=100,M +K402,생산자물가지수,Producer Price Index,2015=100,M +K403,수출물가지수,Export Price Index,2015=100,M +K404,수입물가지수,Import Price Index,2015=100,M +K407,주택매매가격지수,Housing Sales Price Index,2021.6=100,M +K408,주택전세가격지수,Housing Jeonse Price Index,2021.6=100,M +K409,지가변동률(전기대비),Land Price Change Rates,% ,M +KN11,"국제유가(Dubai, 현물)",International Oil Price(Dubai),달러 ,M 
def _ecos_keystat_listing():
    '''Return the "100 key statistics" catalogue as a DataFrame.

    The catalogue is read from the CSV embedded in this module
    (key_stat_list_csv). The live alternative is to POST trxCd "OSUSC03R01"
    with data {"useYn": "Y"} to
    https://ecos.bok.or.kr/serviceEndpoint/httpService/request.json and build
    the frame from jo['data']['dataList'].
    '''
    return pd.read_csv(io.StringIO(key_stat_list_csv))


# Raw string: the catalogue contains "\~", which is an invalid escape sequence
# in a normal string literal. The runtime value is unchanged.
# NOTE(review): 'ECOS/SNAP/1198' appears twice; the second row (교역조건지수)
# repeats the id and columns of the manufacturing row, which looks like a
# copy/paste slip -- confirm the intended chart id.
_ecos_snap_csv = r'''
Ticker,Desc,Columns
ECOS/SNAP/523,"주요 단기 시장금리","한국은행 기준금리, 콜금리(익일물), KORIBOR(3개월), CD수익률(91일)"
ECOS/SNAP/512,"주요 장기 시장금리","통안증권수익률(1년), 국고채수익률(3년), 국고채수익률(5년), 회사채수익률(3년, AA-)"
ECOS/SNAP/861,"예금은행 여수신 금리","여신금리, 수신금리, 여수신 금리차"
ECOS/SNAP/517-1,"가계신용","가계신용"
ECOS/SNAP/517-2,"가계대출 연체율","가계대출 연체율"
ECOS/SNAP/527,"협의 및 광의 통화","M1(협의통화, 평잔), M2(광의통화, 평잔)"
ECOS/SNAP/528,"금융기관 및 광의 유동성","Lf(금융기관유동성, 평잔), L(광의유동성, 말잔)"
ECOS/SNAP/529,"원/달러 및 원/엔 환율","원/달러(종가, 좌축), 원/100엔(매매기준율, 우축)"
ECOS/SNAP/530,"원/유로 및 원/위안 환율","원/유로(매매기준율, 좌축), 원/위안(종가, 우축)"
ECOS/SNAP/531,"코스피 및 코스닥 지수","코스피, 코스닥"
ECOS/SNAP/532,"코스피 주식거래대금 및 고객예탁금","코스피, 코스닥"
ECOS/SNAP/533,"채권거래대금 및 국고채발행액","채권거래대금, 국고채발행액"
ECOS/SNAP/1184,"경제성장률 및 재화의 수출증가율","경제성장률, 재화의 수출증가율"
ECOS/SNAP/1191,"민간소비, 설비투자 및 건설투자 증가율","민간소비, 설비투자, 건설투자"
ECOS/SNAP/1193-1,"GDP","GDP"
ECOS/SNAP/1193-2,"1인당 GNI","1인당 GNI"
ECOS/SNAP/1195-1,"GDP 대비 총저축률","GDP 대비 총저축률"
ECOS/SNAP/1195-2,"GDP 대비 국내총투자율","GDP 대비 국내총투자율"
ECOS/SNAP/1195-3,"수출입의 대 GNI 비율","수출입의 대 GNI 비율"
ECOS/SNAP/1196,"전산업생산지수","전산업생산지수(농림어업제외), 제조업생산지수"
ECOS/SNAP/1198,"제조업 출하, 재고, 가동률지수","제조업출하지수, 제조업재고지수, 제조업가동률지수"
ECOS/SNAP/1200,"서비스업생산지수","서비스업생산지수, 도소매업지수"
ECOS/SNAP/1202,"소매 및 자동차 판매, 개인신용카드","소매판매액지수, 자동차판매액지수, 개인신용카드사용액"
ECOS/SNAP/1203,"설비투자 관련 지수","설비투자지수, 기계류내수출하지수, 국내수요기계수주액"
ECOS/SNAP/1205,"건설기성액 및 건설수주액","건설기성액, 건설수주액"
ECOS/SNAP/1206,"건축허가 및 건축착공 면적","건축허가면적, 건축착공면적"
ECOS/SNAP/1207,"경기순환지표","경기동행지수순환변동치, 경기선행지수순환변동치"
ECOS/SNAP/1208,"심리지표","소비자심리지수, 제조업업황실적BSI, 경제심리지수"
ECOS/SNAP/1209,"기업경영분석지표","제조업매출액증가율, 제조업매출액세전순이익률, 제조업부채비율"
ECOS/SNAP/1210,"가계 소득 및 소비","가구당월평균소득, 평균소비성향"
ECOS/SNAP/1211,"소득분배지표","지니계수(\~2021), 5분위배율(\~2021), 지니계수(2020\~), 5분위배율(2020\~)"
ECOS/SNAP/1212,"실업률 및 고용률","실업률, 고용률"
ECOS/SNAP/1213,"경제활동인구 및 취업자수","경제활동인구, 취업자수"
ECOS/SNAP/1214,"노동 관련 지수","시간당명목임금지수, 노동생산성지수, 단위노동비용지수"
ECOS/SNAP/1204,"추계인구 및 고령인구비율","추계인구, 고령인구비율(65세 이상)"
ECOS/SNAP/1201,"합계출산율","합계출산율"
ECOS/SNAP/1199,"경상수지","경상수지"
ECOS/SNAP/1194,"직접투자","직접투자(자산), 직접투자(부채)"
ECOS/SNAP/1192,"증권투자","증권투자(자산), 증권투자(부채)"
ECOS/SNAP/1190,"수출입 금액지수","수출금액지수, 수입금액지수"
ECOS/SNAP/1198,"교역조건지수","제조업출하지수, 제조업재고지수, 제조업가동률지수"
ECOS/SNAP/1188-1,"외환보유액","외환보유액"
ECOS/SNAP/1188-2,"대외채무","대외채무"
ECOS/SNAP/1188-3,"대외채권","대외채권"
ECOS/SNAP/1197,"소비자물가 상승률","소비자물가지수, 농산물 및 석유류제외, 생활물가지수"
ECOS/SNAP/1187,"생산자물가 및 수출입물가 상승률","생산자물가지수, 수출물가지수, 수입물가지수"
ECOS/SNAP/1186,"부동산가격","주택매매가격 상승률, 주택전세가격 상승률, 지가변동률"
ECOS/SNAP/1511,"원자재가격","국제유가(Dubai, 좌축), 금"
'''


def _ecos_snap_reader(ticker):
    '''Download one ECOS snapshot chart and return it as a float DataFrame.

    ticker (str): 'ECOS/SNAP/<chart_id>' or 'ECOS/SNAP/<chart_id>-<n>', where
        <n> is a 1-based series number inside the chart export.

    Raises requests.HTTPError when the download fails, so that an error page
    is not handed to the Excel parser.
    '''
    full_code = ticker.replace('ECOS/SNAP/', '')
    # "517-2" -> chart "517", series "2"; no dash leaves sub_code empty
    code, _, sub_code = full_code.partition('-')

    url = f'https://snapshot.bok.or.kr/api/chart/exportChart?chart_id={code}'
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    df = pd.read_excel(io.BytesIO(r.content), index_col=0, skiprows=3)
    df = df.drop(['단위', '주기', '기간'])  # metadata rows: unit, frequency, period
    # Strip the left-axis / right-axis markers from the series names
    df.columns = [col.replace('(좌축)', '').replace('(우축)', '').replace('좌축, ', '').replace('우축, ', '') for col in df.columns]

    if sub_code:
        # Each series occupies two adjacent columns in the export.
        # NOTE(review): presumably a (date, value) pair -- verify against an
        # actual export, since the first column was already taken as index.
        col_inx = (int(sub_code) - 1) * 2
        df = df.iloc[:, col_inx:col_inx + 2]
        col_name = df.columns[0]
        df.columns = ['날짜', col_name]
        df['날짜'] = pd.to_datetime(df['날짜'])
        df = df.set_index('날짜')
        df = df.dropna(how='all')
    else:
        df.index = pd.to_datetime(df.index)
        df.index.name = '날짜'
    df = df.astype(float)
    return df


class EcosSnapReader:
    '''Dispatch 'ECOS/...' snapshot tickers to the matching listing or chart reader.'''

    def __init__(self, ticker):
        '''Store the ticker upper-cased so that matching is case-insensitive.'''
        self.ticker = ticker.upper()

    def read(self):
        '''Return the DataFrame for the ticker.

        'ECOS/KEYSTAT/LIST' -> the 100 key statistics catalogue
        'ECOS/SNAP/LIST'    -> the snapshot chart catalogue
        'ECOS/SNAP/<id>'    -> chart data, for ids listed in the catalogue

        Raises NotImplementedError for any other ticker.
        '''
        if self.ticker == 'ECOS/KEYSTAT/LIST':
            return _ecos_keystat_listing()

        snap_df = pd.read_csv(io.StringIO(_ecos_snap_csv))
        if self.ticker == 'ECOS/SNAP/LIST':
            return snap_df
        if self.ticker in snap_df['Ticker'].values:
            return _ecos_snap_reader(self.ticker)
        raise NotImplementedError(f'"{self.ticker}" is not implemented')
a/naver/snap.py b/naver/snap.py new file mode 100644 index 0000000..029d7d1 --- /dev/null +++ b/naver/snap.py @@ -0,0 +1,557 @@ +#-*- coding: utf-8 -*- +# (c) 2018~2024 FinaceData.KR + +import re +import io +import json +import pandas as pd +import requests +from bs4 import BeautifulSoup +from io import StringIO +from datetime import datetime +from itertools import product +from tqdm import tqdm + +def _marcap_market_page(sosok, page): + url = f'https://finance.naver.com/sise/sise_market_sum.nhn?sosok={sosok}&page={page}' + + # 거래량, 매수호가, 거래대금(백만), 시가총액(억), 영업이익(억), PER(배): cookies = {'field_list': '12|06108810'} + # 시가, 매도호가, 전일거래량, 자산총계(억), 영업이익증가율, ROE(%): cookies = {'field_list': '12|01882048'} + # 고가, 매수총잔량, 외국인비율, 부채총계(억), 당기순이익(억), ROA(%): cookies = {'field_list': '12|00441424'} + # 저가, 매도총잔량, 상장주식수(천주), 매출액(억), 주당순이익(원), PBR(배): cookies = {'field_list': '12|00234202'} + # 매출액증가율, 보통주배당금(원), 유보율(%): cookies = {'field_list': '12|00000181'} + + field_list = [ + # field_list, columns + ('12|06108810', ['N', '종목명', '현재가', '전일비', '등락률', '액면가', '거래량', '매수호가', '거래대금', '시가총액', '영업이익', 'PER']), + ('12|01882048', ['시가', '매도호가', '전일거래량', '자산총계', '영업이익증가율', 'ROE']), + ('12|00441424', ['고가', '매수총잔량', '외국인비율', '부채총계', '당기순이익', 'ROA']), + ('12|00234202', ['저가', '매도총잔량', '상장주식수', '매출액', '주당순이익', 'PBR']), + ('12|00000181', ['매출액증가율', '보통주배당금', '유보율']), + ] + + marcap = pd.DataFrame() + marcap['시장'] = sosok + + for field in field_list: + f, cols = field + cookies = {'field_list': f} + html = requests.get(url, cookies=cookies).text + df = pd.read_html(html)[1] + if len(df) == 0: + break + marcap[field[1]] = df[field[1]] + + if len(marcap) == 0: + return marcap + soup = BeautifulSoup(html, 'lxml') + table = soup.find_all('table')[1] + trs = table.find_all('tr') + + codes = [] + for tr in trs[1:]: + tds = tr.find_all('td') + code = tds[1].a['href'].split('=')[1] if len(tds) >= 2 else None + codes.append(code) + + marcap.insert(1, '종목코드', codes) + marcap.dropna(how='all', 
inplace=True) + marcap.reset_index(drop=True, inplace=True) + marcap['등락률'] = marcap['등락률'].astype(str).str.replace('%', '').replace(',', '').astype(float) / 100.0 + marcap['ROE'] = marcap['ROE'] / 100.0 + marcap['ROA'] = marcap['ROA'] / 100.0 + marcap['유보율'] = marcap['유보율'] / 100.0 + + marcap = marcap[['N', '종목코드', '종목명', '현재가', '전일비', '등락률', '액면가', + '거래량', '시가', '고가', '저가', '매수호가', '매도호가', '매수총잔량', '매도총잔량', + '거래대금', '전일거래량', '외국인비율', '상장주식수', '시가총액', '자산총계', '부채총계', + '매출액', '매출액증가율', '영업이익', '영업이익증가율', '당기순이익', '주당순이익', '보통주배당금', + 'PER', 'ROE', 'ROA', 'PBR', '유보율']] + return marcap + +def marcap(market='KRX', verbose=1): + ''' + 시가총액순 종목 데이터를 반환합니다. + * market: 'KOSPI'=코스피, 'KOSDAQ'=코스닥, 'KRX'=코스피+코스닥 + * verbose: 1=진행상태를 표시합니다 + ''' + kospi_prod = list(product([0], range(1, 32+1))) + kosdaq_prod = list(product([1], range(1, 29+1))) + market = market.strip().upper() + if market == 'KOSPI': + page_prod = kospi_prod + total = 32 + elif market == 'KOSDAQ': + page_prod = kosdaq_prod + total = 29 + elif market == 'KRX': + page_prod = kospi_prod + kosdaq_prod + total = 32+29 + else: + raise ValueError("market must be one of 'KOSPI', 'KOSDAQ' or 'KRX'") + + df_list = [] + for i, (sosok, page) in tqdm(enumerate(page_prod), total=total): + df = _marcap_market_page(sosok, page) + df_list.append(df) + # print('.', end='') if verbose else print('', end='') + + df_merged = pd.concat(df_list) + df_merged.sort_values(by='시가총액', ascending=False, inplace=True) + return df_merged + + +def _to_float(x, half=None): + ''' + * x: 변환대상 값 + * half('l' 구분자):None=전체, 0=첫번째 절반, 1=두번째 절반 + ''' + x = re.sub('[\t\n, 조억원배%]', '', str(x)) + if half != None and len(x.split('l')) > 1: + return pd.to_numeric(x.split('l')[half], errors='coerce').item() + return pd.to_numeric(x, errors='coerce').item() + +def factors(code): + ''' + 다양한 팩터데이터(dict)를 반환합니다. 
+ * code: 종목코드 + + 반환값(dict): 반환값의 항목은 다음과 같습니다 + '회사개요', '시가총액', '상장주식수', + '외국인한도주식수', '외국인보유주식수', '외국인소진율', + '목표주가', '최고52주', '최저52주', + 'PER', 'EPS', '추정PER', '추정EPS', 'PBR', 'BPS', '배당수익률', + '동일업종_PER', '동일업종_등락률' + '자사주_보유지분', '자사주_주식수', + ''' + + keys = [ + '회사개요', '시가총액', '상장주식수', + '외국인한도주식수', '외국인보유주식수', '외국인소진율', + '목표주가', '최고52주', '최저52주', + 'PER', 'EPS', '추정PER', '추정EPS', 'PBR', 'BPS', '배당수익률', + '동일업종_PER', '동일업종_등락률' + '자사주_보유지분', '자사주_주식수', + ] + factor_data = dict.fromkeys(keys) + + ## 회사개요 + url = 'https://finance.naver.com/item/main.nhn?code=' + code + r = requests.get(url) + soup = BeautifulSoup(r.text, features="lxml") + + summary_info = soup.find(id='summary_info') + text = summary_info.text.strip() if summary_info else '' + factor_data['회사개요'] = '\n'.join(text.split('\n')[1:4]) + + try: + df_list = pd.read_html(r.text, match='상장주식수') + df = df_list[0] + factor_data['시가총액'] = _to_float(df.iloc[0,1]) + factor_data['상장주식수'] = _to_float(df.iloc[2,1]) + except ValueError as e: + print(code, e) + + try: + df_list = pd.read_html(r.text, match='외국인한도주식수') + df = df_list[0] + factor_data['외국인한도주식수'] = _to_float(df.iloc[0,1]) + factor_data['외국인보유주식수'] = _to_float(df.iloc[1,1]) + factor_data['외국인소진율'] = _to_float(df.iloc[2,1]) + except ValueError as e: + print(code, e) + + try: + df_list = pd.read_html(r.text, match='목표주가') + df = df_list[0] + factor_data['목표주가'] = _to_float(df.iloc[0,1], half=1) + factor_data['최고52주'] = _to_float(df.iloc[1,1], half=0) + factor_data['최저52주'] = _to_float(df.iloc[1,1], half=1) + except ValueError as e: + print(code, e) + + try: + df_list = pd.read_html(r.text, match='추정PER') + df = df_list[0] + factor_data['PER'] = _to_float(df.iloc[0,1], half=0) + factor_data['EPS'] = _to_float(df.iloc[0,1], half=1) + factor_data['추정PER'] = _to_float(df.iloc[1,1], half=0) + factor_data['추정EPS'] = _to_float(df.iloc[1,1], half=1) + factor_data['PBR'] = _to_float(df.iloc[2,1], half=0) + factor_data['BPS'] = _to_float(df.iloc[2,1], half=1) 
+ factor_data['배당수익률'] = _to_float(df.iloc[3,1]) + except ValueError as e: + print(code, e) + + try: + df_list = pd.read_html(r.text, match='동일업종 PER') + df = df_list[0] + factor_data['동일업종_PER'] = _to_float(df.iloc[0,1]) + factor_data['동일업종_등락률'] = _to_float(df.iloc[1,1]) + except ValueError as e: + print(code, e) + + # 기업현황 + try: + url = f'https://navercomp.wisereport.co.kr/v2/company/c1010001.aspx?cmp_cd={code}' + dfs = pd.read_html(url, encoding='utf-8') + + # 기업현황 / 시세및 주주현황 + df = dfs[4].set_index('주요주주') + 자사주_주식수, 자사주_보유지분 = 0, 0 + if '자사주' in df.index: + 자사주_주식수 = df.loc['자사주']['보유주식수(보통)'] + 자사주_보유지분 = df.loc['자사주']['보유지분(%)'] + + factor_data['자사주_주식수'] = 자사주_주식수 + factor_data['자사주_보유지분'] = 자사주_보유지분 + except ValueError as e: + print(code, e) + + return factor_data + + +def stock_price_day(code, start=None, end=None): + ''' + 기간(start ~ end)사이의 종목(code)의 일별 가격 데이터를 데이터프레임으로 반환합니다 + * code: 종목코드 + * start: 시작일(기본값: 1970-01-01) + * end: 종료일(기본값: 오늘) + ''' + + start, end = pd.to_datetime(start), pd.to_datetime(end) + start = datetime(1970, 1, 1) if start is None else start + end = datetime.today() if end is None else end + + url = 'https://fchart.stock.naver.com/sise.nhn?timeframe=day&count=6000&requestType=0&symbol=' + r = requests.get(url + code) + + data_list = re.findall('', r.text, re.DOTALL) + if len(data_list) == 0: + return pd.DataFrame() + data = '\n'.join(data_list) + df = pd.read_csv(StringIO(data), delimiter='|', header=None, dtype={0:str}) + df.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume'] + df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d') + df.set_index('Date', inplace=True) + df.sort_index(inplace=True) + df['Change'] = df['Close'].pct_change() + + return df.query('index>=%r and index<=%r' % (start, end)) + + +def stock_price_minute(code, date=None): + ''' + 지정한 날짜의 분봉 데이터를 데이터프레임으로 반환합니다 (지난 5영업일까지 가능) + * code: 종목코드 + * date: 날짜 (기본값: 오늘) + ''' + dt = datetime.today() if date==None else pd.to_datetime(date) + dt_str = 
dt.strftime('%Y%m%d') + + df_list = [] + prev_html = '' + + # 1~40 page 크롤링 + for page in range(1,50): + url = f'https://finance.naver.com/item/sise_time.nhn?code={code}&thistime={dt_str}180000&page={page}' + r = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}) + df = pd.read_html(r.text, header=0)[0] + if page > 1 and prev_html==r.text: + break + prev_html = r.text + df.dropna(inplace=True) + df_list.append(df) + if len(df) == 0 or df.iloc[-1, 0] == '09:00': + break + result = pd.concat(df_list) + result['체결시각'] = dt.strftime('%Y-%m-%d') + ' ' + result['체결시각'] + result['체결시각'] = pd.to_datetime(result['체결시각']) + result.set_index('체결시각', inplace=True) + result.sort_index(inplace=True) + return result + + +def finstate_detail(code, rpt='0', freq='0', gubun='MAIN'): + ''' + 네이버 파이낸스로 부터 상세재무제표를 읽어온다 + :param code: 종목코드: '005930' + :param rpt: 종류: '0'=손익계산서(기본값), '1'=재무상태표, '2'=현금흐름표 + :param freq: 기간: '0'=연간(기본값), '1'=분기 + :param gubun: 구분: 'MAIN'=주재무제표(기본값), 'IFRSS'=KIFRS별도, 'IFRSL'=IFRS연결, 'GAAPS'=GAAP개별, 'GAAPL'=GAAP연결 + ''' + + # encparam 가져오기 + url = 'https://navercomp.wisereport.co.kr/v2/company/c1010001.aspx?cmp_cd=005930' + html_text = requests.get(url).text + encparam = re.findall ("encparam: '(.*?)'", html_text)[0] + + url = f'https://navercomp.wisereport.co.kr/v2/company/cF3002.aspx?cmp_cd={code}&frq={freq}&rpt={rpt}&finGubun={gubun}&frqTyp={freq}&cn=&encparam={encparam}' + # 페이지 가져오기 + headers={'Referer': url} + r = requests.get(url, headers=headers) + jo = json.loads(r.text) + + # DataFrame 생성 + df = pd.json_normalize(jo, 'DATA') + + # DATA1~DATA6 컬럼 이름 바꾸기 + jo_yymm = jo['YYMM'][:6] + date_str_list = [] + for yymm in jo_yymm: + m = re.search('(\d{4}/\d{0,2}).*', yymm) + date_str_list.append(m.group(1) if m else '') + data_n_list = ['DATA' + str(i) for i in range(1,7)] + yymm_cols = zip(data_n_list, date_str_list) + cols_map = dict(yymm_cols) + df.rename(columns=cols_map, inplace=True) + if not len(df): + print(f'no data found {code}') + return df 
+ df['ACC_NM'] = df['ACC_NM'].str.strip().replace('[\.*\[\]]', '', regex=True) + df.set_index(['ACCODE', 'ACC_NM'], inplace=True) + df = df.iloc[:, 5:11] # 날짜 컬럼만 추출 + df = df.T # Transpose (컬럼, 인덱스 바꾸기) + df.index = pd.to_datetime(df.index) + df.index.name = '날짜' + return df + + +def finstate_summary(code, fin_type='0', freq='Y'): + ''' + 요약제무제표데이터를 데이터프레임으로 반환합니다 + :param code: 종목코드 + :param fin_type: 재무제표 종류 '0'=주재무제표, '1'=K-GAAP개별, '2'=K-GAAP연결, '3'=K-IFRS별도, '4'=K-IFRS연결 + :param freq: 기간: Y=년(기본), Q=분기, 'A'=연간분기 전체 + ''' + # encparam 읽어오기 + url = 'https://navercomp.wisereport.co.kr/v2/company/c1010001.aspx?cmp_cd=005930' + html_text = requests.get(url).text + + if not re.search("encparam: '(.*?)'", html_text): + print('encparam not found') # encparam이 없는 경우 + return None + encparam = re.findall ("encparam: '(.*?)'", html_text)[0] + + url = f'https://navercomp.wisereport.co.kr/v2/company/ajax/cF1001.aspx?cmp_cd={code}&fin_typ={fin_type}&freq_typ={freq}&encparam={encparam}' + r = requests.get(url, headers={'Referer': url}) + df_list = pd.read_html(io.StringIO(r.text), encoding='euc-kr') + df = df_list[1] + df.columns = [col[1] for col in df.columns] + df.set_index('주요재무정보', inplace=True) + df.columns = [re.sub('[^\.\d]', '', col) for col in df.columns] + df.columns = [pd.to_datetime(col, format='%Y%m', errors='coerce') for col in df.columns] + df = df.transpose() + df.index.name = '날짜' + return df + + +def invest_index(code, rpt='5', frq='1', finGubun='IFRSL'): + ''' + 네이버 파이낸스로 부터 투자지표 읽어옵니다 + * code (종목코드): '005930' + * rpt (종류): '1'=수익성, '2'=성장성, '3'=안정성, '4'=활동성, '5'=가치분석 (기본값) + * frq (기간): '0'=연간, '1'=분기(기본값) + * finGubun (구분): 'MAIN'=주재무제표, 'IFRSS'=KIFRS별도, 'IFRSL'=IFRS연결(기본값), 'GAAPS'=GAAP개별, 'GAAPL'=GAAP연결 + ''' + # encparam 읽어오기 + url = 'http://companyinfo.stock.naver.com/v1/company/c1040001.aspx?cmp_cd=005930' + html_text = requests.get(url).text + + if not re.search("encparam: '(.*?)'", html_text): + print('encparam not found') # encparam이 없는 경우 + 
return None + encparam = re.findall ("encparam: '(.*?)'", html_text)[0] + + # 투자지표 데이터 가져오기 + url = f'http://companyinfo.stock.naver.com/v1/company/cF4002.aspx?' \ + f'cmp_cd={code}&frq={frq}&rpt={rpt}&finGubun={finGubun}&frqTyp={frq}&cn=&encparam={encparam}' + + # DataFrame 생성 + headers={'Referer': 'http://companyinfo.stock.naver.com'} + jo = json.loads(requests.get(url, headers=headers).text) + df = pd.json_normalize(jo, 'DATA') + + # DATA1~DATA6 컬럼 이름 바꾸기 + jo_yymm = jo['YYMM'][:6] + date_str_list = [] + for yymm in jo_yymm: + m = re.search('(\d{4}/\d{0,2}).*', yymm) + date_str_list.append(m.group(1) if m else '') + data_n_list = ['DATA' + str(i) for i in range(1,7)] + yymm_cols = zip(data_n_list, date_str_list) + cols_map = dict(yymm_cols) + df.rename(columns=cols_map, inplace=True) + df['ACC_NM'] = df['ACC_NM'].str.strip().replace('[\.*\[\]]', '', regex=True) + df = df.drop_duplicates(['ACCODE', 'ACC_NM'], keep='last') + df.set_index(['ACCODE', 'ACC_NM'], inplace=True) + df = df.iloc[:, 5:11] # 날짜 컬럼만 추출 + df = df.T # Transpose (컬럼, 인덱스 바꾸기) + df = df.dropna(how='all') + df.index = pd.to_datetime(df.index) + df.index.name = '날짜' + return df + + +def investors(code): + ''' + 투자자별 매매 동향을 반환합니다 (20일) + * code: 종목코드 + ''' + url = f'https://finance.naver.com/item/frgn.nhn?code={code}' + r = requests.get(url, headers={'User-Agent':'Mozilla/5.0 AppleWebKit/537.36 Edg/122.0.0.0'}) + print() + try: + df_list = pd.read_html(io.StringIO(r.text), encoding='euc-kr') + except ValueError as e: + print(e) + raise Exception(f'invalid stock code or url: {url}') + + df = df_list[2].dropna(how='all').copy() + df.columns = ['날짜', '종가', '전일비', '등락률', '거래량', '기관순매매량', '외국인순매매량', '외국인보유주수', '외국인보유율'] + df['날짜'] = pd.to_datetime(df['날짜']) + df['등락률'] = df['등락률'].str.replace('%', '').astype('float') + df.sort_values('날짜', inplace=True) + df.set_index('날짜', inplace=True) + return df + + +def sector_stock_list(verbose=False): + ''' + 업종별 종목리스트 데이터를 가져옵니다 + + 반환값(DataFrame): 
컬럼=[종목코드,종목명,시장,업종명,업종코드] + ''' + url = 'https://finance.naver.com/sise/sise_group.nhn?type=upjong' + r = requests.get(url) + soup = BeautifulSoup(r.text, 'lxml') + a_list = soup.select('a[href*=sise_group_detail]') + + row_list = [] + for ix, a in enumerate(a_list): + sector_name = a.text + sector_no = a['href'].replace('/sise/sise_group_detail.nhn?type=upjong&no=', '') + sector_url = 'https://finance.naver.com' + a['href'] + url = f'https://finance.naver.com/sise/sise_group_detail.nhn?type=upjong&no={sector_no}' + r = requests.get(url) + soup = BeautifulSoup(r.text, 'lxml') + divs = soup.select('div[class="name_area"]') + if verbose: + print(f'{ix:2} {sector_name}({len(divs)}종목) {sector_url}') + for div in divs: + code = div.a['href'].replace('/item/main.nhn?code=', '') + name = div.text + market = 'KOSDAQ' if ' *' in name else 'KOSPI' + name = name.replace(' *', '') + row_list.append([code, name, market, sector_name, sector_no]) + # print(code, name, market, sector_name, sector_no) + sector_stocks = pd.DataFrame(row_list, columns=['종목코드', '종목명', '시장', '업종명', '업종코드']) + return sector_stocks + +def __up(sosok=0): + url = f'https://finance.naver.com/sise/sise_rise.naver?sosok={sosok}' + + # 거래량, 거래대금(백만), 매수호가, 시가총액(억), 영업이익(억), PER(배): cookies = {'field_list': '2|06108810'} + # 시가, 매도호가, 자산총계(억), 영업이익증가율, ROE(%): cookies = {'field_list': '2|01882048'} + # 고가, 매수총잔량, 외국인비율, 부채총계(억), 당기순이익(억), ROA(%): cookies = {'field_list': '12|00441424'} + # 저가, 매도총잔량, 상장주식수(천주), 매출액(억), 주당순이익(원), PBR(배): cookies = {'field_list': '12|00234202'} + # 매출액증가율, 보통주배당금(원), 유보율(%): cookies = {'field_list': '12|00000181'} + + field_list = [ + # field_list, columns + ('2|06108810', ['N', '종목명', '현재가', '전일비', '등락률', '거래량', '거래대금', '매수호가', '시가총액', '영업이익', 'PER']), + ('2|01882048', ['시가', '매도호가', '전일거래량', '자산총계', '영업이익증가율', 'ROE']), + ('2|00441424', ['고가', '매수총잔량', '외국인비율', '부채총계', '당기순이익', 'ROA']), + ('2|00234202', ['저가', '매도총잔량', '상장주식수', '매출액', '주당순이익', 'PBR']), + ('2|00000181', 
['매출액증가율', '보통주배당금', '유보율']), + ] + + up = pd.DataFrame() + up['시장'] = sosok + + for field in field_list: + f, cols = field + cookies = {'field_list': f} + html = requests.get(url, cookies=cookies).text + df = pd.read_html(html)[1] + if len(df) == 0: + break + up[field[1]] = df[field[1]] + + if len(up) == 0: + return up + soup = BeautifulSoup(html, 'lxml') + table = soup.find_all('table')[1] + trs = table.find_all('tr') + + codes = [] + for tr in trs[1:]: + tds = tr.find_all('td') + code = tds[1].a['href'].split('=')[1] if len(tds) >= 2 else None + codes.append(code) + + up.insert(1, '종목코드', codes) + up.dropna(how='all', inplace=True) + up.reset_index(drop=True, inplace=True) + up['등락률'] = up['등락률'].astype(str).str.replace('%', '').replace(',', '').astype(float) / 100.0 + up['ROE'] = up['ROE'] / 100.0 + up['ROA'] = up['ROA'] / 100.0 + up['유보율'] = up['유보율'] / 100.0 + + up = up[['N', '종목코드', '종목명', '현재가', '전일비', '등락률', + '거래량', '시가', '고가', '저가', '매수호가', '매도호가', '매수총잔량', '매도총잔량', + '거래대금', '전일거래량', '외국인비율', '상장주식수', '시가총액', '자산총계', '부채총계', + '매출액', '매출액증가율', '영업이익', '영업이익증가율', '당기순이익', '주당순이익', '보통주배당금', + 'PER', 'ROE', 'ROA', 'PBR', '유보율']] + return up + +def up(): + return pd.concat([__up(0), __up(1)]).sort_values('등락률', ascending=False) + + +def free_float_rate(code): + ''' + 유동비율을 반환합니다 (100% 기준) + * code: 종목코드 + ''' + url = f'https://navercomp.wisereport.co.kr/v2/company/c1070001.aspx?cmp_cd={code}' + df_list = pd.read_html(url, encoding='utf-8') + df = df_list[1] + return float(df[('유동주식', '유동주식비율')][0].replace('%', '')) + + +class NaverSnapReader: + def __init__(self, ticker): + self.ticker = ticker.upper() + + def read(self): + if self.ticker.startswith('NAVER/FINSTATE'): + tokens = self.ticker.split('/') + if len(tokens) < 3: + usage_text = '''Usage examples: + NAVER/FINSTATE/005930 + NAVER/FINSTATE-Q/005930 + NAVER/FINSTATE-Q/005930 + NAVER/FINSTATE-Q1/005930 + NAVER/FINSTATE-Y/005930 + NAVER/FINSTATE-Y3/005930 + ''' + raise ValueError(usage_text) + 
fin_type, freq='0', 'Y' # default + if '-' in tokens[1]: + for ch in tokens[1].split('-')[1]: # for each char in options + fin_type = ch if ch in '01234' else fin_type + freq = ch if ch in 'YQA' else freq + code = tokens[2] + return finstate_summary(code, fin_type, freq) + elif self.ticker.startswith('NAVER/INVESTORS'): + tokens = self.ticker.split('/') + if len(tokens) < 3: + usage_text = '''Usage examples: + NAVER/INVESTORS/005930 + NAVER/INVESTORS/000660 + ''' + raise ValueError(usage_text) + return investors(tokens[2]) + else: + raise NotImplementedError(f'"{self.ticker}" is not implemented') diff --git a/yahoo/data.py b/yahoo/data.py index c324626..aafdbc7 100644 --- a/yahoo/data.py +++ b/yahoo/data.py @@ -60,10 +60,10 @@ def read(self): sym_list = [s.strip() for s in self.symbol.split(',') if s] for sym in sym_list: df = _yahoo_data_reader(sym, self.exchange, self.start, self.end) - if len(df): - df_list.append(df) - merged = pd.concat([x['Adj Close'] for x in df_list], axis=1) - merged.columns = sym_list + df = df[['Adj Close']] + df = df.rename(columns={'Adj Close':sym}) + df_list.append(df) + merged = pd.concat(df_list, axis=1) merged.attrs = {'exchange':self.exchange, 'source':'YAHOO', 'data':'PRICE'} return merged \ No newline at end of file