import pandas as pd
from pathlib import Path
class SingletonType(type):
    """Metaclass that creates at most one instance per class.

    The first call to a class using this metaclass constructs the instance
    with the given arguments; every later call returns that same object and
    ignores its arguments.
    """

    def __call__(cls, *args, **kwargs):
        """Return the instance of ``cls``, constructing it on first use.

        The instance is stored on the class itself.  Only the class's own
        namespace is consulted: a plain attribute lookup would also find an
        instance stored on a base class, which would make a subclass hand
        back its parent's instance instead of creating its own.
        """
        # ``cls.__instance`` is name-mangled to ``_SingletonType__instance``.
        if '_SingletonType__instance' not in vars(cls):
            cls.__instance = super().__call__(*args, **kwargs)
        return cls.__instance
class CodeMan(object, metaclass=SingletonType):
    """Map company names to stock codes and download price tables.

    The name/code table is downloaded from kind.krx.co.kr on first use and
    cached in ``FILENAME``; daily and intraday price tables are scraped from
    finance.naver.com with ``pandas.read_html``.
    """

    # Local CSV cache of the name/code table (columns: code, name).
    FILENAME = 'codelist.csv'

    def __init__(self):
        """Load the name/code table into ``self.code_df``."""
        self.code_df = self.get_code_list()

    @staticmethod
    def _format_codes(codes):
        """Return the Series ``codes`` as zero-padded six-character strings."""
        # zfill works for both integer and string codes, unlike '{:06d}'.
        return codes.astype(str).str.zfill(6)

    @staticmethod
    def _concat_pages(frames):
        """Concatenate per-page frames; an empty list gives an empty frame."""
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    def _lookup_code(self, item_name):
        """Return the stock code registered for ``item_name``.

        Raises:
            ValueError: if ``item_name`` is not in the code table.
        """
        matches = self.code_df.loc[self.code_df['name'] == item_name, 'code']
        if matches.empty:
            raise ValueError('unknown item name: {!r}'.format(item_name))
        # If several rows share the name, the first one is used.
        return str(matches.iloc[0]).strip()

    def get_code_list(self):
        """Return the name/code table, using the CSV cache when present.

        When ``FILENAME`` does not exist the table is downloaded, reduced to
        the columns ``name`` and ``code``, written to ``FILENAME`` and
        returned.
        """
        if Path(CodeMan.FILENAME).is_file():
            print('READ CSV')
            return self.read_csv()

        print('LOAD DATA')
        code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
        code_df = code_df[['회사명', '종목코드']]
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        code_df['code'] = self._format_codes(code_df['code'])
        self.write_csv(code_df)
        return code_df

    def show(self):
        """Print the name/code table."""
        print(self.code_df)

    def write_csv(self, df):
        """Write the ``code`` and ``name`` columns of ``df`` to ``FILENAME``."""
        print('WRITE DATA')
        data_df = pd.DataFrame(df, columns=['code', 'name'])
        data_df.to_csv(CodeMan.FILENAME, index=False)

    def read_csv(self):
        """Read the cached table from ``FILENAME``.

        Returns:
            DataFrame with the file's columns; ``code`` is a zero-padded
            six-character string.
        """
        # Reading the code as text keeps leading zeros that an integer
        # parse would drop; zfill repairs caches that already lost them.
        df = pd.read_csv(CodeMan.FILENAME, header=0, dtype={'code': str})
        df['code'] = self._format_codes(df['code'])
        return df

    def get_daily_url(self, item_name):
        """Return the daily-price page URL for ``item_name``.

        Raises:
            ValueError: if ``item_name`` is not in the code table.
        """
        code = self._lookup_code(item_name)
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
        print("요청 URL = {}".format(url))
        return url

    def get_price_data(self, item_name, pageno):
        """Download pages 1..``pageno`` of daily prices for ``item_name``.

        Returns:
            DataFrame with columns Date, Close, Diff, Open, High, Low and
            Volume, sorted by Date ascending.  Rows containing missing
            values are dropped.
        """
        url = self.get_daily_url(item_name)
        frames = [
            pd.read_html('{url}&page={page}'.format(url=url, page=page), header=0)[0]
            for page in range(1, pageno + 1)
        ]
        df = self._concat_pages(frames).dropna()
        df = df.rename(columns={'날짜': 'Date', '종가': 'Close', '전일비': 'Diff', '시가': 'Open', '고가': 'High', '저가': 'Low', '거래량': 'Volume'})
        numeric_cols = ['Close', 'Diff', 'Open', 'High', 'Low', 'Volume']
        df[numeric_cols] = df[numeric_cols].astype(int)
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values(by=['Date'], ascending=True)
        print(df.head())
        return df

    def get_min_url(self, item_name, thistime):
        """Return the intraday-price page URL for ``item_name``.

        ``thistime`` is inserted verbatim as the ``thistime`` query
        parameter.

        Raises:
            ValueError: if ``item_name`` is not in the code table.
        """
        code = self._lookup_code(item_name)
        url = 'https://finance.naver.com/item/sise_time.nhn?code={code}&thistime={thistime}'.format(code=code, thistime=thistime)
        print("요청 URL = {}".format(url))
        return url

    def get_min_price_data(self, item_name, date, pageno):
        """Download up to ``pageno`` pages of intraday prices.

        Fetching stops early after the first page that has fewer than ten
        complete rows.

        Returns:
            DataFrame indexed by Date (ascending) with columns Price, Diff,
            Ask, Bid, Volume and Qty.
        """
        url = self.get_min_url(item_name, date)
        frames = []
        for page in range(1, pageno + 1):
            pg_url = '{url}&page={page}'.format(url=url, page=page)
            subdf = pd.read_html(pg_url, header=0)[0].dropna()
            frames.append(subdf)
            if len(subdf) < 10:
                break
        df = self._concat_pages(frames)
        df = df.rename(columns={'체결시각': 'Date', '체결가': 'Price', '전일비': 'Diff', '매도': 'Ask', '매수': 'Bid', '거래량': 'Volume', '변동량': 'Qty'})
        numeric_cols = ['Price', 'Diff', 'Qty', 'Ask', 'Bid', 'Volume']
        df[numeric_cols] = df[numeric_cols].astype(int)
        # NOTE(review): if this column holds time-of-day only, to_datetime
        # chooses the calendar date itself rather than taking it from
        # ``date`` -- confirm that is intended.
        df['Date'] = pd.to_datetime(df['Date'])
        # Stable sort so rows with equal timestamps keep their fetched order.
        return df.set_index('Date').sort_index(kind='mergesort')

    def get_ohlc(self, df, period):
        """Resample trades into OHLC bars plus summed quantity.

        Args:
            df: frame with a datetime index and ``Price`` and ``Qty``
                columns.
            period: pandas resampling frequency, e.g. ``'5min'``.

        Returns:
            DataFrame with columns open, high, low, close and Qty.
        """
        df_ohlc = df['Price'].resample(period).ohlc()
        df_qty = df['Qty'].resample(period).sum()
        return pd.concat([df_ohlc, df_qty], axis=1)

    def step1(self, name, date, pageno):
        """Fetch intraday prices for ``name`` and print 5-minute OHLC bars."""
        df = self.get_min_price_data(name, date, pageno)
        # '5min' is the non-deprecated spelling of the former '5T' alias.
        df2 = self.get_ohlc(df, '5min')
        print(df2)
# Earlier experiments, kept for reference:
# CodeMan.__call__().write_csv()
# CodeMan.__call__().get_price_data('코리안리', 5)
# CodeMan.__call__().get_min_price_data('코리안리', '2018082223', 3)

# Calling the class goes through the metaclass's __call__, so this obtains
# the same instance as CodeMan.__call__() would.
CodeMan().step1('코리안리', '2018082223', 100)
------------- Result (sample output) ----------------
                        open     high      low    close    Qty
Date
2018-08-23 09:00:00  10750.0  10750.0  10700.0  10750.0  11020
2018-08-23 09:05:00  10700.0  10700.0  10700.0  10700.0    735
2018-08-23 09:10:00  10700.0  10700.0  10700.0  10700.0   1647
.....