@ -8,8 +8,7 @@ ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
from loguru import logger
# Logging setup: clear loguru's existing handlers, then log INFO and above to
# stderr and DEBUG and above to a timestamped file under <ROOT_DIR>/logs/.
logger.remove()
logger.add(sys.stderr, level="INFO")
# Exactly one file sink. A second sink on the same path at INFO level would
# write every INFO+ record to the file twice, because the DEBUG sink already
# receives those records.
logger.add(
    ROOT_DIR + "/logs/{time:YYYYMMDD-HHmmss}" + f"_{running_which_env}.log",
    rotation="10 MB",
    compression="zip",
    level="DEBUG",
)
import pandas as pd
@ -37,15 +36,84 @@ def run_add_1day_code_init_minKline(date,code_list):
logger . info ( f ' Getting a df of { df . shape } : { code_list [ 0 ] [ : - 4 ] } on { date } ' )
ddb . append_hft_table ( ddb . ddf_hft_mink_tbname , df )
def run_create_db_minKline():
    """Legacy minute-K database entry point; takes no arguments.

    NOTE(review): only the ``date`` assignment of this function is visible
    here. The value is bound to a local name and not used by any statement
    shown, so as written the call has no effect and returns ``None``.
    """
    date = '20221101'
# def run_pool_add_byday_code_init_minKline(date_list,code_list):
# df_list=[]
# code_list_filtered=code_list
# ddb1 = DDBfm(running_which_env)
# tb=ddb1.load_tb(tableName=ddb1.ddf_hft_mink_tbname)
# # tb=ddb1.sess.loadTable(dbPath=ddb1.ddb_hft_dbPath, tableName=ddb1.ddf_hft_mink_tbname)
# for date in date_list:
# with TSLfm() as tsl:
# df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
# if df.empty:
# continue
# code_list_filtered = ddb1.get_missing_code_date_in_tb(tb,date,code_list)
# if len(code_list_filtered)==0:
# continue
# logger.info(f"getting {'+'.join(code_list_filtered)} on {date}")
# df=df[df['code'].isin(code_list_filtered)]
# df_list.append(df)
# ddb1.close_sess()
# del ddb1
# if df_list:
# df_all = pd.concat(df_list)
# ddb2 = DDBfm(running_which_env,pool=True)
# logger.info(f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
# ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname,df_all)
# ddb2.clear_pool()
# del ddb2
def run_pool_add_byday_code_init_tick(date_list, code_list):
    """Fetch trade ticks for several dates and append them to the tick table.

    For each date, ``DDBfm.get_missing_code_date_in_tb`` is queried with
    ``code_list`` (presumably it returns the codes not yet stored for that
    date -- confirm against DDBfm). Only the returned codes are fetched via
    ``TSLfm.get_trade_tick``. All non-empty frames are concatenated and
    appended in a single call through a pooled ``DDBfm`` connection.

    Args:
        date_list: Dates to process; each one is passed unchanged to the
            DDBfm/TSLfm calls and joined with "+" in the log message.
        code_list: Codes to consider for every date.

    Returns:
        None. When no date yields any data, nothing is appended.
    """
    frames = []
    for date in date_list:
        ddb1 = DDBfm(running_which_env)
        try:
            # Query the table that is appended to below. Checking the
            # minute-K table here would skip ticks whenever minute-K data
            # for that date already exists.
            code_list_filtered = ddb1.get_missing_code_date_in_tb(
                ddb1.ddf_hft_tick_tbname, date, code_list
            )
        finally:
            # Close the session on every path, including the "nothing
            # missing" case that skips the rest of the iteration.
            ddb1.close_sess()
            del ddb1

        if len(code_list_filtered) == 0:
            continue
        logger.info(f"getting {'+'.join(code_list_filtered)} on {date}")

        with TSLfm() as tsl:
            df = tsl.process_result_data_type(
                tsl.get_trade_tick(date, date, code_list_filtered)
            )
        if not df.empty:
            frames.append(df)

    # pd.concat raises ValueError on an empty list, so stop when nothing was
    # fetched.
    if not frames:
        return

    df_all = pd.concat(frames)
    ddb2 = DDBfm(running_which_env, pool=True)
    try:
        logger.info(
            f'Getting a df of {df_all.shape}: {code_list[0][:-4]} '
            f'on {"+".join(date_list)}'
        )
        ddb2.append_pool_hft_table(ddb2.ddf_hft_tick_tbname, df_all)
    finally:
        ddb2.close_sess()
        del ddb2
def run_create_hft_db(date='20221101'):
    """Create the HFT database plus its minute-K and tick tables.

    One day of data for the sample contract ``T2212`` is fetched through
    ``TSLfm`` for each table and handed to ``DDBfm.create_hft_table``
    (presumably as the schema template -- confirm against DDBfm).

    Args:
        date: Day to fetch the sample data for; used as both the start and
            the end date of each query.

    Returns:
        None.
    """
    code_list = ['T2212']
    ddb = DDBfm(running_which_env)
    try:
        ddb.create_hft_database()

        # Each table is created only after its own frame has been fetched;
        # no frame exists before this point.
        with TSLfm() as tsl:
            df_mink = tsl.process_result_data_type(
                tsl.get_mkt_min_k(date, date, code_list)
            )
        ddb.create_hft_table(ddb.ddf_hft_mink_tbname, df_mink)

        with TSLfm() as tsl:
            df_tick = tsl.process_result_data_type(
                tsl.get_trade_tick(date, date, code_list)
            )
        ddb.create_hft_table(ddb.ddf_hft_tick_tbname, df_tick)
    finally:
        # Release the session the same way the other loaders in this file do.
        ddb.close_sess()
def run ( ) :
@ -60,38 +128,79 @@ def run():
# print(all_code_dict_by_init)
start_date = ' 2022-09-30 '
end_date = ' 2022-1 1-09 '
end_date = ' 2022-1 0-31 '
allDates = pd . date_range ( start_date , end_date , freq = ' D ' )
allDates = [ i . replace ( ' - ' , ' ' ) for i in list ( allDates . astype ( ' str ' ) ) ]
for date in allDates :
for code_init in all_code_dict_by_init :
for ind , code_init in enumerate ( all_code_dict_by_init ) :
logger . info ( f " Getting { code_init } (no. { ind } ) " )
code_list = all_code_dict_by_init [ code_init ]
run_add_1day_code_init_minKline ( date , code_list )
# date = '20221101'
# with TSLfm() as tsl:
# # code_list = tsl.get_code_list("国债期货")
# # code_list += tsl.get_code_list("股指期货")
# # code_list += tsl.get_code_list("上市期货")
# # code_list=sorted(list(set(code_list)))
# # print(code_list_pickel)
# code_list=['CF2211']
# df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
# print(df)
# ddb = DDBfm('prd')
# ddb.create_hft_database()
# ddb.create_hft_table(ddb.ddf_hft_mink_tbname,df)
def run_pool_dates_by_code_init_n_group(typ='mink', group_amount=10,
                                        start_date='20220101',
                                        end_date='20221031',
                                        code_inits=('T',)):
    """Load data group by group, where a group is a set of calendar dates.

    Codes from ``code_list_pickel`` are bucketed by their prefix (the code
    without its last four characters). Every calendar day between
    ``start_date`` and ``end_date`` is assigned to bucket
    ``day_of_month % group_amount``. For each date bucket and each selected
    code prefix, the loader chosen by ``typ`` is invoked with the bucket's
    dates and the prefix's codes.

    Args:
        typ: ``'tick'`` runs ``run_pool_add_byday_code_init_tick``;
            ``'mink'`` only logs an error because that path is disabled.
            Any other value does no loading.
        group_amount: Number of date buckets (modulus applied to the day of
            the month).
        start_date: First day of the range, as accepted by ``pd.date_range``.
        end_date: Last day of the range, as accepted by ``pd.date_range``.
        code_inits: Collection of code prefixes to process. Defaults to
            ``('T',)``; pass ``None`` to process every prefix.

    Returns:
        None.
    """
    logger.info("Running run_pool_dates_by_group")

    # prefix -> codes, e.g. 'T2212' is stored under 'T'.
    all_code_dict_by_init = {}
    for code in code_list_pickel:
        all_code_dict_by_init.setdefault(code[:-4], []).append(code)
    # print(all_code_dict_by_init)

    # bucket number -> list of 'YYYYMMDD' strings.
    dates_dict_by_day = {}
    for day in pd.date_range(start_date, end_date, freq='D'):
        group_no = day.day % group_amount
        dates_dict_by_day.setdefault(group_no, []).append(day.strftime('%Y%m%d'))
    logger.debug(dates_dict_by_day)

    num_of_init = len(all_code_dict_by_init)
    for group_no, date_list in dates_dict_by_day.items():
        for ind, (code_init, code_list) in enumerate(all_code_dict_by_init.items()):
            # done: 'T','TS','TS','TF'
            # todo: widen the selection, e.g. 'TF', 'IC', 'IF', 'IH', 'IM'
            if code_inits is not None and code_init not in code_inits:
                continue
            logger.info(
                f"Getting {code_init} (no.{ind}/{num_of_init} "
                f"of group {group_no}/{group_amount})"
            )
            if typ == 'mink':
                # logger.info('Running mink')
                logger.error('mink by day to be fixed')
                # run_pool_add_byday_code_init_minKline(date_list,code_list)
            elif typ == 'tick':
                logger.info('Running tick')
                run_pool_add_byday_code_init_tick(date_list, code_list)
# if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname,date,'CF2211'):
# logger.warning(f"Possible duplicates on {date} and ")
# ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df)
if __name__ == '__main__':
    # Script entry point: run the pooled tick load and log how long it took.
    # The earlier entry points are kept below, disabled, for reference.
    # run_create_db_minKline()
    # run()
    # run_create_hft_db()  # including two tables
    import time

    tic = time.perf_counter()
    run_pool_dates_by_code_init_n_group(typ='tick')
    # run_pool_dates_by_code_init_n_group(typ='mink',group_amount=5)
    toc = time.perf_counter()
    logger.info(f"Running used {toc - tic:0.4f} seconds")
    # Timing notes recorded from an earlier run:
    #   all 'T' tasks: "Running used 588.5782 seconds" for 10 months of dates
    #   ~600 s / 60 = 10 min; budget ~12 min per code_init
    #   12 min * 71 = ~850 min / 60 = ~15 hr for all codes, per year
    #   (71 is presumably the number of code_inits -- confirm)