"""Fetch futures minute-K and tick data through TSLfm and append it to the
HFT tables managed by DDBfm."""
import sys

running_which_env = 'dev'

from os.path import dirname, abspath, join
ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))

from loguru import logger
logger.remove()
logger.add(sys.stderr, level="INFO")
logger.add(ROOT_DIR + "/logs/{time:YYYYMMDD-HHmmss}" + f"_{running_which_env}.log",
           rotation="10 MB", compression="zip", level="DEBUG")

import pandas as pd

from DDBfm import DDBfm
from TSLfm import TSLfm
from code_list import code_list_pickel


def run_add_1day_code_init_minKline(date, code_list):
    """Download one day of minute-K data for code_list and append only the
    codes that are not already in the mink table."""
    ddb = DDBfm(running_which_env)
    code_list_filtered = []
    for code in code_list:
        if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname, date, code):
            logger.warning(f"Possible duplicates on {date} and {code}")
        else:
            code_list_filtered.append(code)
    if len(code_list_filtered) == 0:
        return 0
    with TSLfm() as tsl:
        df = tsl.process_result_data_type(tsl.get_mkt_min_k(date, date, code_list_filtered))
    if not df.empty:
        logger.info(f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}')
        ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df)


# def run_pool_add_byday_code_init_minKline(date_list, code_list):
#     df_list = []
#     code_list_filtered = code_list
#     ddb1 = DDBfm(running_which_env)
#     tb = ddb1.load_tb(tableName=ddb1.ddf_hft_mink_tbname)
#     # tb = ddb1.sess.loadTable(dbPath=ddb1.ddb_hft_dbPath, tableName=ddb1.ddf_hft_mink_tbname)
#     for date in date_list:
#         with TSLfm() as tsl:
#             df = tsl.process_result_data_type(tsl.get_mkt_min_k(date, date, code_list))
#         if df.empty:
#             continue
#         code_list_filtered = ddb1.get_missing_code_date_in_tb(tb, date, code_list)
#         if len(code_list_filtered) == 0:
#             continue
#         logger.info(f"getting {'+'.join(code_list_filtered)} on {date}")
#         df = df[df['code'].isin(code_list_filtered)]
#         df_list.append(df)
#     ddb1.close_sess()
#     del ddb1
#     if df_list:
#         df_all = pd.concat(df_list)
#         ddb2 = DDBfm(running_which_env, pool=True)
#         logger.info(f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
#         ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname, df_all)
#         ddb2.clear_pool()
#         del ddb2


def check_if_date_codelist_exists(typ, date, code_list):
    """Return the codes in code_list that are still missing from the `typ`
    ('tick' or 'mink') table on `date`."""
    ddb1 = DDBfm(running_which_env)
    if typ == 'tick':
        tbName = ddb1.ddf_hft_tick_tbname
    elif typ == 'mink':
        tbName = ddb1.ddf_hft_mink_tbname
    else:
        raise ValueError(f"Unknown typ: {typ}")
    code_list_filtered = ddb1.get_missing_code_date_in_tb(tbName, date, code_list)
    if code_list_filtered:
        logger.info(f"Need to download {'+'.join(code_list_filtered)} on {date} in {tbName}")
    else:
        logger.info(f"All codes already in table {tbName} on {date}")
    ddb1.close_sess()
    del ddb1
    return code_list_filtered


def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
    """Download `typ` data for every date in date_list and append the
    concatenated result through a pooled DDBfm session."""
    df_list = []
    for date in date_list:
        if if_check:
            code_list_filtered = check_if_date_codelist_exists(typ, date, code_list)
        else:
            code_list_filtered = code_list
        with TSLfm() as tsl:
            if typ == 'tick':
                df = tsl.process_result_data_type(tsl.get_trade_tick(date, date, code_list_filtered))
            elif typ == 'mink':
                df = tsl.process_result_data_type(tsl.get_mkt_min_k(date, date, code_list_filtered))
        if not df.empty:
            df_list.append(df)
    if not df_list:
        return 0
    df_all = pd.concat(df_list)
    ddb2 = DDBfm(running_which_env, pool=True)
    logger.info(f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
    # Append to the table that matches `typ` (previously this always wrote to the tick table).
    tbname = ddb2.ddf_hft_tick_tbname if typ == 'tick' else ddb2.ddf_hft_mink_tbname
    ddb2.append_pool_hft_table(tbname, df_all)
    ddb2.close_sess()
    del ddb2

def run_create_hft_db(date='20221101'):
    """Create the HFT database and its two tables from one day of sample data."""
    ddb = DDBfm(running_which_env)
    ddb.create_hft_database()
    with TSLfm() as tsl:
        code_list = ['T2212']
        df_mink = tsl.process_result_data_type(tsl.get_mkt_min_k(date, date, code_list))
    # print(df_mink)
    ddb.create_hft_table(ddb.ddf_hft_mink_tbname, df_mink)
    with TSLfm() as tsl:
        code_list = ['T2212']
        df_tick = tsl.process_result_data_type(tsl.get_trade_tick(date, date, code_list))
    # print(df_tick)
    ddb.create_hft_table(ddb.ddf_hft_tick_tbname, df_tick)


def run():
    """Day-by-day minute-K download, grouped by code init (the contract prefix)."""
    all_code_dict_by_init = {}
    for c in code_list_pickel:
        init = c[:-4]
        if init in all_code_dict_by_init:
            all_code_dict_by_init[init].append(c)
        else:
            all_code_dict_by_init[init] = [c]
    # print(all_code_dict_by_init)

    start_date = '2022-09-30'
    end_date = '2022-10-31'
    allDates = pd.date_range(start_date, end_date, freq='D')
    allDates = [i.replace('-', '') for i in list(allDates.astype('str'))]

    for date in allDates:
        for ind, code_init in enumerate(all_code_dict_by_init):
            logger.info(f"Getting {code_init} (no.{ind})")
            code_list = all_code_dict_by_init[code_init]
            run_add_1day_code_init_minKline(date, code_list)


def run_pool_dates_by_code_init_n_group(typ='mink', gp_amt=10, start_date='20220101', end_date='20221031', if_check=1):
    """Split the date range into gp_amt groups by day of month and download
    `typ` data group by group, one code init at a time."""
    logger.info("Running run_pool_dates_by_code_init_n_group")
    all_code_dict_by_init = {}
    for c in code_list_pickel:
        init = c[:-4]
        if init in all_code_dict_by_init:
            all_code_dict_by_init[init].append(c)
        else:
            all_code_dict_by_init[init] = [c]
    # print(all_code_dict_by_init)

    allDates = pd.date_range(start_date, end_date, freq='D')
    dates_dict_by_day = {}
    for d in list(allDates.astype('str')):
        group_no = int(d[-2:]) % gp_amt
        if group_no not in dates_dict_by_day:
            dates_dict_by_day[group_no] = [d.replace('-', '')]
        else:
            dates_dict_by_day[group_no].append(d.replace('-', ''))
    logger.debug(dates_dict_by_day)

    for group_no in dates_dict_by_day:
        date_list = dates_dict_by_day[group_no]
        num_of_init = len(all_code_dict_by_init)
        for ind, code_init in enumerate(all_code_dict_by_init):
            # done: 'T','TS','TS','TF'
            # if code_init in ['T']:  # todo: filter these: 'TF','IC','IF','IH','IM'
            logger.info(f"Getting {code_init} (no.{ind}/{num_of_init} of group {group_no}/{gp_amt})")
            code_list = all_code_dict_by_init[code_init]
            if typ == 'mink':
                # logger.info('Running mink')
                run_pool_add_by_datelist_codeinit('mink', date_list, code_list, if_check)
                # run_pool_add_byday_code_init_minKline(date_list, code_list)
            elif typ == 'tick':
                logger.info('Running tick')
                run_pool_add_by_datelist_codeinit('tick', date_list, code_list, if_check)


if __name__ == '__main__':
    import time

    # run_create_hft_db()  # creates the database and both tables

    tic = time.perf_counter()

    typ = 'mink'
    st_d = '20221101'
    en_d = '20221102'
    if_check = 1
    logger.info(f"Going to run {typ} from {st_d} to {en_d} with if_check duplicates={if_check} "
                f"in {running_which_env}; please check that this info is correct.\n\n\n\n")
    run_pool_dates_by_code_init_n_group(typ=typ, gp_amt=3, start_date=st_d, end_date=en_d, if_check=if_check)
    # run_pool_dates_by_code_init_n_group(typ='mink', gp_amt=5)

    toc = time.perf_counter()
    logger.info(f"Running used {toc - tic:0.4f} seconds")
    # A full run took 588.5782 seconds (~10 min) for 10 months of one code init;
    # at roughly 12 min per code init, 12 * 71 = 852 min ≈ 14 hr for all code inits per year.