From 0d0745aa92456d4adcc095cae9428c06a978401b Mon Sep 17 00:00:00 2001
From: yzlocal <local@local>
Date: Fri, 11 Nov 2022 09:15:56 +0800
Subject: [PATCH] data_loader: extract DB existence check, add if_check flag

---
 src/data_loader.py | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/data_loader.py b/src/data_loader.py
index 09eccd0..89a92ef 100644
--- a/src/data_loader.py
+++ b/src/data_loader.py
@@ -69,19 +69,27 @@ def run_add_1day_code_init_minKline(date,code_list):
 #         del ddb2
 
 
-def run_pool_add_byday_code_init_tick(date_list,code_list):
-    df_list=[]
+def check_if_date_codelist_exists(date,code_list):
     code_list_filtered=code_list
-    for date in date_list:
-
-        ddb1 = DDBfm(running_which_env)
-        code_list_filtered = ddb1.get_missing_code_date_in_tb(ddb1.ddf_hft_mink_tbname,date,code_list)
-        if len(code_list_filtered)==0:
-            continue
+    ddb1 = DDBfm(running_which_env)
+    code_list_filtered = ddb1.get_missing_code_date_in_tb(ddb1.ddf_hft_mink_tbname,date,code_list)
+    if code_list_filtered:
         logger.info(f"getting {'+'.join(code_list_filtered)} on {date}")
-        ddb1.close_sess()
-        del ddb1
+    else:
+        logger.info(f"all checked in database")
+    ddb1.close_sess()
+    del ddb1
+    return code_list_filtered
 
+
+def run_pool_add_byday_code_init_tick(date_list,code_list,if_check=1):
+    df_list=[]
+    
+    for date in date_list:
+        if if_check:
+            code_list_filtered = check_if_date_codelist_exists(date,code_list)
+        else:
+            code_list_filtered = code_list
         with TSLfm() as tsl:
             df = tsl.process_result_data_type(tsl.get_trade_tick(date,date,code_list_filtered))
         if not df.empty:
@@ -142,7 +150,7 @@ def run():
 
 
     
-def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='20220101',end_date='20221031'):
+def run_pool_dates_by_code_init_n_group(typ='mink',gp_amt=10,start_date='20220101',end_date='20221031',if_check=1):
     logger.info("Running run_pool_dates_by_group")
     all_code_dict_by_init={}
     for c in code_list_pickel:
@@ -159,7 +167,7 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2
     dates_dict_by_day={}
 
     for d in list(allDates.astype('str')):
-        group_no = int(d[-2:])%group_amount
+        group_no = int(d[-2:])%gp_amt
         if group_no not in dates_dict_by_day:
             dates_dict_by_day[group_no] = [d.replace('-','')]
         else:
@@ -173,8 +181,8 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2
         num_of_init = len(all_code_dict_by_init)
         for ind,code_init in enumerate(all_code_dict_by_init):
             # done: 'T','TS','TS','TF'
-            if code_init  in ['T']: # todo filtered this ,,'TF', 'IC','IF','IH','IM'
-                logger.info(f"Getting {code_init} (no.{ind}/{num_of_init} of group {group_no}/{group_amount})")
+            # if code_init in ['T']: # todo filtered this ,,'TF', 'IC','IF','IH','IM'
+                logger.info(f"Getting {code_init} (no.{ind}/{num_of_init} of group {group_no}/{gp_amt})")
                 code_list = all_code_dict_by_init[code_init]
                 if typ=='mink':
                     # logger.info('Running mink')
@@ -183,18 +191,17 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2
                     # run_pool_add_byday_code_init_minKline(date_list,code_list)
                 elif typ=='tick':
                     logger.info('Running tick')
-                    run_pool_add_byday_code_init_tick(date_list,code_list)
+                    run_pool_add_byday_code_init_tick(date_list,code_list,if_check)
 
 
 
 if __name__ == '__main__':
     # run()
-    
-    # run_create_hft_db() # including two tables
+    run_create_hft_db() # including two tables
 
     import time
     tic = time.perf_counter()
-    run_pool_dates_by_code_init_n_group(typ='tick')
+    run_pool_dates_by_code_init_n_group(typ='tick',gp_amt=3,start_date='20220601',end_date='20221031',if_check=0)
     # run_pool_dates_by_code_init_n_group(typ='mink',group_amount=5)
 
     toc = time.perf_counter()