From 4d2ad2b411d40569e91c3615d55ab8b7a3284066 Mon Sep 17 00:00:00 2001
From: yzlocal <local@local>
Date: Thu, 10 Nov 2022 11:04:44 +0800
Subject: [PATCH] Get single-day append working; skip codes already stored

---
 src/DDBfm.py       | 15 ++++++++-------
 src/TSLfm.py       |  4 ++--
 src/data_loader.py | 28 +++++++++++++++++-----------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/DDBfm.py b/src/DDBfm.py
index 25347f6..2b7e3b1 100644
--- a/src/DDBfm.py
+++ b/src/DDBfm.py
@@ -83,23 +83,24 @@ class DDBfm():
 
 
     def append_hft_table(self, tbName, df):
-        # load table to check? time&date&code
-
-        # print(df.shape)
-        
         appender = ddb.tableAppender(tableName=tbName, ddbSession=self.sess,dbPath=self.ddb_hft_dbPath)
         appender.append(df)
-            
+        logger.info(f"successfully appended df of shape {df.shape} to {tbName}")
 
     def search_code_date_in_tb(self,tbName,curr_date,curr_code):
         curr_date_formatted = curr_date[:4]+'.'+curr_date[4:6]+'.'+curr_date[6:]
         # print('?did i split this right')
         # print(curr_date_formatted)
         tb = self.sess.loadTable(dbPath=self.ddb_hft_dbPath, tableName=tbName)
+        logger.info(f"Quickly checking if data on {curr_code} {curr_date} exists...") # existence probe only: the query below asks for top(1); comparing row counts would be slower but stricter
         try:
             # doing this cuz there's no method to check if a table is empty lol
-            df = tb.select('*').where(f"code=`{curr_code}").where(f"m_nDatetime>={curr_date_formatted}d").top(1).toDF() 
-            print(df)
+            # NOTE(review): the comma in the where clause presumably combines both
+            # filters (this code AND exactly this date) — confirm against DDB docs.
+            where_clause=f"code=`{curr_code}, m_nDatetime.date()={curr_date_formatted}d"
+            probe = tb.select('*').where(where_clause).top(1).toDF()
+            if probe.empty:  # zero rows back: nothing matched this code/date
+                return 0
         except:
             return 0
         return 1
diff --git a/src/TSLfm.py b/src/TSLfm.py
index 82fdb26..f2d156a 100644
--- a/src/TSLfm.py
+++ b/src/TSLfm.py
@@ -214,7 +214,7 @@ class TSLfm:
         df = pd.DataFrame(r.value())
         if df.empty:
             logger.info('No data on this day.')
-            return 0
+            return pd.DataFrame()
         logger.info(f"Processing new df of shape {df.shape}, which looks like\n{df.head(5)}")
 
         # new = df["m_nDatetime"].str.split(" ", n = 1, expand = True)
@@ -238,6 +238,6 @@ if __name__ == '__main__':
     logger.add("../logs/{time:YYYYMMDD-HHmmss}_TSLfm.log", rotation="10 MB", compression="zip", level="INFO")
 
     with TSLfm() as tsl:
-        t_list=['CF2211']
+        t_list=['T2212']
         df = tsl.process_result_data_type(tsl.get_mkt_min_k('20221031','20221101',t_list))
         print(df)
diff --git a/src/data_loader.py b/src/data_loader.py
index 012ce2e..42f0ea4 100644
--- a/src/data_loader.py
+++ b/src/data_loader.py
@@ -3,14 +3,12 @@ import sys
 running_which_env='prd'
 
 
-
 from os.path import dirname, abspath, join
 ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
 from loguru import logger
 logger.remove()
 logger.add(sys.stderr, level="INFO")
-logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO")
-
+logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO")  # "{time:...}" is kept in a plain string (left for loguru to expand); only the env suffix is interpolated by the f-string
 
 
 
@@ -22,20 +20,27 @@ from TSLfm import TSLfm
 from code_list import code_list_pickel
 
 def run_add_1day_code_init_minKline(date,code_list):
-    with TSLfm() as tsl:
-        df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
     ddb = DDBfm(running_which_env)
 
+    code_list_filtered = []
     for code in code_list:
         if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname,date,code):
             logger.warning(f"Possible duplicates on {date} and {code}")
-            return 0 
-    ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df)
+        else:
+            code_list_filtered.append(code)
+    if not code_list_filtered:
+        # nothing left to fetch: no codes given, or all were flagged as possible duplicates
+        return 0
+    with TSLfm() as tsl:
+        fresh_df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list_filtered))
+    if not fresh_df.empty:
+        logger.info(f'Getting a df of {fresh_df.shape}: {code_list[0][:-4]} on {date}')
+        ddb.append_hft_table(ddb.ddf_hft_mink_tbname,fresh_df)
     
 def run_create_db_minKline():
     date = '20221101'
     with TSLfm() as tsl:
-        code_list=['CF2211']
+        code_list=['T2212']
         df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
         # print(df)
     ddb = DDBfm(running_which_env)
@@ -54,8 +59,8 @@ def run():
 
     # print(all_code_dict_by_init)
 
-    start_date='2022-10-31'
-    end_date='2022-11-08'
+    start_date='2022-09-30'
+    end_date='2022-11-09'
     allDates = pd.date_range(start_date, end_date, freq ='D')
     allDates = [i.replace('-','') for i in list(allDates.astype('str'))]
 
@@ -88,4 +93,5 @@ def run():
     # ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df)
 
 if __name__ == '__main__':
-    run()
\ No newline at end of file
+    run()
+    # run_create_db_minKline()
\ No newline at end of file