@ -119,12 +119,15 @@ class DDBHFTLoader(DDBLoader):
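# The Pool object must not be created repeatedly, so create it once outside the outermost loop and pass it in as an argument.
# The Pool object must not be created repeatedly, so create it once outside the outermost loop and pass it in as an argument.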
with Pool ( self . num_workers if num_workers is None else num_workers ) as pool :
with Pool ( self . num_workers if num_workers is None else num_workers ) as pool :
# Always reuse the connection object, to reduce the memory consumption.
with self . mssql_engine . connect ( ) as conn :
# Loop through each HFT type, and within it through the stock list.
for hft_type_name in self . hft_type_list :
for hft_type_name in self . hft_type_list :
print ( ' Will work on hft type: ' , hft_type_name )
print ( ' Will work on hft type: ' , hft_type_name )
with tqdm ( stock_list ) as pbar :
with tqdm ( stock_list ) as pbar :
for stock_id in pbar :
for stock_id in pbar :
pbar . set_description ( f " Working on stock { stock_id } " )
pbar . set_description ( f " Working on stock { stock_id } " )
self . dump_hft_to_ddb ( hft_type_name , stock_id , pbar = pbar , pool = pool )
self . dump_hft_to_ddb ( hft_type_name , stock_id , conn , pbar = pbar , pool = pool )
def _get_stock_date_list ( self , cache = False ) :
def _get_stock_date_list ( self , cache = False ) :
@ -354,7 +357,7 @@ class DDBHFTLoader(DDBLoader):
print ( ' - ' * 80 )
print ( ' - ' * 80 )
def dump_hft_to_ddb ( self , type_name , stock_id , trade_date= None , pbar = None , pool = None ) :
def dump_hft_to_ddb ( self , type_name , stock_id , conn, trade_date= None , pbar = None , pool = None ) :
if ( type_name , stock_id , ' OK ' ) in self . dump_journal_df . index :
if ( type_name , stock_id , ' OK ' ) in self . dump_journal_df . index :
message = f " Will skip ( { type_name } , { stock_id } ) as it appears in the dump journal. "
message = f " Will skip ( { type_name } , { stock_id } ) as it appears in the dump journal. "
if pbar is None :
if pbar is None :
@ -376,7 +379,6 @@ class DDBHFTLoader(DDBLoader):
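# In practice, running the batch query per individual stock is acceptably efficient.
# In practice, running the batch query per individual stock is acceptably efficient.
# In MSSQL, the index columns are (S_INFO_WINDCODE, TRADE_DT).
# In MSSQL, the index columns are (S_INFO_WINDCODE, TRADE_DT).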
with self . mssql_engine . connect ( ) as conn :
stat = """
stat = """
select * from [ Level2Bytes { mssql_type_name } ] . dbo . [ { mssql_type_name } ]
select * from [ Level2Bytes { mssql_type_name } ] . dbo . [ { mssql_type_name } ]
where S_INFO_WINDCODE = ' {stock_id} '
where S_INFO_WINDCODE = ' {stock_id} '
@ -392,6 +394,7 @@ class DDBHFTLoader(DDBLoader):
row_list = [ row for row in row_list
row_list = [ row for row in row_list
if pd . to_datetime ( row [ 1 ] ) not in _journal_dt . index ]
if pd . to_datetime ( row [ 1 ] ) not in _journal_dt . index ]
print ( f " Resume job for { stock_id } , with { len ( row_list ) } rows left. " )
print ( f " Resume job for { stock_id } , with { len ( row_list ) } rows left. " )
del ( _journal_dt )
num_rows = len ( row_list )
num_rows = len ( row_list )
# If the row count is 0, the data is empty and we can return right away.
# If the row count is 0, the data is empty and we can return right away.
@ -424,6 +427,7 @@ class DDBHFTLoader(DDBLoader):
) :
) :
sub_pbar . update ( )
sub_pbar . update ( )
del ( row_list )
self . dump_journal_writer . write ( f " { type_name } , { stock_id } ,OK \n " )
self . dump_journal_writer . write ( f " { type_name } , { stock_id } ,OK \n " )
self . dump_journal_writer . flush ( )
self . dump_journal_writer . flush ( )
@ -475,8 +479,8 @@ class DDBHFTLoader(DDBLoader):
ddb_sess . upload ( { df_table_name : df } )
ddb_sess . upload ( { df_table_name : df } )
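# While processing Tick data, 'CHUNK[xxx] does not exist.' occurred occasionally, so try substituting the `append!` function here.
# While processing Tick data, 'CHUNK[xxx] does not exist.' occurred occasionally, so try substituting the `append!` function here.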
ddb_sess . run ( " append!(loadTable( ' {dbPath} ' , ` {partitioned_table_name} ), {df_table_name} ) " . format (
#ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format (
#ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format (
ddb_sess . run ( " tableInsert(loadTable( ' {dbPath} ' , ` {partitioned_table_name} ), {df_table_name} ) " . format (
dbPath = DDBHFTLoader . ddb_path ,
dbPath = DDBHFTLoader . ddb_path ,
partitioned_table_name = type_name + DDBHFTLoader . ddb_partition_table_suffix ,
partitioned_table_name = type_name + DDBHFTLoader . ddb_partition_table_suffix ,
df_table_name = df_table_name
df_table_name = df_table_name
@ -484,5 +488,6 @@ class DDBHFTLoader(DDBLoader):
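# Because the Session held inside the `DDBHFTLoader` object is not reused here, failing to close it manually causes memory to leak gradually.
# Because the Session held inside the `DDBHFTLoader` object is not reused here, failing to close it manually causes memory to leak gradually.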
ddb_sess . close ( )
ddb_sess . close ( )
del ( df )