@@ -2,6 +2,8 @@ import importlib
import gzip
import pickle
import functools
import abc
import warnings
from pprint import pprint
from pathlib import Path
@@ -11,6 +13,8 @@ from multiprocessing import Pool
import numpy as np
import pandas as pd
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
import dolphindb as ddb
import dolphindb . settings as keys
@@ -20,84 +24,465 @@ import sqlalchemy as sa
import ProtoBuffEntitys
def make_stock_daily_df(blob, type_name, stock_id):
class DDBLoader(object):
    """
    Function used by multiprocessing workers to load data into ddb.
    - Holds a few shared configuration fields, including:
      1. SQL-Server connection parameters
      2. DolphinDB connection parameters
    - Declares a few @abstractmethod members; unless the polymorphism is
      actually needed, they are of limited use:
      1. create_ddb_database
      2. create_ddb_partition_table
    """
    blob = gzip.decompress(blob)
    dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()")
    dataArray.ParseFromString(blob)

    data_dict_list = [
        {field.name: val for field, val in entry.ListFields()}
        for entry in dataArray.dataArray
    mssql_config = {
        'host': '192.168.1.7',
        'username': 'sa',
        'password': 'passw0rd!'
    }

    ddb_config = {
        'host': '192.168.1.167',
        'username': 'admin',
        'password': '123456'
    }
    def __init__(self):
        self.mssql_engine = sa.create_engine(
            "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
            connect_args={
                "TrustServerCertificate": "yes"
            }, echo=False
        )
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
    @abc.abstractmethod
    def create_ddb_database(self, *args, **kwargs):
        """
        Creates the database; to be implemented by concrete subclasses.
        """
        return

    @abc.abstractmethod
    def create_ddb_partition_table(self, *args, **kwargs):
        """
        Creates the partitioned table; to be implemented by concrete subclasses.
        """
        return
    @staticmethod
    def tscode_to_windcode(series):
        return series.apply(lambda x: x[2:] + '.' + x[:2])

    @staticmethod
    def make_symbol(series):
        return series.astype('int32').astype('str') \
                     .apply(str.zfill, args=(6,)) \
                     .apply(lambda code:
                            code + '.SH' if code[0] == '6'
                            else code + '.SZ')

    @staticmethod
    def make_date(series):
        # Especially in the dividend table, key dates that have not been
        # announced yet are stored as 0, which would break date parsing.
        series.loc[series == 0] = np.nan
        return pd.to_datetime(
            series.astype(str), format='%Y%m%d')
    @staticmethod
    def make_nparray(series):
        return series.apply(lambda x: np.array(x))

    @staticmethod
    def make_time(series):
        # m_nTime packs HHMMSSmmm into a single integer; convert each part
        # into milliseconds (the seconds part also needs the *1000 scaling).
        s_hr = series // 10000000 * 3600000
        s_min = series % 10000000 // 100000 * 60000
        s_sec = series % 100000 // 1000 * 1000
        s_ms = series % 1000
        return pd.to_timedelta(s_hr + s_min + s_sec + s_ms, unit='ms')
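
    # A hedged sanity check of the converters above (illustrative only; the
    # sample values are made up):
    #
    #   codes = pd.Series([600519, 863])
    #   DDBLoader.make_symbol(codes).tolist()     # ['600519.SH', '000863.SZ']
    #
    #   times = pd.Series([93000000, 145959500])  # HHMMSSmmm-packed integers
    #   DDBLoader.make_time(times)                # 09:30:00.000, 14:59:59.500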
class DDBPITLoader(DDBLoader):

    ddb_path = "dfs://pit_stock_ts"
    ddb_dbname = "ddb_pit_stock_ts"

    num_code_partition = 50

    table_name_mapping = {
        #'CBS_AFTER_ADJ' : 'bs_common_adj',
        #'CBS_BEFORE_ADJ' : 'bs_common_ori',
        #'CCFS_AFTER_ADJ' : 'cfs_common_adj',
        #'CCFS_BEFORE_ADJ' : 'cfs_common_ori',
        #'CIS_AFTER_ADJ' : 'is_common_adj',
        #'CIS_BEFORE_ADJ' : 'is_common_ori',
        'DIV_WIND': 'divident',
        #'EP_WIND' : 'earnings_preannouncement',
        #'PEE_WIND' : 'preliminary_earnings_estimate'
    }
    meta_col_config = {
        'WIND_CODE': ('code', 'SYMBOL'),
        # Meta columns of the mssql table that should not be recorded are
        # simply mapped to None here.
        'IntCode': None,
        'ACTUAL_ANN_DT': None,
        'ReportPeriod': ('report_period', 'DATE'),
        'AppearInPeriod': ('appear_in_period', 'DATE'),
        'AppearAtDate': ('appear_at_date', 'DATE')
    }
    date_col_set = {
        'report_period',
        'appear_in_period',
        'appear_at_date',
        'ReportPeriod',
        'AppearInPeriod',
        'AppearAtDate',
        'EQY_RECORD_DT',
        'EX_DT',
        'DVD_PAYOUT_DT',
        'S_DIV_PRELANDATE',
        'S_DIV_SMTGDATE',
        'DVD_ANN_DT',
        'S_DIV_PREANNDT'
    }

    ddb_type_mapping = {
        'float': 'DOUBLE',
        'int': 'INT',
        'text': 'STRING',
        'varchar': 'STRING',
        'str': 'STRING'
    }
    # The fundamentals database is now hosted on the .91 server.
    mssql_config = {
        'host': '192.168.1.91',
        'username': 'sa',
        'password': 'xn.123',
        'dbname': 'tr_statement'
    }
    def __init__(self):
        super().__init__()
        # Re-create the mssql_engine object: at this point we need the
        # fundamentals database instead of master.
        self.mssql_engine = sa.create_engine(
            "mssql+pyodbc://{username}:{password}@{host}/{dbname}?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
            connect_args={
                "TrustServerCertificate": "yes"
            }, echo=False
        )
    def create_ddb_database(self):
        self.ddb_sess.run("""
            {dbName} = database(
                directory='{dbPath}',
                partitionType=HASH,
                partitionScheme=[SYMBOL, {num_code_partition}],
                engine='TSDB'
            )
        """.format(
            dbName=self.ddb_dbname,
            dbPath=self.ddb_path,
            num_code_partition=self.num_code_partition
        ))
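
    # A hedged sketch (not part of the original diff): re-running
    # create_ddb_database fails once the path already exists, so an
    # idempotent variant can guard with DolphinDB's existsDatabase /
    # dropDatabase built-ins before creating the database.
    def create_ddb_database_idempotent(self):
        self.ddb_sess.run("""
            if (existsDatabase('{dbPath}')) {{
                dropDatabase('{dbPath}');
            }}
            {dbName} = database(
                directory='{dbPath}',
                partitionType=HASH,
                partitionScheme=[SYMBOL, {num_code_partition}],
                engine='TSDB'
            )
        """.format(
            dbName=self.ddb_dbname,
            dbPath=self.ddb_path,
            num_code_partition=self.num_code_partition
        ))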
    def _make_col_config(self, mssql_table_name):
        """
        Returns:
            mssql_col_name_list, ddb_col_name_list, ddb_col_type_list
        """
        with self.mssql_engine.connect() as conn:
            col_sp_list = list(conn.execute(f"exec sp_columns {mssql_table_name}").fetchall())

        mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \
            [], [], []

        for col_sp in col_sp_list:
            _col_name = col_sp[3]
            _col_type = col_sp[5]
            # Meta columns are handled according to the meta config table.
            if _col_name in self.meta_col_config:
                # Skip meta columns of the mssql table that should not be
                # recorded.
                if self.meta_col_config[_col_name] is None:
                    continue
                # Both the column name and the column type are remapped.
                mssql_col_name_list.append(_col_name)
                ddb_col_name_list.append(self.meta_col_config[_col_name][0])
                ddb_col_type_list.append(self.meta_col_config[_col_name][1])
            # Non-meta columns only need a type check; float maps to DOUBLE.
            else:
                # Columns to be converted to DATE later; they are usually
                # stored as INT in the source table.
                if _col_name in self.date_col_set:
                    mssql_col_name_list.append(_col_name)
                    ddb_col_name_list.append(_col_name)
                    ddb_col_type_list.append('DATE')
                # Convert the type according to the mapping table.
                elif _col_type in self.ddb_type_mapping:
                    mssql_col_name_list.append(_col_name)
                    ddb_col_name_list.append(_col_name)
                    ddb_col_type_list.append(self.ddb_type_mapping[_col_type])
                # Types missing from the mapping table are not added to the
                # column lists at all.
                else:
                    print(f"!**Unrecognized type '{_col_type}' for column {_col_name}, will skip.")

        return mssql_col_name_list, ddb_col_name_list, ddb_col_type_list
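
    # Illustration only (hypothetical values): for the 'DIV_WIND' table the
    # three parallel lists returned above might look like
    #
    #   mssql_col_name_list = ['WIND_CODE', 'ReportPeriod',  'EX_DT', ...]
    #   ddb_col_name_list   = ['code',      'report_period', 'EX_DT', ...]
    #   ddb_col_type_list   = ['SYMBOL',    'DATE',          'DATE',  ...]
    #
    # with 'IntCode' and 'ACTUAL_ANN_DT' dropped because their
    # meta_col_config entries are None.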
    def create_ddb_partition_table(self, mssql_table_name):
        """Creates the partitioned table."""
        memory_table_name = mssql_table_name
        partition_table_name = self.table_name_mapping[mssql_table_name]

        mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \
            self._make_col_config(mssql_table_name)

        # Choose the compression setup depending on whether the table has an
        # `appear_in_period` column.
        if 'appear_in_period' in ddb_col_name_list:
            compress_methods = """{
                'report_period': 'delta',
                'appear_in_period': 'delta',
                'appear_at_date': 'delta'
            }"""
        else:
            compress_methods = """{
                'report_period': 'delta',
                'appear_at_date': 'delta'
            }"""

        # Pre- and post-adjustment rows are already split by
        # `appear_in_period`, so there is no need to sort on it again.
        sort_columns = "`code`report_period`appear_at_date"
        # 1. Create an in-memory table that fixes the column names and types.
        # 2. Create the partitioned table from the in-memory table, setting
        #    the partition column and other details.
        self.ddb_sess.run("""
            {memory_table_name} = table(
                {capacity}:0,
                {column_name_list},
                [{column_type_list}]
            );

            if (existsTable("{ddb_path}", "{partition_table_name}")) {{
                dropTable({ddb_dbname}, "{partition_table_name}");
            }}

            {partition_table_name} = createPartitionedTable(
                dbHandle={ddb_dbname},
                table={memory_table_name},
                tableName="{partition_table_name}",
                partitionColumns='code',
                compressMethods={compress_methods},
                sortColumns={sort_columns}
            );
        """.format(
            ddb_dbname=self.ddb_dbname,
            ddb_path=self.ddb_path,
            memory_table_name=memory_table_name,
            partition_table_name=partition_table_name,
            capacity=10,
            column_name_list='`' + '`'.join(ddb_col_name_list),
            column_type_list=','.join(ddb_col_type_list),
            compress_methods=compress_methods.replace('\n', '').replace(' ', ''),
            sort_columns=sort_columns
        ))
        print('-' * 80)
        print(f"Did create partition table <{partition_table_name}>:")
        pprint(self.ddb_sess.run(f"schema({partition_table_name});"))
        return partition_table_name, mssql_col_name_list
    def create_ddb_partition_tables(self):
        for mssql_table_name in self.table_name_mapping:
            self.create_ddb_partition_table(mssql_table_name)
    def _dump_pit_to_ddb(self, mssql_table_name):
        print('Will work on table', mssql_table_name)
        # The returned `mssql_col_name_list` is used to filter the columns of
        # the dataframe fetched from SQL-Server.
        partition_table_name, mssql_col_name_list = \
            self.create_ddb_partition_table(mssql_table_name)

        with self.mssql_engine.connect() as conn:
            stat = f"select distinct [WIND_CODE] from {mssql_table_name}"
            stock_id_list = list(conn.execute(stat).fetchall())

            with tqdm(stock_id_list) as pbar:
                for (stock_id,) in pbar:
                    pbar.set_description(f"Will work on {stock_id}")
                    #pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server")
                    stat = """
                        select * from {mssql_table_name}
                        where WIND_CODE='{stock_id}' and AppearAtDate>0
                    """.format(
                        mssql_table_name=mssql_table_name,
                        stock_id=stock_id
                    )
                    row_list = list(conn.execute(stat).fetchall())
                    num_rows = len(row_list)

                    # The AppearAtDate filter may leave us with an empty
                    # result set.
                    if num_rows == 0:
                        print(f"Will skip {stock_id} due to empty result.")
                        continue

                    #pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}")
                    # Filter the columns fetched by `select *` so that they
                    # match the columns of the partition table.
                    df = pd.DataFrame(row_list)[mssql_col_name_list]
                    # Convert the INT-typed date columns into DATE columns.
                    for df_col in df.columns:
                        if df_col in self.date_col_set:
                            df[df_col] = DDBLoader.make_date(df[df_col])
                    # This conversion was already done when building the
                    # database view, so it is not repeated here.
                    #df['WIND_CODE'] = DDBLoader.tscode_to_windcode(df['WIND_CODE'])
                    self.ddb_sess.upload({mssql_table_name: df})
                    self.ddb_sess.run(f"{partition_table_name}.tableInsert({mssql_table_name})")

    def dump_pit_to_ddb(self):
        for mssql_table_name in self.table_name_mapping:
            self._dump_pit_to_ddb(mssql_table_name)
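
    # A hedged usage sketch, mirroring main() at the bottom of this file:
    #
    #   loader = DDBPITLoader()
    #   loader.create_ddb_database()
    #   loader.dump_pit_to_ddb()  # creates each partition table, then fills it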
class DDBDailyLoader(DDBLoader):

    ddb_path = "dfs://daily_stock_ts"
    ddb_dbname = "db_daily_stock_ts"

    daily_kline_cols = [
        'code', 'm_nDate',
        'open', 'high', 'low', 'close', 'vol',
        'amount', 'cjbs', 'yclose',
        'PctChg', 'IsZt', 'IsDt', 'IsST', 'IsGoDelist',
        'FloatShares', 'MarketValues',
        'factor'
    ]

    array_type_list = [
        field.name
        for field, val in dataArray.dataArray[0].ListFields()
        if isinstance(field.default_value, list)

    daily_kline_col_types = [
        'SYMBOL', 'DATE',
        'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE',
        'DOUBLE', 'INT', 'DOUBLE',
        'DOUBLE', 'INT', 'INT', 'INT', 'INT',
        'DOUBLE', 'DOUBLE',
        'DOUBLE'
    ]
    #pprint(array_type_list)

    df = pd.DataFrame(data_dict_list)
    #df['code'] = make_symbol(df['code'])
    df['code'] = stock_id
    df['m_nDate'] = make_date(df['m_nDate'])
    df['m_nTime'] = df['m_nDate'] + make_time(df['m_nTime'])
    for field_name in array_type_list:
        df[field_name] = make_nparray(df[field_name])

    #print(f"Did create ddb table for dataframe of shape {df.shape}")
    # self.make_table_skeleton(type_name, df.shape[0])
    return df
    def create_ddb_database(self):
        # TODO: the daily database has already been created by DDBDailyFactor.
        # It could be migrated here later; for now we leave it alone.
        pass
def dump_stock_daily_to_ddb(row, type_name, stock_id):
    """
    Function used by multiprocessing workers to load data into ddb.
    """
    df_table_name = type_name
    df = make_stock_daily_df(row[2], type_name, stock_id)
    def load_ddb_database(self):
        self.ddb_sess.run("""
            {dbName} = database(directory='{dbPath}')
        """.format(
            dbName=self.ddb_dbname,
            dbPath=self.ddb_path
        ))
        print('Did load database from', self.ddb_path)
    ddb_sess = ddb.session(DDBLoader.ddb_config['host'], 8848)
    ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password'])

    ddb_sess.upload({df_table_name: df})
    ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
        dbPath=DDBLoader.ddb_path,
        partitioned_table_name=type_name + DDBLoader.ddb_partition_table_suffix,
        df_table_name=df_table_name
    ))
    def create_ddb_partition_table(self, memory_table_name, partition_table_name):
        # TODO: only the daily K-line table is handled for now; the
        # fundamentals data may be migrated here later.
        # The daily K-line schema is simple enough to be written inline here.
        # Data should be migrated stock by stock to avoid putting huge
        # pressure on memory.
        self.ddb_sess.run("""
            // Make sure any existing table is dropped first.
            if (existsTable("{ddb_daily_path}", "{partition_table_name}")) {{
                dropTable({ddb_daily_dbname}, "{partition_table_name}");
            }}

            // Then create the persistent partitioned table from the
            // structure of the in-memory table.
            {partition_table_name} = {ddb_daily_dbname}.createPartitionedTable(
                table={memory_table_name},
                tableName="{partition_table_name}",
                partitionColumns=`code,
                sortColumns=`code`m_nDate,
                compressMethods={{m_nDate: "delta"}}
            );
        """.format(
            ddb_daily_path=self.ddb_path,
            ddb_daily_dbname=self.ddb_dbname,
            memory_table_name=memory_table_name,
            partition_table_name=partition_table_name,
        ))
def make_symbol(series):
    return series.astype('int32').astype('str') \
                 .apply(str.zfill, args=(6,)) \
                 .apply(lambda code:
                        code + '.SH' if code[0] == '6'
                        else code + '.SZ')
    def create_ddb_memory_table(self, memory_table_name, capacity):
        self.ddb_sess.run("""
            // Create an empty in-memory table that only describes the
            // structure; capacity can be as small as 10 if no data needs
            // to be inserted into it.
            {memory_table_name} = table({capacity}:0, {col_names}, [{col_types}]);
        """.format(
            memory_table_name=memory_table_name,
            capacity=capacity,
            col_names='`' + '`'.join(self.daily_kline_cols),
            col_types=','.join(self.daily_kline_col_types)
        ))
def make_date(series):
    return pd.to_datetime(
        series.astype(str), format='%Y%m%d')
    def dump_daily_kline_to_ddb(self):
        # Create the partitioned table first, then insert the data stock by
        # stock:
        # 1. Creating the partitioned table while inserting the first stock
        #    would need extra control flow, which is cumbersome.
        # 2. A python dataframe can be uploaded straight to a dolphindb
        #    in-memory table without worrying about column types; only the
        #    partitioned table needs them set explicitly.
def make_nparray(series):
    return series.apply(lambda x: np.array(x))
        memory_table_name = 'daily_kline_mem'
        partition_table_name = 'daily_kline'

        self.create_ddb_memory_table(memory_table_name, 10)
        print('Did create ddb memory table.')
        pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
        self.create_ddb_partition_table(memory_table_name, partition_table_name)
        print('Did create ddb partition table.')
        pprint(self.ddb_sess.run(f"schema({partition_table_name})"))
def make_time(series):
    # Same HHMMSSmmm decoding as DDBLoader.make_time; the seconds part also
    # needs the *1000 scaling into milliseconds.
    s_hr = series // 10000000 * 3600000
    s_min = series % 10000000 // 100000 * 60000
    s_sec = series % 100000 // 1000 * 1000
    s_ms = series % 1000
    return pd.to_timedelta(s_hr + s_min + s_sec + s_ms, unit='ms')
        with self.mssql_engine.connect() as conn:
            stat = "select distinct [StockID] from [StockDaily].dbo.[DailyKLine]"
            stock_id_list = list(conn.execute(stat).fetchall())

            with tqdm(stock_id_list) as pbar:
                for (stock_id,) in pbar:
                    pbar.set_description(f"Will work on {stock_id}")
                    #pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server")
                    stat = """
                        select * from [StockDaily].dbo.[DailyKLine]
                        where StockID='{stock_id}'
                    """.format(
                        stock_id=stock_id
                    )
                    row_list = list(conn.execute(stat).fetchall())
                    num_rows = len(row_list)

                    #pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}")
                    df = pd.DataFrame(row_list)
                    df['date'] = DDBLoader.make_date(df['date'])
                    df['StockID'] = DDBLoader.tscode_to_windcode(df['StockID'])
                    self.ddb_sess.upload({memory_table_name: df})
                    #print('Did upload dataframe to ddb.')
                    #pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
                    #break
                    self.ddb_sess.run(f"{partition_table_name}.tableInsert({memory_table_name})")
class DDBLoader(object):
class DDBHFTLoader(DDBLoader):
"""
0. 从sql - server中读取calendar数据 , 并创建成员变量df_calendar , df_calendar可以保存在本地pickle作为缓存
| - ` def make_calendar_df ( self ) - > df_calendar `
@@ -162,18 +547,6 @@ class DDBLoader(object):
        13: 'INT',
    }
    mssql_config = {
        'host': '192.168.1.7',
        'username': 'sa',
        'password': 'passw0rd!'
    }

    ddb_config = {
        'host': '192.168.1.7',
        'username': 'admin',
        'password': '123456'
    }

    # this value may be used by factor makers, which may loop through code partitions
    num_code_partition = 50
@@ -182,18 +555,6 @@ class DDBLoader(object):
    ddb_dump_journal_fname = 'ddb_dump_journal.csv'

    def __init__(self):
        self.mssql_engine = sa.create_engine(
            "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
            connect_args={
                "TrustServerCertificate": "yes"
            }, echo=False
        )
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
    def init_ddb_database(self, df_calendar):
        """
        1. Creates the ddb_database
@@ -490,6 +851,7 @@ class DDBLoader(object):
print ( " Will create new Pool object, but this is not encourage for large batch work. " )
pool = Pool ( self . num_worker )
# 在单个股票内部,对不同日期进行并行处理,对内存使用较为友好,不需要同时载入多个股票海量的全历史数据
with tqdm ( total = num_rows , leave = False ) as sub_pbar :
for _ in pool . imap_unordered (
functools . partial (
@@ -505,18 +867,85 @@ class DDBLoader(object):
        self.dump_journal_writer.flush()
    @staticmethod
    def make_stock_daily_df(blob, type_name, stock_id):
        """
        Function used by multiprocessing workers to load data into ddb.
        """
        blob = gzip.decompress(blob)
        dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()")
        dataArray.ParseFromString(blob)

        data_dict_list = [
            {field.name: val for field, val in entry.ListFields()}
            for entry in dataArray.dataArray
        ]
        array_type_list = [
            field.name
            for field, val in dataArray.dataArray[0].ListFields()
            if isinstance(field.default_value, list)
        ]
        #pprint(array_type_list)

        df = pd.DataFrame(data_dict_list)
        #df['code'] = make_symbol(df['code'])
        df['code'] = stock_id
        df['m_nDate'] = make_date(df['m_nDate'])
        df['m_nTime'] = df['m_nDate'] + make_time(df['m_nTime'])
        for field_name in array_type_list:
            df[field_name] = make_nparray(df[field_name])

        #print(f"Did create ddb table for dataframe of shape {df.shape}")
        # self.make_table_skeleton(type_name, df.shape[0])
        return df
    @staticmethod
    def dump_stock_daily_to_ddb(row, type_name, stock_id):
        """
        Function used by multiprocessing workers to load data into ddb.
        """
        df_table_name = type_name
        df = make_stock_daily_df(row[2], type_name, stock_id)

        ddb_sess = ddb.session(DDBLoader.ddb_config['host'], 8848)
        ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password'])

        ddb_sess.upload({df_table_name: df})
        ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
            dbPath=DDBLoader.ddb_path,
            partitioned_table_name=type_name + DDBLoader.ddb_partition_table_suffix,
            df_table_name=df_table_name
        ))
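
    # A hedged sketch (not part of the original diff) of how this worker is
    # dispatched: the truncated hunk above shows Pool.imap_unordered with
    # functools.partial, roughly like the following, where `pool`,
    # `row_list`, `type_name` and `stock_id` are prepared by the caller:
    #
    #   worker = functools.partial(
    #       DDBHFTLoader.dump_stock_daily_to_ddb,
    #       type_name=type_name, stock_id=stock_id)
    #   with tqdm(total=len(row_list), leave=False) as sub_pbar:
    #       for _ in pool.imap_unordered(worker, row_list):
    #           sub_pbar.update()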
def main():

    loader = DDBLoader()
    df_calendar = loader.make_calendar_df()

    loader.init_ddb_database(df_calendar)
    print('Did finish init_ddb_database')

    # PIT fundamentals data
    loader = DDBPITLoader()
    loader.create_ddb_database()
    #loader.create_ddb_partition_tables()
    loader.dump_pit_to_ddb()

    # Daily quotes data
    #loader = DDBDailyLoader()
    #loader.load_ddb_database()
    #loader.dump_daily_kline_to_ddb()

    # High-frequency data
    #df_calendar = loader.make_calendar_df()
    #loader.init_ddb_database(df_calendar)
    #print('Did finish init_ddb_database')
    #loader.load_ddb_database()
    #print('Did load ddb database')

    loader.init_ddb_table_data(df_calendar)
    print('Did finish init_table_data')
    #loader.init_ddb_table_data(df_calendar)
    #print('Did finish init_table_data')


if __name__ == '__main__':
    main()