123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518 |
- module fundit::rankingCalculator
- use fundit::sqlUtilities
- use fundit::dataPuller
- use fundit::dataSaver
- /*
- * 汇集所有参与排名的指标信息
- *
- */
- def get_indicator_info() {
- ids = [1,
- 2, 6, 9, 10, 11, 12, 21, 50, 52, 59,
- 14, 15, 16, 17, 18, 19, 40, 58,
- 37, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 33, 34, 35, 36,
- 66, 53, 54, 55, 56, 57
- ];
- names = ['ret',
- 'maxdrawdown', 'kurtosis', 'skewness', 'stddev', 'alpha', 'beta', 'downsidedev', 'maxdrawdown_months', 'maxdrawdown_recoverymonths', 'winrate',
- 'kapparatio', 'treynorratio', 'jensen', 'omegaratio', 'sharperatio', 'sortinoratio_MAR', 'calmarratio', 'sortinoratio',
- 'per_con', 'info_ratio', 'var', 'cvar', 'smddvar', 'smddcvar', 'smdd_lpm1', 'smdd_lpm2', 'smdd_downside_dev', 'tracking_error', 'm2',
- 'upsidecapture_ret', 'downsidecapture_ret', 'upsidecapture_ratio', 'downsidecapture_ratio',
- 'stability', 'jc_stddev', 'gzstyle_stddev', 'gzstrategy_stddev', 'zz_stddev', 'zx_stddev'
- ];
- is_ASCs = [false,
- true, true, false, true, false, false, true, true, true, false,
- false, false, false, false, false, false, false, false,
- false, false, true, true, true, true, true, true, true, true, false,
- false, false, false, true,
- true, true, true, true, true, true
- ];
- return table(names AS name, ids AS id, is_ASCs AS is_ASC);
- }
- /*
- * 自定义百分位计算
- *
- */
- defg perRank(x, is_ASC) {
- return (100 * x.rank(ascending=is_ASC, percent=true)).round(0);
-
- }
- /*
- * 动态生成用于排序的SQL脚本
- *
- * @param data_table <TABLE>: 指标横表
- * @param indicator_table <TABLE>: 指标表,有 id, name, is_ASC 字段
- *
- * TODO: portfolio, cf, manager, company,
- * TODO: bfi & category
- *
- */
- def gen_ranking_sql(data_table, indicator_table) {
- ranking = create_entity_indicator_ranking();
- ranking_num = create_entity_indicator_ranking_num();
- for(indicator in indicator_table) {
- // 与 MySQL 不同,这里统一把近4年和成立以来的排名去掉
- if(indicator.id == 1)
- v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
- else {
- v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
- v_missing_trailing = ['1m', '3m'];
- }
- // 绝对排名和百分位排名
- t_ranking = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id']), <indicator.id as indicator_id>,
- sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, rank{, indicator.is_ASC}, 'absrank_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, perRank{, indicator.is_ASC}, 'perrank_' + v_trailing)
- ),
- from = data_table,
- where = < category_id IS NOT NULL>,
- groupBy = sqlCol(['category_id', 'end_date']),
- groupFlag = 0 ).eval(); // context by
- // 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL
- if(indicator.id != 1) {
- v_tmp_col = ['indicator_' + v_missing_trailing, 'absrank_' + v_missing_trailing, 'perrank_' + v_missing_trailing].flatten();
- v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(INT, v_missing_trailing.size())].flatten();
- t_ranking.addColumn(v_tmp_col, v_tmp_type);
- }
-
- t_ranking.reorderColumns!(ranking.colNames());
- ranking.tableInsert(t_ranking);
- // 平均值、集合数量、各分位的阈值
- t_ranking_num = sql(select =(sqlCol(['end_date', 'category_id']),
- sqlCol('raise_type', mean, 'raise_type'), <indicator.id as indicator_id>,
- sqlCol(indicator.name + '_' + v_trailing, mean, 'avg_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, count, 'avg_' + v_trailing + '_cnt'),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 5, 95)}, 'perrank_percent_5_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 10, 90)}, 'perrank_percent_10_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 25, 75)}, 'perrank_percent_25_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 50, 50)}, 'perrank_percent_50_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 75, 25)}, 'perrank_percent_75_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 90, 10)}, 'perrank_percent_90_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 95, 5)}, 'perrank_percent_95_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, min, max), 'best_' + v_trailing),
- sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, max, min), 'worst_' + v_trailing)
- ),
- from = data_table,
- where = < category_id IS NOT NULL>,
- groupBy = sqlCol(['category_id', 'end_date']),
- groupFlag = 1).eval(); // group by
- // 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL
- if(indicator.id != 1) {
- v_tmp_col = ['avg_' + v_missing_trailing, 'avg_' + v_missing_trailing + '_cnt', 'perrank_percent_5_' + v_missing_trailing,
- 'perrank_percent_10_' + v_missing_trailing, 'perrank_percent_25_' + v_missing_trailing,
- 'perrank_percent_50_' + v_missing_trailing, 'perrank_percent_75_' + v_missing_trailing,
- 'perrank_percent_90_' + v_missing_trailing, 'perrank_percent_95_' + v_missing_trailing,
- 'best_' + v_missing_trailing, 'worst_' + v_missing_trailing
- ].flatten();
- v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
- take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
- take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
- take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
- take(DOUBLE, v_missing_trailing.size()),take(DOUBLE, v_missing_trailing.size())
- ].flatten();
- t_ranking_num.addColumn(v_tmp_col, v_tmp_type);
- }
- t_ranking_num.reorderColumns!(ranking_num.colNames());
- ranking_num.tableInsert(t_ranking_num);
- }
- return ranking, ranking_num;
- }
- /*
- * 运行排名SQL脚本
- *
- *
- */
- def run_ranking_sql(ranking_by, mutable data_table, indicator_table) {
- // data_table = t
- // v_tables = v_ranking_tables
- // ranking_by = 'strategy'
- ret = array(ANY, 0);
- if(ranking_by == 'bfi') {
- UPDATE data_table SET category_id = factor_id;
- v_ranking = gen_ranking_sql(data_table, indicator_table);
-
- ret.append!(v_ranking[0]); // ranking table
- ret.append!(v_ranking[1]); // ranking_num table
-
- } else {
- // 策略排名
- UPDATE data_table SET category_id = strategy$STRING;
- v_ranking = gen_ranking_sql(data_table, indicator_table);
- ret.append!(v_ranking[0]); // ranking table
- ret.append!(v_ranking[1]); // ranking_num table
- // 子策略排名
- UPDATE data_table SET category_id = substrategy$STRING;
- v_ranking = gen_ranking_sql(data_table, indicator_table);
-
- ret.append!(v_ranking[0]); // ranking table
- ret.append!(v_ranking[1]); // ranking_num table
- }
- return ret;
- }
- /*
- * 为排名做数据准备
- *
- * TODO: 对少量组合做优化
- *
- * @return <VECTOR>: 包含两个表,一个指标数据表,一个是指标信息表
- *
- */
- def prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) {
- // return
- table_desc = get_performance_table_description(entity_type);
- tb_data_return = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- entity_id_name = table_desc.sec_id_col[0];
- // risk
- table_desc = get_risk_stats_table_description(entity_type);
- tb_data_risk_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- // risk adjusted return
- table_desc = get_riskadjret_stats_table_description(entity_type);
- tb_data_riskadjret_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- // others
- table_desc = get_indicator_table_description(entity_type);
- tb_data_indicator_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- // 做个大宽表
- matchingCols = [entity_id_name, 'end_date'];
- tb_data = lj(lj(lj(tb_data_return, tb_data_indicator_stats, matchingCols), tb_data_risk_stats, matchingCols), tb_data_riskadjret_stats, matchingCols);
- if(ranking_by == 'bfi') {
- // 去掉被移到 fund_ty_bfi_bm_indicator 表中的重复字段
- v_dups = [38, 48, 11, 12, 59, 16];
- v_dup_col = EXEC name + suffix
- FROM cj(get_indicator_info(), table(['_6m', '_1y', '_2y', '_3y', '_5y', '_10y', '_ytd'] AS suffix))
- WHERE id IN v_dups;
-
- tb_data.dropColumns!(v_dup_col);
- // bfi table
- table_desc = get_bfi_by_category_group_table_description(entity_type);
- tb_bfi = SELECT portfolio_id, end_date, factor_id FROM get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- // bfi (as benchmark) indicator
- table_desc = get_bfi_indicator_table_description(entity_type);
- tb_data_bfi_indicator = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- matchingCols2 = [entity_id_name, 'end_date', 'factor_id'];
- tb_data = lj(ej(tb_data, tb_bfi, matchingCols), tb_data_bfi_indicator, matchingCols2);
- v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义
- 41, 42, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算
- 2, 6, 9, 10, 21, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算
- 14, 15, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算
- 11, 12, 16, 33, 34, 35, 36, 38, 48, 59 // 对应 fund_ty_bfi_bm_indicator
- ]; // 取消 pf_fund_factor_stability 66 (stabiliy) 因为 dolphin 未计算
- // 取消 fund_rbsa_style 53, 54, 55, 56, 57(风格稳定性) 因为 dolphin 未计算
- } else {
- // upside/downside capture
- table_desc = get_capture_style_table_description(entity_type);
- tb_data_capture_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
- tb_data = lj(tb_data, tb_data_capture_stats, matchingCols);
- v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义
- 38, 41, 42, 48, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算
- 2, 6, 9, 10, 11, 12, 21, 59, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算
- 14, 15, 16, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算
- 33, 34, 35, 36 // 对应 fund_style_stats
- ];
- }
- tb_data.rename!(entity_id_name, 'entity_id');
- t = SELECT * FROM entity_info en
- INNER JOIN tb_data d ON en.entity_id = d.entity_id
- WHERE en.strategy IS NOT NULL;
- if(ranking_by == 'bfi')
- UPDATE t SET category_id = factor_id;
- else if(ranking_by == 'substrategy')
- UPDATE t SET category_id = substrategy$STRING;
- else
- UPDATE t SET category_id = strategy$STRING;
-
-
- indicator_table = SELECT * FROM get_indicator_info() WHERE id IN v_indicator_id;
- return t, indicator_table;
- }
- /*
- * 通用指标排名计算
- *
- * @param ranking_by <STRING>: strategy, bfi
- *
- */
- def cal_indicator_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) {
- // 当前只对基金做排名, 其它类型参考基金排名做相对排名
- if(!(entity_type in ['MF', 'HF'])) return null;
- v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL);
- v_ranking_tables = run_ranking_sql(ranking_by, v[0], v[1]);
- return v_ranking_tables;
- }
- /*
- * 将源指标表横表变竖表,以方便参考排名计算
- *
- *
- */
- def run_transformation_sql(entity_type, data_table, ranking_by, indicator_info) {
- // 只有 portfolio_id 是整型,其它的都是字符串
- is_id_integer = false;
- if(entity_type == 'PF') is_id_integer = true;
- tb_ranking = create_entity_indicator_ranking(is_id_integer);
- for(indicator in indicator_info) {
- // 只有收益需要1m, 3m
- if(indicator.id == 1)
- v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
- else {
- v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
- v_missing = ['1m', '3m'];
- }
- t = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id']), <indicator.id as indicator_id>,
- sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing)
- ),
- from = data_table
- ).eval();
- // 给非收益指标补上1m, 3m的三套指标
- if(indicator.id != 1 )
- {
- v_tmp_col = ['indicator_' + v_missing, 'absrank_' + v_missing, 'perrank_' + v_missing].flatten();
- v_tmp_type = [take(DOUBLE, v_missing.size()), take(INT, v_missing.size()), take(INT, v_missing.size())].flatten();
- t.addColumn(v_tmp_col, v_tmp_type);
- }
- // 给所有指标补上 absrank 和 perrank 两套指标
- v_tmp_col = ['absrank_' + v_trailing, 'perrank_' + v_trailing].flatten();
- v_tmp_type = [take(INT, v_trailing.size()), take(INT, v_trailing.size())].flatten();
- t.addColumn(v_tmp_col, v_tmp_type);
-
- INSERT INTO tb_ranking
- SELECT * FROM (sql(select = sqlCol(tb_ranking.colNames()),
- from = t).eval());
- }
- return tb_ranking;
- }
- /*
- * 将源风险指标表横表变竖表,以方便排名计算
- *
- *
- */
- def transform_data_for_ranking (entity_type, entity_info, end_date, ranking_by, isFromMySQL=true) {
- if(entity_info.isVoid() || entity_info.size() == 0) return null;
- v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL);
- tb_ranking = run_transformation_sql(entity_type, v[0], ranking_by, v[1]);
- return tb_ranking;
- }
- /*
- *
- * 参考某指定类排名,计算相对排名
- *
- * @param benchmark_ranking <TABLE>: 被参考的排名表,如公募混合基金
- * @param entity_ranking <TABLE>: 被计算的指标表,排名被填充在原表中
- * @param isFromMySQL <BOOL>
- *
- *
- * Example: cal_relative_ranking(get_fund_indicator_ranking(NULL, 2024.09M, 102, true),
- * transform_risk_stats_for_ranking('PF', get_entity_info('PF', NULL), 2024.09M, true),
- * true);
- */
- def cal_relative_ranking(benchmark_ranking, mutable entity_ranking, isFromMySQL=true) {
- v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
- for(tr in v_trailing) {
- indicator_val_col = 'indicator_' + tr;
-
- // 乘上100,000 是为了满足 window join 的字段必须是INT或DURATION
- tb_tmp = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']),
- sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int')),
- from = entity_ranking,
- where = < _$indicator_val_col is not null >,
- orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col])
- ).eval();
-
- tb_tmp2 = sql(select = (sqlCol(['end_date', 'category_id', 'indicator_id']),
- sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int'),
- sqlCol('absrank_' + tr), sqlCol('perrank_' + tr)
- ),
- from = benchmark_ranking,
- where = < _$indicator_val_col is not null >,
- orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col])
- ).eval();
- absrank_col = 'absrank_' + tr;
- perrank_col = 'perrank_' + tr;
- // 用 pwj 来找最接近的排名
- tb_tmp_ranking = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']),
- sqlCol(indicator_val_col + '_int'),
- sqlCol(['absrank_max', 'perrank_max'])),
- from = pwj(tb_tmp, tb_tmp2,
- window = 0:1,
- aggs = [<max(_$absrank_col) as 'absrank_max'>, <max(_$perrank_col) as 'perrank_max'>],
- matchingCols = ['end_date', 'category_id', 'indicator_id', indicator_val_col + '_int'])
- ).eval();
- // 计算的结果填入排名表
- sqlUpdate(table = entity_ranking,
- updates = [<absrank_max as _$absrank_col>, <perrank_max as _$perrank_col>],
- from = <ej(entity_ranking, tb_tmp_ranking, ['entity_id', 'end_date', 'category_id','indicator_id'])>
- ).eval();
- }
- }
- /*
- * 排名数据入库
- *
- * @param ranking_by <STRING>: 'strategy', 'bfi'
- * @param ranking_tables <VECTOR>: 当 ranking_by = 'strategy' 时包含4个数据表的向量,分别是一级策略排名,一级策略排名阈值,二级策略排名,二级策略排名阈值
- * ranking_by = 'bfi' 时包含2个数据表的向量,分别是bfi策略排名,bfi策略排名阈值
- */
- def save_ranking_tables(ranking_by, ranking_tables) {
- if(ranking_tables.isVoid()) return;
- entity_id_col = 'fund_id';
-
- if(ranking_by == 'bfi') {
- source_table = 'raw_db.pf_fund_bfi_bm_indicator_ranking';
- target_table = 'raw_db.pf_fund_bfi_bm_indicator_ranking';
- category_id_col = 'factor_id';
-
- } else {
- source_table = 'raw_db.pf_fund_indicator_ranking';
- target_table = 'raw_db.pf_fund_indicator_ranking';
- category_id_col = 'strategy';
- }
- t = ranking_tables[0];
- save_and_sync(t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]), source_table, target_table);
- t = ranking_tables[1];
- save_and_sync(t.rename!('category_id', category_id_col), source_table + '_num', target_table + '_num');
- if(ranking_by == 'strategy') {
- source_table = source_table.strReplace('_ranking', '_substrategy_ranking');
- target_table = target_table.strReplace('_ranking', '_substrategy_ranking');
- category_id_col = 'substrategy';
-
- t = ranking_tables[2];
- save_and_sync(t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]), source_table, target_table);
- t = ranking_tables[3];
- save_and_sync(t.rename!('category_id', category_id_col), source_table + '_num', target_table + '_num');
- }
-
- }
- /*
- * 参考排名数据入库
- *
- * @param ranking_tables <TABLE>:
- */
- def save_relative_ranking_table(entity_type, ranking_table, ranking_by) {
- if(ranking_table.isVoid()) return;
- source_table = '';
- target_table = '';
- if(entity_type == 'PF') {
- entity_id_col = 'portfolio_id';
- if(ranking_by == 'strategy') {
- source_table = 'raw_db.pf_portfolio_indicator_ranking';
- target_table = 'raw_db.pf_portfolio_indicator_ranking';
- } else if(ranking_by == 'substrategy') {save_relative_ranking_table
- source_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking';
- target_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking';
- } else if(ranking_by == 'bfi') {
- source_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking';
- target_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking';
- }
-
- } else if(entity_type == 'CF') {
- entity_id_col = 'fund_id';
- source_table = 'raw_db.pf_cus_fund_indicator_ranking';
- target_table = 'raw_db.pf_cus_fund_indicator_ranking'
-
- }
- save_and_sync(ranking_table, source_table, target_table);
-
- }
|