module fundit::rankingCalculator use fundit::sqlUtilities use fundit::operationDataPuller; use fundit::performanceDataPuller; use fundit::dataSaver /* * 汇集所有参与排名的指标信息 * */ def get_indicator_info() { ids = [1, 2, 6, 9, 10, 11, 12, 21, 50, 52, 59, 14, 15, 16, 17, 18, 19, 40, 58, 37, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49, 33, 34, 35, 36, 66, 53, 54, 55, 56, 57, 71, 72, 73 ]; names = ['ret', 'maxdrawdown', 'kurtosis', 'skewness', 'stddev', 'alpha', 'beta', 'downsidedev', 'maxdrawdown_months', 'maxdrawdown_recoverymonths', 'winrate', 'kapparatio', 'treynorratio', 'jensen', 'omegaratio', 'sharperatio', 'sortinoratio_MAR', 'calmarratio', 'sortinoratio', 'per_con', 'info_ratio', 'var', 'cvar', 'smddvar', 'smddcvar', 'smdd_lpm1', 'smdd_lpm2', 'smdd_downside_dev', 'tracking_error', 'm2', 'upsidecapture_ret', 'downsidecapture_ret', 'upsidecapture_ratio', 'downsidecapture_ratio', 'stability', 'jc_stddev', 'gzstyle_stddev', 'gzstrategy_stddev', 'zz_stddev', 'zx_stddev', 'ms_return', 'ms_rar', 'ms_risk' ]; is_ASCs = [false, true, true, false, true, false, false, true, true, true, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, false, false, false, false, true, true, true, true, true, true, true, false, false, true ]; return table(names AS name, ids AS id, is_ASCs AS is_ASC); } /* * 自定义百分位计算 * */ defg perRank(x, is_ASC) { return (100 * x.rank(ascending=is_ASC, percent=true)).round(0); } /* * 动态生成用于排序的SQL脚本 * * @param data_table : 指标横表 * @param indicator_table
: 指标表,有 id, name, is_ASC 字段 * * TODO: portfolio, cf, manager, company, * TODO: bfi & category * */ def gen_ranking_sql(entity_type, data_table, indicator_table) { ranking = iif(entity_type IN ['PL', 'CO'], create_mc_indicator_ranking(), create_entity_indicator_ranking()); ranking_num = iif(entity_type IN ['PL', 'CO'], create_mc_indicator_ranking_num(), create_entity_indicator_ranking_num()); if(entity_type IN ['PL', 'CO']) v_groupby = ['curve_type', 'strategy', 'category_id', 'end_date']; else v_groupby = ['category_id', 'end_date']; for(indicator in indicator_table) { // 与 MySQL 不同,这里统一把近4年和成立以来的排名去掉 if(indicator.id == 1) { v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd']; // 晨星指标只有3,5,10年 } else if(indicator.id in (71, 72, 73)) { v_trailing = ['3y', '5y', '10y']; v_missing_trailing = ['1m', '3m', '6m', '1y', '2y', 'ytd']; } else { v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd']; v_missing_trailing = ['1m', '3m']; } // 绝对排名和百分位排名 t_ranking = sql(select = (sqlCol(['entity_id'].join(v_groupby)), , sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, rank{, indicator.is_ASC}, 'absrank_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, perRank{, indicator.is_ASC}, 'perrank_' + v_trailing) ), from = data_table, where = < category_id IS NOT NULL>, groupBy = sqlCol(v_groupby), groupFlag = 0 ).eval(); // context by // 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL if(indicator.id != 1) { v_tmp_col = ['indicator_' + v_missing_trailing, 'absrank_' + v_missing_trailing, 'perrank_' + v_missing_trailing].flatten(); v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(INT, v_missing_trailing.size())].flatten(); t_ranking.addColumn(v_tmp_col, v_tmp_type); } t_ranking.reorderColumns!(ranking.colNames()); ranking.tableInsert(t_ranking); // 平均值、集合数量、各分位的阈值 t_ranking_num = sql(select =(sqlCol(v_groupby), iif(entity_type IN ['PL', 'CO'], , // 取消基金经理/公司的 raise_type 字段 [sqlCol('raise_type', mean, 'raise_type'), ]), sqlCol(indicator.name + '_' + v_trailing, mean, 'avg_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, count, 'avg_' + v_trailing + '_cnt'), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 5, 95)}, 'perrank_percent_5_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 10, 90)}, 'perrank_percent_10_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 25, 75)}, 'perrank_percent_25_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 50, 50)}, 'perrank_percent_50_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 75, 25)}, 'perrank_percent_75_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 90, 10)}, 'perrank_percent_90_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 95, 5)}, 'perrank_percent_95_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, min, max), 'best_' + v_trailing), sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, max, min), 'worst_' + v_trailing) ), from = data_table, where = < category_id IS NOT NULL>, groupBy = sqlCol(v_groupby), groupFlag = 1).eval(); // group by // 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL if(indicator.id != 1) { v_tmp_col = ['avg_' + v_missing_trailing, 'avg_' + v_missing_trailing + '_cnt', 'perrank_percent_5_' + v_missing_trailing, 'perrank_percent_10_' + v_missing_trailing, 'perrank_percent_25_' + v_missing_trailing, 'perrank_percent_50_' + v_missing_trailing, 'perrank_percent_75_' + v_missing_trailing, 'perrank_percent_90_' + v_missing_trailing, 'perrank_percent_95_' + v_missing_trailing, 'best_' + v_missing_trailing, 'worst_' + v_missing_trailing ].flatten(); v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),take(DOUBLE, v_missing_trailing.size()) ].flatten(); t_ranking_num.addColumn(v_tmp_col, v_tmp_type); } t_ranking_num.reorderColumns!(ranking_num.colNames()); ranking_num.tableInsert(t_ranking_num); } return ranking, ranking_num; } /* * 运行排名SQL脚本 * * */ def run_ranking_sql(entity_type, ranking_by, mutable data_table, indicator_table) { // data_table = t // ranking_by = 'strategy' ret = array(ANY, 0); if(ranking_by == 'bfi') { UPDATE data_table SET category_id = factor_id; v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table); ret.append!(v_ranking[0]); // ranking table ret.append!(v_ranking[1]); // ranking_num table } else { // 策略排名 UPDATE data_table SET category_id = strategy$STRING; v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table); ret.append!(v_ranking[0]); // ranking table ret.append!(v_ranking[1]); // ranking_num table // 基金经理和公司没有子策略排名 if(! (entity_type IN ['PL', 'CO']) ) { // 子策略排名 UPDATE data_table SET category_id = substrategy$STRING; v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table); ret.append!(v_ranking[0]); // ranking table ret.append!(v_ranking[1]); // ranking_num table } } return ret; } /* * 为排名做数据准备 * * TODO: 对少量组合做优化 * * @return : 包含两个表,一个指标数据表,一个是指标信息表 * */ def prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) { // return table_desc = get_performance_table_description(entity_type); tb_data_return = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); entity_id_name = table_desc.sec_id_col[0]; // risk table_desc = get_risk_stats_table_description(entity_type); tb_data_risk_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); // risk adjusted return table_desc = get_riskadjret_stats_table_description(entity_type); tb_data_riskadjret_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); // others table_desc = get_indicator_table_description(entity_type); tb_data_indicator_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); // 做个大宽表 if(entity_type IN ['PL', 'CO']) { matchingCols = [entity_id_name, 'curve_type', 'strategy', 'end_date']; matchingCols2 = [entity_id_name, 'curve_type', 'strategy', 'end_date', 'factor_id']; } else { matchingCols = [entity_id_name, 'end_date']; matchingCols2 = [entity_id_name, 'end_date', 'factor_id']; } tb_data = lj(lj(lj(tb_data_return, tb_data_indicator_stats, matchingCols), tb_data_risk_stats, matchingCols), tb_data_riskadjret_stats, matchingCols); if(ranking_by == 'bfi') { // 去掉被移到 fund_ty_bfi_bm_indicator 表中的重复字段 v_dups = [38, 48, 11, 12, 59, 16]; v_dup_col = EXEC name + suffix FROM cj(get_indicator_info(), table(['_6m', '_1y', '_2y', '_3y', '_5y', '_10y', '_ytd'] AS suffix)) WHERE id IN v_dups; tb_data.dropColumns!(v_dup_col); // bfi table table_desc = get_bfi_by_category_group_table_description(entity_type); tb_bfi = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); // bfi (as benchmark) indicator table_desc = get_bfi_indicator_table_description(entity_type); tb_data_bfi_indicator = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); tb_data = lj(ej(tb_data, tb_bfi, matchingCols), tb_data_bfi_indicator, matchingCols2); v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义 41, 42, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算 2, 6, 9, 10, 21, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算 14, 15, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算 11, 12, 16, 33, 34, 35, 36, 38, 48, 59 // 对应 fund_ty_bfi_bm_indicator ]; // 取消 pf_fund_factor_stability 66 (stabiliy) 因为 dolphin 未计算 // 取消 fund_rbsa_style 53, 54, 55, 56, 57(风格稳定性) 因为 dolphin 未计算 } else { // upside/downside capture table_desc = get_capture_style_table_description(entity_type); tb_data_capture_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); tb_data = lj(tb_data, tb_data_capture_stats, matchingCols); // morningstar data table_desc = get_ms_stats_table_description(entity_type); tb_data_ms_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL); tb_data = lj(tb_data, tb_data_ms_stats, matchingCols); v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义 38, 41, 42, 48, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算 2, 6, 9, 10, 11, 12, 21, 59, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算 14, 15, 16, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算 33, 34, 35, 36, // 对应 fund_style_stats 71, 72, 73 // 对应 fund_ms_stats ]; } tb_data.rename!(entity_id_name, 'entity_id'); if(entity_type IN ['PL', 'CO']) t = SELECT * FROM entity_info en INNER JOIN tb_data d ON en.entity_id = d.entity_id AND en.curve_type = d.curve_type AND en.strategy = d.strategy WHERE en.strategy IS NOT NULL; else t = SELECT * FROM entity_info en INNER JOIN tb_data d ON en.entity_id = d.entity_id WHERE en.strategy IS NOT NULL; if(ranking_by == 'bfi') UPDATE t SET category_id = factor_id; else if(ranking_by == 'substrategy') UPDATE t SET category_id = substrategy$STRING; else UPDATE t SET category_id = strategy$STRING; indicator_table = SELECT * FROM get_indicator_info() WHERE id IN v_indicator_id; return t, indicator_table; } /* * 通用指标排名计算 * * @param ranking_by : strategy, bfi * */ def cal_indicator_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) { // 当前只对基金做排名, 其它类型参考基金排名做相对排名 if(!(entity_type in ['MF', 'HF', 'PL', 'CO'])) return null; v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL); v_ranking_tables = run_ranking_sql(entity_type, ranking_by, v[0], v[1]); return v_ranking_tables; } /* * 将源指标表横表变竖表,以方便参考排名计算 * * */ def run_transformation_sql(entity_type, data_table, ranking_by, indicator_info) { // 只有 portfolio_id 是整型,其它的都是字符串 is_id_integer = false; if(entity_type == 'PF') is_id_integer = true; tb_ranking = create_entity_indicator_ranking(is_id_integer); for(indicator in indicator_info) { // 只有收益需要1m, 3m if(indicator.id == 1) v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd']; // 晨星指标只有3,5,10年 else if(indicator.id in (71, 72, 73)) { v_trailing = ['3y', '5y', '10y']; v_missing = ['1m', '3m', '6m', '1y', '2y', 'ytd']; } else { v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd']; v_missing = ['1m', '3m']; } t = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id']), , sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing) ), from = data_table ).eval(); // 给非收益指标补上缺失的指标 if(indicator.id != 1 ) { v_tmp_col = ['indicator_' + v_missing, 'absrank_' + v_missing, 'perrank_' + v_missing].flatten(); v_tmp_type = [take(DOUBLE, v_missing.size()), take(INT, v_missing.size()), take(INT, v_missing.size())].flatten(); t.addColumn(v_tmp_col, v_tmp_type); } // 给所有指标补上 absrank 和 perrank 两套指标 v_tmp_col = ['absrank_' + v_trailing, 'perrank_' + v_trailing].flatten(); v_tmp_type = [take(INT, v_trailing.size()), take(INT, v_trailing.size())].flatten(); t.addColumn(v_tmp_col, v_tmp_type); INSERT INTO tb_ranking SELECT * FROM (sql(select = sqlCol(tb_ranking.colNames()), from = t).eval()); } return tb_ranking; } /* * 将源风险指标表横表变竖表,以方便排名计算 * * */ def transform_data_for_ranking (entity_type, entity_info, end_date, ranking_by, isFromMySQL=true) { if(entity_info.isVoid() || entity_info.size() == 0) return null; v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL); tb_ranking = run_transformation_sql(entity_type, v[0], ranking_by, v[1]); return tb_ranking; } /* * * 参考某指定类排名,计算相对排名 * * @param benchmark_ranking
: 被参考的排名表,如公募混合基金 * @param entity_ranking
: 被计算的指标表,排名被填充在原表中 * @param isFromMySQL * * * Example: cal_relative_ranking(get_fund_indicator_ranking(NULL, 2024.09M, 102, true), * transform_risk_stats_for_ranking('PF', get_entity_info('PF', NULL), 2024.09M, true), * true); */ def cal_relative_ranking(benchmark_ranking, mutable entity_ranking, isFromMySQL=true) { v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd']; for(tr in v_trailing) { indicator_val_col = 'indicator_' + tr; // 乘上100,000 是为了满足 window join 的字段必须是INT或DURATION tb_tmp = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']), sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int')), from = entity_ranking, where = < _$indicator_val_col is not null >, orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col]) ).eval(); tb_tmp2 = sql(select = (sqlCol(['end_date', 'category_id', 'indicator_id']), sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int'), sqlCol('absrank_' + tr), sqlCol('perrank_' + tr) ), from = benchmark_ranking, where = < _$indicator_val_col is not null >, orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col]) ).eval(); absrank_col = 'absrank_' + tr; perrank_col = 'perrank_' + tr; // 用 pwj 来找最接近的排名 tb_tmp_ranking = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']), sqlCol(indicator_val_col + '_int'), sqlCol(['absrank_max', 'perrank_max'])), from = pwj(tb_tmp, tb_tmp2, window = 0:1, aggs = [, ], matchingCols = ['end_date', 'category_id', 'indicator_id', indicator_val_col + '_int']) ).eval(); // 计算的结果填入排名表 sqlUpdate(table = entity_ranking, updates = [, ], from = ).eval(); } } /* * 排名数据入库 * * @param ranking_by : 'strategy', 'bfi' * @param ranking_tables : 当 ranking_by = 'strategy' 时包含4个数据表的向量,分别是一级策略排名,一级策略排名阈值,二级策略排名,二级策略排名阈值 * ranking_by = 'bfi' 时包含2个数据表的向量,分别是bfi策略排名,bfi策略排名阈值 */ def save_ranking_tables(entity_type, ranking_by, ranking_tables) { if(ranking_tables.isVoid()) return; des_strategy = get_indicator_ranking_table_description(entity_type)[0]; des_bfi = get_bfi_bm_indicator_ranking_table_description(entity_type)[0]; entity_id_col = des_strategy.sec_id_col; t = ranking_tables[0]; if(ranking_by == 'bfi') { source_table = des_bfi.table_name.strReplace('pfdb', 'raw_db'); target_table = des_bfi.table_name.strReplace('pfdb', 'raw_db'); category_id_col = 'factor_id'; t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]); } else { source_table = des_strategy.table_name.strReplace('pfdb', 'raw_db'); target_table = des_strategy.table_name.strReplace('pfdb', 'raw_db'); category_id_col = 'strategy'; // 基金经理和公司自带 strategy 字段,而 category_id 就是 strategy 的复制,所以要去掉 // 基金和组合则没有这个字段,将 category_id 改名改回 strategy if(entity_type IN ['PL', 'CO']) t.rename!('entity_id', entity_id_col).dropColumns!('category_id'); else t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]); } save_and_sync(t, source_table, target_table); t = ranking_tables[1]; if(ranking_by == 'bfi') { t.rename!('category_id', category_id_col); } else { if(entity_type IN ['PL', 'CO']) t.dropColumns!('category_id'); else t.rename!('category_id', category_id_col); } save_and_sync(t, source_table + '_num', target_table + '_num'); // 基金有二级策略排名 if(ranking_by == 'strategy' && entity_type IN ['HF', 'MF']) { source_table = source_table.strReplace('_ranking', '_substrategy_ranking'); target_table = target_table.strReplace('_ranking', '_substrategy_ranking'); category_id_col = 'substrategy'; t = ranking_tables[2]; save_and_sync(t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]), source_table, target_table); t = ranking_tables[3]; save_and_sync(t.rename!('category_id', category_id_col), source_table + '_num', target_table + '_num'); } } /* * 参考排名数据入库 * * @param ranking_tables
: */ def save_relative_ranking_table(entity_type, ranking_table, ranking_by) { if(ranking_table.isVoid()) return; source_table = ''; target_table = ''; if(entity_type == 'PF') { entity_id_col = 'portfolio_id'; if(ranking_by == 'strategy') { source_table = 'raw_db.pf_portfolio_indicator_ranking'; target_table = 'raw_db.pf_portfolio_indicator_ranking'; } else if(ranking_by == 'substrategy') {save_relative_ranking_table source_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking'; target_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking'; } else if(ranking_by == 'bfi') { source_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking'; target_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking'; } } else if(entity_type == 'CF') { entity_id_col = 'fund_id'; source_table = 'raw_db.pf_cus_fund_indicator_ranking'; target_table = 'raw_db.pf_cus_fund_indicator_ranking' } save_and_sync(ranking_table, source_table, target_table); }