module fundit::rankingCalculator
use fundit::sqlUtilities
use fundit::operationDataPuller;
use fundit::performanceDataPuller;
use fundit::dataSaver
/*
* 汇集所有参与排名的指标信息
*
*/
def get_indicator_info() {
ids = [1,
2, 6, 9, 10, 11, 12, 21, 50, 52, 59,
14, 15, 16, 17, 18, 19, 40, 58,
37, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49,
33, 34, 35, 36,
66, 53, 54, 55, 56, 57,
71, 72, 73
];
names = ['ret',
'maxdrawdown', 'kurtosis', 'skewness', 'stddev', 'alpha', 'beta', 'downsidedev', 'maxdrawdown_months', 'maxdrawdown_recoverymonths', 'winrate',
'kapparatio', 'treynorratio', 'jensen', 'omegaratio', 'sharperatio', 'sortinoratio_MAR', 'calmarratio', 'sortinoratio',
'per_con', 'info_ratio', 'var', 'cvar', 'smddvar', 'smddcvar', 'smdd_lpm1', 'smdd_lpm2', 'smdd_downside_dev', 'tracking_error', 'm2',
'upsidecapture_ret', 'downsidecapture_ret', 'upsidecapture_ratio', 'downsidecapture_ratio',
'stability', 'jc_stddev', 'gzstyle_stddev', 'gzstrategy_stddev', 'zz_stddev', 'zx_stddev',
'ms_return', 'ms_rar', 'ms_risk'
];
is_ASCs = [false,
true, true, false, true, false, false, true, true, true, false,
false, false, false, false, false, false, false, false,
false, false, true, true, true, true, true, true, true, true, false,
false, false, false, true,
true, true, true, true, true, true,
false, false, true
];
return table(names AS name, ids AS id, is_ASCs AS is_ASC);
}
/*
* 自定义百分位计算
*
*/
defg perRank(x, is_ASC) {
return (100 * x.rank(ascending=is_ASC, percent=true)).round(0);
}
/*
* 动态生成用于排序的SQL脚本
*
* @param data_table
: 指标横表
* @param indicator_table : 指标表,有 id, name, is_ASC 字段
*
* TODO: portfolio, cf, manager, company,
* TODO: bfi & category
*
*/
def gen_ranking_sql(entity_type, data_table, indicator_table) {
ranking = iif(entity_type IN ['PL', 'CO'], create_mc_indicator_ranking(), create_entity_indicator_ranking());
ranking_num = iif(entity_type IN ['PL', 'CO'], create_mc_indicator_ranking_num(), create_entity_indicator_ranking_num());
if(entity_type IN ['PL', 'CO'])
v_groupby = ['curve_type', 'strategy', 'category_id', 'end_date'];
else
v_groupby = ['category_id', 'end_date'];
for(indicator in indicator_table) {
// 与 MySQL 不同,这里统一把近4年和成立以来的排名去掉
if(indicator.id == 1) {
v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
// 晨星指标只有3,5,10年
} else if(indicator.id in (71, 72, 73)) {
v_trailing = ['3y', '5y', '10y'];
v_missing_trailing = ['1m', '3m', '6m', '1y', '2y', 'ytd'];
} else {
v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
v_missing_trailing = ['1m', '3m'];
}
// 绝对排名和百分位排名
t_ranking = sql(select = (sqlCol(['entity_id'].join(v_groupby)), ,
sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, rank{, indicator.is_ASC}, 'absrank_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, perRank{, indicator.is_ASC}, 'perrank_' + v_trailing)
),
from = data_table,
where = < category_id IS NOT NULL>,
groupBy = sqlCol(v_groupby),
groupFlag = 0 ).eval(); // context by
// 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL
if(indicator.id != 1) {
v_tmp_col = ['indicator_' + v_missing_trailing, 'absrank_' + v_missing_trailing, 'perrank_' + v_missing_trailing].flatten();
v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(INT, v_missing_trailing.size())].flatten();
t_ranking.addColumn(v_tmp_col, v_tmp_type);
}
t_ranking.reorderColumns!(ranking.colNames());
ranking.tableInsert(t_ranking);
// 平均值、集合数量、各分位的阈值
t_ranking_num = sql(select =(sqlCol(v_groupby),
iif(entity_type IN ['PL', 'CO'], , // 取消基金经理/公司的 raise_type 字段
[sqlCol('raise_type', mean, 'raise_type'), ]),
sqlCol(indicator.name + '_' + v_trailing, mean, 'avg_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, count, 'avg_' + v_trailing + '_cnt'),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 5, 95)}, 'perrank_percent_5_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 10, 90)}, 'perrank_percent_10_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 25, 75)}, 'perrank_percent_25_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 50, 50)}, 'perrank_percent_50_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 75, 25)}, 'perrank_percent_75_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 90, 10)}, 'perrank_percent_90_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, percentile{, iif(indicator.is_ASC, 95, 5)}, 'perrank_percent_95_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, min, max), 'best_' + v_trailing),
sqlCol(indicator.name + '_' + v_trailing, iif(indicator.is_ASC, max, min), 'worst_' + v_trailing)
),
from = data_table,
where = < category_id IS NOT NULL>,
groupBy = sqlCol(v_groupby),
groupFlag = 1).eval(); // group by
// 为了满足表结构的要求, 非收益的指标要补上1m和3m的字段,虽然都是NULL
if(indicator.id != 1) {
v_tmp_col = ['avg_' + v_missing_trailing, 'avg_' + v_missing_trailing + '_cnt', 'perrank_percent_5_' + v_missing_trailing,
'perrank_percent_10_' + v_missing_trailing, 'perrank_percent_25_' + v_missing_trailing,
'perrank_percent_50_' + v_missing_trailing, 'perrank_percent_75_' + v_missing_trailing,
'perrank_percent_90_' + v_missing_trailing, 'perrank_percent_95_' + v_missing_trailing,
'best_' + v_missing_trailing, 'worst_' + v_missing_trailing
].flatten();
v_tmp_type = [take(DOUBLE, v_missing_trailing.size()), take(INT, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
take(DOUBLE, v_missing_trailing.size()), take(DOUBLE, v_missing_trailing.size()),
take(DOUBLE, v_missing_trailing.size()),take(DOUBLE, v_missing_trailing.size())
].flatten();
t_ranking_num.addColumn(v_tmp_col, v_tmp_type);
}
t_ranking_num.reorderColumns!(ranking_num.colNames());
ranking_num.tableInsert(t_ranking_num);
}
return ranking, ranking_num;
}
/*
* 运行排名SQL脚本
*
*
*/
def run_ranking_sql(entity_type, ranking_by, mutable data_table, indicator_table) {
// data_table = t
// ranking_by = 'strategy'
ret = array(ANY, 0);
if(ranking_by == 'bfi') {
UPDATE data_table SET category_id = factor_id;
v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table);
ret.append!(v_ranking[0]); // ranking table
ret.append!(v_ranking[1]); // ranking_num table
} else {
// 策略排名
UPDATE data_table SET category_id = strategy$STRING;
v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table);
ret.append!(v_ranking[0]); // ranking table
ret.append!(v_ranking[1]); // ranking_num table
// 基金经理和公司没有子策略排名
if(! (entity_type IN ['PL', 'CO']) ) {
// 子策略排名
UPDATE data_table SET category_id = substrategy$STRING;
v_ranking = gen_ranking_sql(entity_type, data_table, indicator_table);
ret.append!(v_ranking[0]); // ranking table
ret.append!(v_ranking[1]); // ranking_num table
}
}
return ret;
}
/*
* 为排名做数据准备
*
* TODO: 对少量组合做优化
*
* @return : 包含两个表,一个指标数据表,一个是指标信息表
*
*/
def prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) {
// return
table_desc = get_performance_table_description(entity_type);
tb_data_return = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
entity_id_name = table_desc.sec_id_col[0];
// risk
table_desc = get_risk_stats_table_description(entity_type);
tb_data_risk_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
// risk adjusted return
table_desc = get_riskadjret_stats_table_description(entity_type);
tb_data_riskadjret_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
// others
table_desc = get_indicator_table_description(entity_type);
tb_data_indicator_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
// 做个大宽表
if(entity_type IN ['PL', 'CO']) {
matchingCols = [entity_id_name, 'curve_type', 'strategy', 'end_date'];
matchingCols2 = [entity_id_name, 'curve_type', 'strategy', 'end_date', 'factor_id'];
} else {
matchingCols = [entity_id_name, 'end_date'];
matchingCols2 = [entity_id_name, 'end_date', 'factor_id'];
}
tb_data = lj(lj(lj(tb_data_return, tb_data_indicator_stats, matchingCols), tb_data_risk_stats, matchingCols), tb_data_riskadjret_stats, matchingCols);
if(ranking_by == 'bfi') {
// 去掉被移到 fund_ty_bfi_bm_indicator 表中的重复字段
v_dups = [38, 48, 11, 12, 59, 16];
v_dup_col = EXEC name + suffix
FROM cj(get_indicator_info(), table(['_6m', '_1y', '_2y', '_3y', '_5y', '_10y', '_ytd'] AS suffix))
WHERE id IN v_dups;
tb_data.dropColumns!(v_dup_col);
// bfi table
table_desc = get_bfi_by_category_group_table_description(entity_type);
tb_bfi = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
// bfi (as benchmark) indicator
table_desc = get_bfi_indicator_table_description(entity_type);
tb_data_bfi_indicator = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
tb_data = lj(ej(tb_data, tb_bfi, matchingCols), tb_data_bfi_indicator, matchingCols2);
v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义
41, 42, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算
2, 6, 9, 10, 21, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算
14, 15, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算
11, 12, 16, 33, 34, 35, 36, 38, 48, 59 // 对应 fund_ty_bfi_bm_indicator
]; // 取消 pf_fund_factor_stability 66 (stabiliy) 因为 dolphin 未计算
// 取消 fund_rbsa_style 53, 54, 55, 56, 57(风格稳定性) 因为 dolphin 未计算
} else {
// upside/downside capture
table_desc = get_capture_style_table_description(entity_type);
tb_data_capture_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
tb_data = lj(tb_data, tb_data_capture_stats, matchingCols);
// morningstar data
table_desc = get_ms_stats_table_description(entity_type);
tb_data_ms_stats = get_monthly_indicator_data(table_desc.table_name[0], end_date, isFromMySQL);
tb_data = lj(tb_data, tb_data_ms_stats, matchingCols);
v_indicator_id = [1, // 对应 fund_performance, 取消39(年化收益) 因为没有意义
38, 41, 42, 48, 49, // 对应 fund_indicator, 取消37 (per_con), 43, 44, 45, 46, 47 (smdd模型) 因为dolphin 未计算
2, 6, 9, 10, 11, 12, 21, 59, // 对应 fund_risk_stats, 取消50, 52 因为 dolphin 未计算
14, 15, 16, 17, 18, 40, 58, // 对应 fund_riskadjret_stats 取消19 (MAR Sortino ratio) 因为 dolphin 未计算
33, 34, 35, 36, // 对应 fund_style_stats
71, 72, 73 // 对应 fund_ms_stats
];
}
tb_data.rename!(entity_id_name, 'entity_id');
if(entity_type IN ['PL', 'CO'])
t = SELECT * FROM entity_info en
INNER JOIN tb_data d ON en.entity_id = d.entity_id AND en.curve_type = d.curve_type AND en.strategy = d.strategy
WHERE en.strategy IS NOT NULL;
else
t = SELECT * FROM entity_info en
INNER JOIN tb_data d ON en.entity_id = d.entity_id
WHERE en.strategy IS NOT NULL;
if(ranking_by == 'bfi')
UPDATE t SET category_id = factor_id;
else if(ranking_by == 'substrategy')
UPDATE t SET category_id = substrategy$STRING;
else
UPDATE t SET category_id = strategy$STRING;
indicator_table = SELECT * FROM get_indicator_info() WHERE id IN v_indicator_id;
return t, indicator_table;
}
/*
* 通用指标排名计算
*
* @param ranking_by : strategy, bfi
*
*/
def cal_indicator_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL=true) {
// 当前只对基金做排名, 其它类型参考基金排名做相对排名
if(!(entity_type in ['MF', 'HF', 'PL', 'CO'])) return null;
v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL);
v_ranking_tables = run_ranking_sql(entity_type, ranking_by, v[0], v[1]);
return v_ranking_tables;
}
/*
* 将源指标表横表变竖表,以方便参考排名计算
*
*
*/
def run_transformation_sql(entity_type, data_table, ranking_by, indicator_info) {
// 只有 portfolio_id 是整型,其它的都是字符串
is_id_integer = false;
if(entity_type == 'PF') is_id_integer = true;
tb_ranking = create_entity_indicator_ranking(is_id_integer);
for(indicator in indicator_info) {
// 只有收益需要1m, 3m
if(indicator.id == 1)
v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
// 晨星指标只有3,5,10年
else if(indicator.id in (71, 72, 73)) {
v_trailing = ['3y', '5y', '10y'];
v_missing = ['1m', '3m', '6m', '1y', '2y', 'ytd'];
}
else {
v_trailing = ['6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
v_missing = ['1m', '3m'];
}
t = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id']), ,
sqlCol(indicator.name + '_' + v_trailing,, 'indicator_' + v_trailing)
),
from = data_table
).eval();
// 给非收益指标补上缺失的指标
if(indicator.id != 1 )
{
v_tmp_col = ['indicator_' + v_missing, 'absrank_' + v_missing, 'perrank_' + v_missing].flatten();
v_tmp_type = [take(DOUBLE, v_missing.size()), take(INT, v_missing.size()), take(INT, v_missing.size())].flatten();
t.addColumn(v_tmp_col, v_tmp_type);
}
// 给所有指标补上 absrank 和 perrank 两套指标
v_tmp_col = ['absrank_' + v_trailing, 'perrank_' + v_trailing].flatten();
v_tmp_type = [take(INT, v_trailing.size()), take(INT, v_trailing.size())].flatten();
t.addColumn(v_tmp_col, v_tmp_type);
INSERT INTO tb_ranking
SELECT * FROM (sql(select = sqlCol(tb_ranking.colNames()),
from = t).eval());
}
return tb_ranking;
}
/*
* 将源风险指标表横表变竖表,以方便排名计算
*
*
*/
def transform_data_for_ranking (entity_type, entity_info, end_date, ranking_by, isFromMySQL=true) {
if(entity_info.isVoid() || entity_info.size() == 0) return null;
v = prepare_data_for_ranking(ranking_by, entity_type, entity_info, end_date, isFromMySQL);
tb_ranking = run_transformation_sql(entity_type, v[0], ranking_by, v[1]);
return tb_ranking;
}
/*
*
* 参考某指定类排名,计算相对排名
*
* @param benchmark_ranking : 被参考的排名表,如公募混合基金
* @param entity_ranking : 被计算的指标表,排名被填充在原表中
* @param isFromMySQL
*
*
* Example: cal_relative_ranking(get_fund_indicator_ranking(NULL, 2024.09M, 102, true),
* transform_risk_stats_for_ranking('PF', get_entity_info('PF', NULL), 2024.09M, true),
* true);
*/
def cal_relative_ranking(benchmark_ranking, mutable entity_ranking, isFromMySQL=true) {
v_trailing = ['1m', '3m', '6m', '1y', '2y', '3y', '5y', '10y', 'ytd'];
for(tr in v_trailing) {
indicator_val_col = 'indicator_' + tr;
// 乘上100,000 是为了满足 window join 的字段必须是INT或DURATION
tb_tmp = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']),
sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int')),
from = entity_ranking,
where = < _$indicator_val_col is not null >,
orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col])
).eval();
tb_tmp2 = sql(select = (sqlCol(['end_date', 'category_id', 'indicator_id']),
sqlColAlias(makeCall(round, binaryExpr(sqlCol(indicator_val_col), 1000000, *), 0), indicator_val_col + '_int'),
sqlCol('absrank_' + tr), sqlCol('perrank_' + tr)
),
from = benchmark_ranking,
where = < _$indicator_val_col is not null >,
orderBy = sqlCol(['end_date', 'category_id', 'indicator_id', indicator_val_col])
).eval();
absrank_col = 'absrank_' + tr;
perrank_col = 'perrank_' + tr;
// 用 pwj 来找最接近的排名
tb_tmp_ranking = sql(select = (sqlCol(['entity_id', 'end_date', 'category_id', 'indicator_id']),
sqlCol(indicator_val_col + '_int'),
sqlCol(['absrank_max', 'perrank_max'])),
from = pwj(tb_tmp, tb_tmp2,
window = 0:1,
aggs = [, ],
matchingCols = ['end_date', 'category_id', 'indicator_id', indicator_val_col + '_int'])
).eval();
// 计算的结果填入排名表
sqlUpdate(table = entity_ranking,
updates = [, ],
from =
).eval();
}
}
/*
* 排名数据入库
*
* @param ranking_by : 'strategy', 'bfi'
* @param ranking_tables : 当 ranking_by = 'strategy' 时包含4个数据表的向量,分别是一级策略排名,一级策略排名阈值,二级策略排名,二级策略排名阈值
* ranking_by = 'bfi' 时包含2个数据表的向量,分别是bfi策略排名,bfi策略排名阈值
*/
def save_ranking_tables(entity_type, ranking_by, ranking_tables) {
if(ranking_tables.isVoid()) return;
des_strategy = get_indicator_ranking_table_description(entity_type)[0];
des_bfi = get_bfi_bm_indicator_ranking_table_description(entity_type)[0];
entity_id_col = des_strategy.sec_id_col;
t = ranking_tables[0];
if(ranking_by == 'bfi') {
source_table = des_bfi.table_name.strReplace('pfdb', 'raw_db');
target_table = des_bfi.table_name.strReplace('pfdb', 'raw_db');
category_id_col = 'factor_id';
t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]);
} else {
source_table = des_strategy.table_name.strReplace('pfdb', 'raw_db');
target_table = des_strategy.table_name.strReplace('pfdb', 'raw_db');
category_id_col = 'strategy';
// 基金经理和公司自带 strategy 字段,而 category_id 就是 strategy 的复制,所以要去掉
// 基金和组合则没有这个字段,将 category_id 改名改回 strategy
if(entity_type IN ['PL', 'CO'])
t.rename!('entity_id', entity_id_col).dropColumns!('category_id');
else
t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]);
}
save_and_sync(t, source_table, target_table);
t = ranking_tables[1];
if(ranking_by == 'bfi') {
t.rename!('category_id', category_id_col);
} else {
if(entity_type IN ['PL', 'CO'])
t.dropColumns!('category_id');
else
t.rename!('category_id', category_id_col);
}
save_and_sync(t, source_table + '_num', target_table + '_num');
// 基金有二级策略排名
if(ranking_by == 'strategy' && entity_type IN ['HF', 'MF']) {
source_table = source_table.strReplace('_ranking', '_substrategy_ranking');
target_table = target_table.strReplace('_ranking', '_substrategy_ranking');
category_id_col = 'substrategy';
t = ranking_tables[2];
save_and_sync(t.rename!(['entity_id', 'category_id'], [entity_id_col, category_id_col]), source_table, target_table);
t = ranking_tables[3];
save_and_sync(t.rename!('category_id', category_id_col), source_table + '_num', target_table + '_num');
}
}
/*
* 参考排名数据入库
*
* @param ranking_tables :
*/
def save_relative_ranking_table(entity_type, ranking_table, ranking_by) {
if(ranking_table.isVoid()) return;
source_table = '';
target_table = '';
if(entity_type == 'PF') {
entity_id_col = 'portfolio_id';
if(ranking_by == 'strategy') {
source_table = 'raw_db.pf_portfolio_indicator_ranking';
target_table = 'raw_db.pf_portfolio_indicator_ranking';
} else if(ranking_by == 'substrategy') {save_relative_ranking_table
source_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking';
target_table = 'raw_db.pf_portfolio_indicator_substrategy_ranking';
} else if(ranking_by == 'bfi') {
source_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking';
target_table = 'raw_db.pf_portfolio_bfi_bm_indicator_ranking';
}
} else if(entity_type == 'CF') {
entity_id_col = 'fund_id';
source_table = 'raw_db.pf_cus_fund_indicator_ranking';
target_table = 'raw_db.pf_cus_fund_indicator_ranking'
}
save_and_sync(ranking_table, source_table, target_table);
}