123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- module fundit::bfiMatcher
- use fundit::sqlUtilities;
- use fundit::operationDataPuller;
- use fundit::performanceDataPuller;
- use fundit::dataSaver;
- /*
- * 返回预设的指标最小值
- *
- * NOTE: 对数据量的要求, Java 计算coe相关性表时用48,但计算bfi时用10,这里统一用10
- *
- */
- def get_min_threshold(data_name) {
- d = dict(STRING, FLOAT);
- d['correlation'] = 0.64;
- d['data_count'] = 10;
- d['beta'] = 0.64;
- d['t_value'] = 2.58;
- d['r2'] = 0.64;
- d['r2_neutral'] = 0.04;
- return d[data_name];
- }
- /*
- * T-value 的聚合函数版
- *
- */
- defg regressionT(y, x) {
- r = SELECT beta, tstat FROM ols(y, x, true, 1) WHERE rowNo(beta) = 1;
- return r[0]['tstat'];
- }
- /*
- * 计算 correlation & bfi-matching 所需要的数据指标(周数据计算,返回月度结果)
- *
- * NOTE: 与 Java BFI 不同,这里为了与RBSA保持统一,用收益率来计算相关性;转化成月度时用了各周平均值
- */
- def cal_monthly_closity(entity, nav1, nav2, win) {
- n1 = nav1;
- n1.sortBy!(['entity_id', 'price_date'], [1, 1]);
- n2 = nav2;
- n2.sortBy!(['benchmark_id', 'price_date'], [1, 1]);
- t_dates = SELECT entity_id, end_date, price_date FROM nav1 WHERE end_date >= entity.end_date;
- t0 = SELECT entity_id, end_date, n1.nav AS nav1, n1.nav.ratios()-1 AS ret1,
- n2.nav AS nav2, n2.nav.ratios()-1 AS ret2, tmoving(count, end_date, end_date, win) AS data_count
- FROM n1
- INNER JOIN n2 ON n1.end_date = n2.end_date
- ORDER BY end_date;
- t_rt = table(100:0, ['entity_id', 'end_date', 'price_date', 'corr', /* 'info', */ 't_value', 'beta'],
- [entity.entity_id.type(), t_dates.end_date[0].type(), DATE, DOUBLE, /* DOUBLE, */ DOUBLE, DOUBLE]);
-
- for(dt in t_dates) {
- if( (EXEC data_count FROM t0 WHERE end_date = dt.end_date)[0] >= get_min_threshold('data_count') ){
- rets = EXEC ret1, ret2 FROM t0 WHERE end_date BETWEEN(dt.end_date.temporalAdd(duration('-' + win$STRING)):dt.end_date);
-
- cor = corr(rets.ret1, rets.ret2);
- // info = mean(rets.ret1 - rets.ret2) \ std(rets.ret1 - rets.ret2); // 貌似没用
- t_value = regressionT(rets.ret1, rets.ret2);
- bta = beta(rets.ret1, rets.ret2); // 用 ols() 算的值和这个一样
-
- INSERT INTO t_rt VALUES (entity.entity_id, dt.end_date, dt.price_date, cor, /*info ,*/ t_value, bta);
- }
- }
- // 将每月各周的数据平均值作为月度数据返回
- return SELECT entity_id, price_date.month().last() AS end_date, price_date.last() AS price_date,
- corr.avg() AS corr,
- // info.avg() AS info,
- t_value.avg() AS t_value,
- beta.avg() AS beta
- FROM t_rt
- GROUP BY entity_id, price_date.month();
- }
- /*
- * 计算目标和各资产类别指数及BFI因子的累计净值相关系数
- *
- * @param entity_info <TABLE>: [COLUMNS] entity_id, end_date, price_date (这个 price_date 是需要计算的最早 price_date)
- * @param nav_entity <TABLE>: [COLUMNS] entity_id, end_date, price_date, nav
- * @param nav_index <TABLE>: [COLUMNS] entity_id, end_date, price_date, nav
- * @param entity_coe OUT <TABLE>: [COLUMNS] entity_id, end_date, index_id, coe_1y, coe_3y, coe_5y, t_value_1y, t_value_3y, t_value_5y, beta_1y, beta_3y, beta_5y
- *
- * NOTE: 1)整合 Java中 TampCalcCorrelationServiceImpl 和 BestFitIndexServiceImpl 做的计算, 取消了没用的 info_ratio
- * 2)周度数据时,nav表中的 end_date=price_date.weekEnd(); 月度数据时, end_date=price_date.month()
- * 3)暂时保留取NAV,现算return的方法
- *
- */
- def cal_index_coe(entity_info, nav_entity, nav_index, mutable entity_coe) {
- if(nav_entity.isVoid() || nav_entity.size() == 0 || nav_index.isVoid() || nav_index.size() == 0) return null;
- v_indexes = nav_index.entity_id.distinct();
- // 两次循环遍历所有entity和指数
- for(entity in entity_info) {
- //entity= entity_info[0]
- nav1 = SELECT entity_id, end_date, price_date, nav
- FROM nav_entity WHERE entity_id = entity.entity_id;
- for(index in v_indexes) {
- //index=v_indexes[0]
- nav2 = SELECT entity_id AS benchmark_id, end_date, price_date, nav
- FROM nav_index WHERE entity_id = index;
- if(nav2.isVoid() || nav2.size() == 0) continue; // 忽略已经停止更新的指数,或者是特殊的无风险利率 IN0000000M
- closity_1y = cal_monthly_closity(entity, nav1, nav2, 1y);
- closity_3y = cal_monthly_closity(entity, nav1, nav2, 3y);
- closity_5y = cal_monthly_closity(entity, nav1, nav2, 5y);
- INSERT INTO entity_coe
- SELECT c1.entity_id, c1.end_date, index,
- c1.corr AS coe_1y, c3.corr AS coe_3y, c5.corr AS coe_5y,
- //c1.corr2 AS coe_1y_2, c3.corr2 AS coe_3y_2, c5.corr2 AS coe_5y_2,
- //c1.info AS info_ratio_1y, c3.info AS info_ratio_3y, c5.info AS info_ratio_5y,
- c1.t_value AS t_value_1y, c3.t_value AS t_value_3y, c5.t_value AS t_value_5y,
- c1.beta AS beta_1y, c3.beta AS beta_3y, c5.beta AS beta_5y
- FROM closity_1y c1
- LEFT JOIN closity_3y c3 ON c1.end_date = c3.end_date
- LEFT JOIN closity_5y c5 ON c1.end_date = c5.end_date;
- }
- }
- }
- /*
- * 计算基金/组合和各资产类别指数及BFI因子的累计净值相关系数 (用周收益计算)
- *
- * @param entity_info <TABLE>: [COLUMNS] entity_id, price_date
- *
- * NOTE: 整合 Java中 TampCalcCorrelationServiceImpl 和 BestFitIndexServiceImpl 做的计算
- *
- * Example: cal_entity_index_coe('MF', get_fund_info(['MF00003PW1', 'MF00003PW2', 'MF00003RZI']).join(take(2024.09.30, 3) AS price_date).rename!('fund_id', 'entity_id'));
- * cal_entity_index_coe('PF', get_portfolio_info([166002]).join(take(2024.09.30, 1) AS price_date).rename!('portfolio_id', 'entity_id'));
- *
- */
- def cal_entity_index_coe(entity_type, entity_info) {
- if(entity_info.isVoid() || entity_info.size() == 0) return null;
- // 取数据集中最早日期作为因子的起始日期
- start_day = entity_info.price_date.min();
- // 取数据集每个基金组合指定日期之前5年至今的周净值
- s_json = (SELECT entity_id AS sec_id, price_date.temporalAdd(-5y) AS price_date FROM entity_info).toStdJson();
- nav_entity = get_nav_for_return_calculation(entity_type, 'w', s_json);
- if(nav_entity.isVoid() || nav_entity.size() == 0) return null;
- nav_entity = SELECT sec_id AS entity_id, price_date.weekEnd() AS end_date, price_date, cumulative_nav AS nav FROM nav_entity;
- // 取相关性计算及BFI用得到的指数/因子列表
- // 只有基金需要单独做相关性计算,目的是为基金推荐做数据准备
- if(entity_type in ('MF', 'HF'))
- v_indexes = (get_bfi_index_list().factor_id <- get_correlation_index_list().entity_id).distinct();
- else {
- v_indexes = get_bfi_index_list().factor_id;
- // Portfolio_id 改回整型
- v_port_id = nav_entity.entity_id$INT;
- nav_entity.replaceColumn!('entity_id', v_port_id);
- }
- s_json2 = table(v_indexes AS sec_id, take(start_day.temporalAdd(-5y), v_indexes.size()) AS price_date).toStdJson();
- // 取指数及因子周点位
- nav_index = get_nav_for_return_calculation('FA', 'w', s_json2).unionAll(get_nav_for_return_calculation('MI', 'w', s_json2));
- nav_index = SELECT sec_id AS entity_id, price_date.weekEnd() AS end_date, price_date, cumulative_nav AS nav FROM nav_index;
- // 按照SQL 建表
- entity_coe = create_entity_index_coe(iif(entity_type == 'PF', true, false));
- t_ei = entity_info.join(entity_info.price_date.weekEnd() AS end_date);
- cal_index_coe(t_ei, nav_entity, nav_index, entity_coe);
- return entity_coe;
- }
- /*
- * 匹配BFI, 逻辑和 Java BestFitIndexServiceImpl 类似
- *
- * @param entity_info <TABLE>: [COLUMNS] entity_id, strategy
- * @param entity_coe <TABLE>: [COLUMNS] entity_id, end_date, index_id, coe_1y, t_value_1y, beta_1y
- *
- * NOTE: Java 中的 rule2 还包括 FA00000VN7 (100%中证全指 IN0000007N) 是不对的,而且漏掉了CTA和FOF。已将DEV数据库中此因子划入category_group 74;另外找r2最小的因子也离谱
- * rule3 FA00000VMX (100%中证转债 中证转债)漏掉了公募债券(FOF, 相对价值(套利),多策略,公募混合是否要加?怕会和股票打架,待研究)
- *
- UPDATE pfdb.`cm_factor_information`
- SET category_group_id = 74, category='全市场', factor_name='全市场', category_group='规模', strategy=',101,102,103,107,', maximum_num=1, updaterid=123, updatetime='2024-11-25'
- WHERE factor_id = 'FA00000VN7';
-
- UPDATE pfdb.`cm_factor_information`
- SET category_group_id = 78, category_group='配置', maximum_num=1, updaterid=123, updatetime='2024-11-25'
- WHERE factor_id = 'FA00000VNB' AND category_group_id = 80;
-
- UPDATE pfdb.`cm_factor_information`
- SET category_group_id = 78, category_group='配置', maximum_num=1, updaterid=123, updatetime='2024-11-25'
- WHERE factor_id = 'FA00000VND' AND category_group_id = 76;
- *
- */
- def match_entity_bfi(entity_type, entity_info, entity_coe) {
- // 特殊因子:现金,可被应用于所有策略
- v_factor_cash = ['FA000000MJ'];
- //有一些特殊的因子只会被部分策略所用, 否则会引起歧义
- v_factor_1 = ['FA00000VMY', 'FA00000VMZ', 'FA00000VN0', 'FA00000VN1', 'FA00000VN2', 'FA00000VN3', 'FA00000VN4', 'FA00000VN5', 'FA00000VN6'];
- v_strategy_1 = [3, 7, 8, 105]; // 私募CTA, 私募FOF, 私募多策略, 公募商品
- v_factor_2 = ['FA00000SMB', 'FA00000VMG'];
- v_strategy_2 = [1, 3, 5, 7, 8]; // 私募股票(多空),CTA, 相对价值,私募FOF, 私募多策略
- v_factor_3 = ['FA00000VMX'];
- v_strategy_3 = [6, 103]; // 私募固收,公募债券
- // 只需要BFI因子的相关性数据
- coe = SELECT ei.strategy, entity_coe.*, l.*
- FROM ej(entity_info ei, ej(entity_coe, get_bfi_index_list() AS l, 'index_id', 'factor_id'), 'entity_id')
- ORDER BY entity_id, end_date, category_group_id, coe_1y DESC, order_id;
- t_bfi_raw = table(1000:0,
- ['entity_id', 'end_date', 'category_group_id', 'factor_id', 'rank', 'coe_1y', 'r2',
- //'rank2', 'coe_1y_2', 'r2_2',
- 'performance_flag', 't_value_1y', 'beta_1y', 'maximum_num', 'order_id', 'factor_name'],
- [iif(entity_type=='PF', INT, SYMBOL), MONTH, SHORT, SYMBOL, SHORT, DOUBLE, DOUBLE,
- //SHORT, DOUBLE, DOUBLE,
- STRING, DOUBLE, DOUBLE, SHORT, SHORT, STRING]);
- // 首先处理特殊情况 TODO: java treats rule2 differently by finding min R2 without checking t_value & corr
- v_special_rule = [v_factor_1, v_factor_2, v_factor_3];
- v_special_strategy = [v_strategy_1, v_strategy_2, v_strategy_3];
-
- for(i in 0..v_special_rule.size()-1) {
- INSERT INTO t_bfi_raw
- SELECT * FROM (
- SELECT entity_id, end_date, category_group_id, index_id AS factor_id,
- coe_1y.rank(false) AS rank, coe_1y, square(coe_1y) AS r2,
- 'w', t_value_1y, beta_1y,
- maximum_num, order_id, factor_name
- FROM coe
- WHERE strategy IN v_special_strategy[i]
- AND index_id IN v_special_rule[i].join(v_factor_cash)
- AND t_value_1y >= get_min_threshold('t_value')
- AND coe_1y >= get_min_threshold('correlation')
- AND order_id IS NOT NULL
- CONTEXT BY entity_id, end_date, category_group_id )
- WHERE rank < maximum_num;
- DELETE FROM coe WHERE index_id IN v_special_rule[i];
- }
-
- INSERT INTO t_bfi_raw
- SELECT * FROM (
- SELECT entity_id, end_date, category_group_id, index_id AS factor_id,
- coe_1y.rank(false) AS rank, coe_1y, square(coe_1y) AS r2,
- 'w', t_value_1y, beta_1y,
- maximum_num, order_id, factor_name
- FROM coe
- WHERE t_value_1y >= get_min_threshold('t_value')
- AND coe_1y >= get_min_threshold('correlation')
- AND order_id IS NOT NULL
- CONTEXT BY entity_id, end_date, category_group_id )
- WHERE rank < maximum_num;
- return SELECT * FROM t_bfi_raw ORDER BY entity_id, end_date, category_group_id;
- }
- /*
- * 计算基金经理和各资产类别指数及BFI因子的累计净值相关系数 (用月收益计算)
- * 参考基金/组合的 cal_entity_index_coe 和 match_entity_bfi 的主要代码
- *
- * @param entity_info <TABLE>: [COLUMNS] entity_id, curve_type, strategy, price_date
- *
- *
- * Example: match_mc_bfi('PL', get_personnel_info_for_perf(['PL000000AN', 'PL00000JOU']).join(take(2024.05.30, 5) AS price_date).rename!('manager_id', 'entity_id'));
- *
- */
- def match_mc_bfi(entity_type, entity_info) {
- if(entity_info.isVoid() || entity_info.size() == 0) return null;
- // 取数据集中最早日期作为因子的起始日期
- start_day = entity_info.price_date.min();
- // 取数据集每个基金组合指定日期之前5年至今的周净值
- s_json = (SELECT entity_id, curve_type, strategy, price_date.temporalAdd(-5y).temporalFormat('yyyy-MM') AS end_date FROM entity_info).toStdJson();
- nav_entity = get_mc_nav_for_return_calculation(entity_type, s_json, 2);
- if(nav_entity.isVoid() || nav_entity.size() == 0) return null;
- // fund_manager_fitted_curve 和 company_fitted_curve 表里没有price_date, 这里用 businessMonthEnd 日期填充
- nav_entity = SELECT entity_id, curve_type, strategy,
- temporalParse(end_date, 'yyyy-MM').month() AS end_date,
- temporalParse(end_date+'-01', 'yyyy-MM-dd').businessMonthEnd() AS price_date, cumulative_nav AS nav
- FROM nav_entity;
- // 取相关性计算及BFI用得到的指数/因子列表
- // 只有基金需要单独做相关性计算,目的是为基金推荐做数据准备
- v_indexes = get_bfi_index_list().factor_id;
- s_json2 = table(v_indexes AS sec_id, take(start_day.temporalAdd(-5y), v_indexes.size()) AS price_date).toStdJson();
- // 取指数及因子周点位
- nav_index = get_nav_for_return_calculation('FA', 'm', s_json2).unionAll(get_nav_for_return_calculation('MI', 'm', s_json2));
- nav_index = SELECT sec_id AS entity_id, price_date.month() AS end_date, price_date, cumulative_nav AS nav FROM nav_index;
- // 按照SQL 建表
- t_factor_bfi = create_mc_factor_bfi();
- v_curve_type = [1, 4, 7];
- for(cur in v_curve_type) {
- t_ei = SELECT entity_id, price_date.month() AS end_date, price_date FROM entity_info WHERE curve_type = cur AND strategy = 0;
- t_ne = SELECT entity_id, end_date, price_date, nav FROM nav_entity WHERE curve_type = cur AND strategy = 0;
- t_coe = create_mc_index_coe().dropColumns!(['curve_type', 'strategy']);
-
- cal_index_coe(t_ei, t_ne, nav_index, t_coe);
- // 基金经理/公司全策略暂时借用公募混合基金的策略ID 102
- t_ei.join!(take(102, t_ei.size()) AS strategy);
- t_bfi_raw = match_entity_bfi(entity_type, t_ei, t_coe);
- if(t_bfi_raw.isVoid() || t_bfi_raw.size() == 0) continue;
- INSERT INTO t_factor_bfi
- SELECT entity_id, cur, 0 AS strategy, end_date, factor_id, coe_1y AS coe, r2 AS r2, 'm' AS performance_flag, t_value_1y, beta_1y
- FROM t_bfi_raw;
- }
- return t_factor_bfi;
- }
|