@卖萌的兔 这里几十个文件呢,而且都很大。 我告诉你怎么算吧
```
def get_return(factor_data, forward_return_data, date_str, index, n_quantile=10):
    """Return the mean forward return of each factor quantile on one date.

    The stocks of the chosen universe are sorted into ``n_quantile`` buckets
    by their factor value on ``date_str``; the mean forward return of every
    bucket is reported, followed by the overall mean return of that date.

    Args:
        factor_data: Table whose row labels contain the dates; the row for
            ``date_str`` is then looked up by stock label (presumably a
            DataFrame of dates x stock codes -- confirm against the caller).
        forward_return_data: Forward returns, laid out like ``factor_data``.
        date_str: Date label to evaluate.
        index: ``'all'`` to use every stock returned by
            ``get_all_securities``; otherwise an index code passed to
            ``get_index_stocks``. Both helpers are defined outside this
            block (they look like the research-platform API).
        n_quantile: Number of buckets; defaults to 10.

    Returns:
        A list of ``n_quantile + 1`` values: the mean return of bucket
        1..n_quantile (lowest factor values first) followed by the overall
        mean. Every entry is NaN when ``date_str`` is missing from either
        input.

    Raises:
        ValueError: If ``n_quantile`` is smaller than 1.
    """
    if n_quantile < 1:
        raise ValueError("n_quantile must be a positive integer")

    # Nothing can be computed unless both tables contain the date.
    if date_str not in factor_data.index or date_str not in forward_return_data.index:
        return [np.nan] * (n_quantile + 1)

    if index == 'all':
        stock_list = get_all_securities(types=['stock'], date=date_str).index.tolist()
    else:
        stock_list = get_index_stocks(index, date=date_str)

    # ``.ix`` was removed from pandas; ``.loc`` picks the row by label and
    # ``reindex`` tolerates stocks that have no factor column (they become
    # NaN and are dropped), which is what ``.ix`` used to do.
    factor = factor_data.loc[date_str].reindex(stock_list).dropna()
    returns = forward_return_data.loc[date_str].dropna()

    # NOTE: the overall mean covers every stock with a return on this date,
    # not only the members of ``stock_list``.
    overall_mean = returns.mean()

    # Compute all bucket boundaries once instead of twice per iteration.
    edges = [factor.quantile(q) for q in np.linspace(0.0, 1.0, n_quantile + 1)]

    results = []
    for i in range(n_quantile):
        lower, upper = edges[i], edges[i + 1]
        if i + 1 == n_quantile:
            # The top bucket is closed on the right so the maximum is kept.
            in_bucket = (factor >= lower) & (factor <= upper)
        else:
            in_bucket = (factor >= lower) & (factor < upper)
        members = factor[in_bucket].index
        # ``reindex`` yields NaN for stocks without a return; ``mean`` skips them.
        results.append(returns.reindex(members).mean())

    # Final layout: [q1, q2, ..., qN, overall mean].
    results.append(overall_mean)
    return results
```
2018-02-22