本平台基于Jupyter实现,预置数个交易策略及其回测代码,用于课堂教学和实验室研究。
策略示例
因子策略之成交量排序
In [81]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import matplotlib as mpl
import datetime
import numpy as np
import statsmodels.api as sm
import math
import warnings
import seaborn as sns
sns.set()
warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import akshare as ak
In [82]:
import akshare as ak
df = ak.index_zh_a_hist(symbol="000300", period="daily", start_date="20050301", end_date="20210531")[['日期','开盘','收盘','最高','最低','成交量']]
df
df['ma20'] = pd.Series.rolling(df['收盘'],window = 20).mean()
In [83]:
#计算因子,成交量排名因子
import numpy as np
def cal_volumesort(df,n):
df['paiming'] = np.nan
for i in range(n,df.shape[0]):
sorted_volume = sorted(df['成交量'][i-n:i+1])
df['paiming'][i] = sorted_volume.index(df['成交量'][i])+1
df['paiming_guiyi'] =(2*df['paiming'] - n -2)/n
return df
dff = cal_volumesort(df,40)
dff
Out[83]:
日期 | 开盘 | 收盘 | 最高 | 最低 | 成交量 | ma20 | paiming | paiming_guiyi | |
---|---|---|---|---|---|---|---|---|---|
0 | 2005-03-01 | 1039.35 | 1035.93 | 1042.74 | 1031.17 | 9962090 | NaN | NaN | NaN |
1 | 2005-03-02 | 1036.37 | 1021.32 | 1045.76 | 1021.00 | 14225135 | NaN | NaN | NaN |
2 | 2005-03-03 | 1019.88 | 1027.71 | 1028.40 | 1014.75 | 9203085 | NaN | NaN | NaN |
3 | 2005-03-04 | 1027.93 | 1023.67 | 1031.85 | 1022.18 | 8173641 | NaN | NaN | NaN |
4 | 2005-03-07 | 1024.48 | 1029.87 | 1031.34 | 1024.48 | 7932949 | NaN | NaN | NaN |
… | … | … | … | … | … | … | … | … | … |
3948 | 2021-05-25 | 5165.04 | 5318.48 | 5324.36 | 5161.52 | 182996838 | 5113.5180 | 41.0 | 1.00 |
3949 | 2021-05-26 | 5326.12 | 5320.59 | 5344.28 | 5308.74 | 169607096 | 5122.7750 | 38.0 | 0.85 |
3950 | 2021-05-27 | 5311.37 | 5338.23 | 5378.48 | 5286.05 | 148462856 | 5135.8245 | 29.0 | 0.40 |
3951 | 2021-05-28 | 5338.73 | 5321.09 | 5360.28 | 5288.65 | 164605806 | 5147.3530 | 37.0 | 0.80 |
3952 | 2021-05-31 | 5318.08 | 5331.57 | 5331.63 | 5281.69 | 144603269 | 5157.9695 | 26.0 | 0.25 |
3953 rows × 9 columns
In [84]:
#策略实现:
# def run_original_strategy(df,N,S):
# df = df.copy()
# df = df.copy()
# df = cal_volumesort(df,N)
# df['flag'] = 0
# df['position'] = 0
# position = 0
# df = df.dropna().reset_index(drop=True)
# for i in range(df.shape[0]):
# if df.loc[i,'paiming_guiyi']>S and position == 0:
# df.loc[i,'flag']=1
# df.loc[i+1,'position'] = 1
# position = 1
# elif df.loc[i,'paiming_guiyi']<S and position == 1:
# df.loc[i,'flag']=-1
# df.loc[i+1,'position'] = 0
# position = 0
# else:
# df['position'][i+1] = df['position'][i]
# df['pct_change'] = df['收盘'].pct_change(1)
# df['net_value'] = (1+(df['pct_change'])*df.position).cumprod()
# df['benshen'] = (1+df['pct_change']).cumprod()
# return df.dropna()
# df1 = run_original_strategy(df,40,0.5)
# df1
#添加交易费用
def run_original_strategy(df,N,S):
df = df.copy()
df = df.copy()
df = cal_volumesort(df,N)
df['flag'] = 0
df['position'] = 0
position = 0
df = df.dropna().reset_index(drop=True)
df['trade_fee'] = 0.0
fee_rate = 0.001
for i in range(df.shape[0]):
if df.loc[i,'paiming_guiyi']>S and position == 0:
df.loc[i,'flag']=1
df.loc[i, 'trade_fee'] = df.loc[i, '收盘'] * fee_rate
df.loc[i+1,'position'] = 1
position = 1
elif df.loc[i,'paiming_guiyi']<S and position == 1:
df.loc[i,'flag']=-1
df.loc[i, 'trade_fee'] = df.loc[i, '收盘'] * fee_rate
df.loc[i+1,'position'] = 0
position = 0
else:
df['position'][i+1] = df['position'][i]
df['pct_change'] = df['收盘'].pct_change(1)
df['pct_change_adj'] = df['pct_change'] - df['trade_fee'] / df['收盘']
df['net_value'] = (1+(df['pct_change'])*df.position).cumprod()
df['net_value_n'] = (1+(df['pct_change_adj'])*df.position).cumprod()
df['benshen'] = (1+df['pct_change']).cumprod()
return df.dropna()
df1 = run_original_strategy(df,40,0.5)
df1
#如果再加入调仓周期呢(比如每月最后一天调仓)
Out[84]:
日期 | 开盘 | 收盘 | 最高 | 最低 | 成交量 | ma20 | paiming | paiming_guiyi | flag | position | trade_fee | pct_change | pct_change_adj | net_value | net_value_n | benshen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2005-04-27 | 938.57 | 926.60 | 938.91 | 925.90 | 10780611.0 | 964.6700 | 22.0 | 0.05 | 0.0 | 0.0 | 0.00000 | -0.011184 | -0.011184 | 1.000000 | 1.000000 | 0.988816 |
2 | 2005-04-28 | 923.53 | 942.07 | 945.50 | 914.83 | 14343486.0 | 964.6635 | 34.0 | 0.65 | 1.0 | 0.0 | 0.94207 | 0.016695 | 0.015695 | 1.000000 | 1.000000 | 1.005325 |
3 | 2005-04-29 | 940.81 | 932.40 | 942.45 | 929.81 | 11235419.0 | 962.3760 | 23.0 | 0.10 | -1.0 | 1.0 | 0.93240 | -0.010265 | -0.011265 | 0.989735 | 0.988735 | 0.995006 |
4 | 2005-05-09 | 934.65 | 909.17 | 937.39 | 909.17 | 8529115.0 | 959.7260 | 7.0 | -0.70 | 0.0 | 0.0 | 0.00000 | -0.024914 | -0.024914 | 0.989735 | 0.988735 | 0.970216 |
5 | 2005-05-10 | 905.54 | 913.08 | 913.39 | 892.31 | 10494331.0 | 957.6000 | 18.0 | -0.15 | 0.0 | 0.0 | 0.00000 | 0.004301 | 0.004301 | 0.989735 | 0.988735 | 0.974389 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
3908 | 2021-05-25 | 5165.04 | 5318.48 | 5324.36 | 5161.52 | 182996838.0 | 5113.5180 | 41.0 | 1.00 | 1.0 | 0.0 | 5.31848 | 0.031595 | 0.030595 | 9.077395 | 6.360596 | 5.675588 |
3909 | 2021-05-26 | 5326.12 | 5320.59 | 5344.28 | 5308.74 | 169607096.0 | 5122.7750 | 38.0 | 0.85 | 0.0 | 1.0 | 0.00000 | 0.000397 | 0.000397 | 9.080997 | 6.363119 | 5.677840 |
3910 | 2021-05-27 | 5311.37 | 5338.23 | 5378.48 | 5286.05 | 148462856.0 | 5135.8245 | 29.0 | 0.40 | -1.0 | 1.0 | 5.33823 | 0.003315 | 0.002315 | 9.111104 | 6.377852 | 5.696664 |
3911 | 2021-05-28 | 5338.73 | 5321.09 | 5360.28 | 5288.65 | 164605806.0 | 5147.3530 | 37.0 | 0.80 | 1.0 | 0.0 | 5.32109 | -0.003211 | -0.004211 | 9.111104 | 6.377852 | 5.678373 |
3912 | 2021-05-31 | 5318.08 | 5331.57 | 5331.63 | 5281.69 | 144603269.0 | 5157.9695 | 26.0 | 0.25 | -1.0 | 1.0 | 5.33157 | 0.001970 | 0.000970 | 9.129048 | 6.384036 | 5.689557 |
3912 rows × 17 columns
In [48]:
#跑图
import matplotlib.pyplot as plt
fig = plt.figure(figsize = (10,4),dpi = 100)
x = df1['日期']
plt.plot(x,df1['net_value'],label = '策略净值',color = 'r')
plt.plot(x,df1['net_value_n'],label='策略净值(交易费率)',color='b')
plt.plot(x,df1['benshen'],label = '沪深300指数',color = 'c')
# xlab = range(0,df1.shape[0]-1,int(df1.shape[0]/12))
# plt.xticks(xlab,df1.loc[xlab,'日期'],rotation = 30)
plt.legend(loc = 'best')
plt.show()
In [30]:
#计算回测指标(每一年的年化收益率,夏普比率,最大回撤,)
df2 = df1.copy()
df2['日期'] = pd.to_datetime(df2['日期'])
#年化收益率
df2.set_index('日期',inplace=True)
annual_value = df2['net_value'].resample('Y')
list(annual_value)
#计算每一年有两种方法:resample('y')或者groupby(df['日期'].dt.year)
Out[30]:
[(Timestamp('2005-12-31 00:00:00', freq='A-DEC'), 日期 2005-04-27 1.000000 2005-04-28 1.000000 2005-04-29 0.989735 2005-05-09 0.989735 2005-05-10 0.989735 ... 2005-12-26 1.051362 2005-12-27 1.047920 2005-12-28 1.049698 2005-12-29 1.062362 2005-12-30 1.052582 Name: net_value, Length: 168, dtype: float64), (Timestamp('2006-12-31 00:00:00', freq='A-DEC'), 日期 2006-01-04 1.073076 2006-01-05 1.093251 2006-01-06 1.105676 2006-01-09 1.111625 2006-01-10 1.114931 ... 2006-12-25 1.719127 2006-12-26 1.719127 2006-12-27 1.719127 2006-12-28 1.719127 2006-12-29 1.772196 Name: net_value, Length: 241, dtype: float64), (Timestamp('2007-12-31 00:00:00', freq='A-DEC'), 日期 2007-01-04 1.794806 2007-01-05 1.799834 2007-01-08 1.850784 2007-01-09 1.910287 2007-01-10 1.958806 ... 2007-12-24 2.290320 2007-12-25 2.294578 2007-12-26 2.315787 2007-12-27 2.360871 2007-12-28 2.348001 Name: net_value, Length: 242, dtype: float64), (Timestamp('2008-12-31 00:00:00', freq='A-DEC'), 日期 2008-01-02 2.368599 2008-01-03 2.384843 2008-01-04 2.411946 2008-01-07 2.444028 2008-01-08 2.431475 ... 2008-12-25 2.581121 2008-12-26 2.581121 2008-12-29 2.581121 2008-12-30 2.581121 2008-12-31 2.581121 Name: net_value, Length: 246, dtype: float64), (Timestamp('2009-12-31 00:00:00', freq='A-DEC'), 日期 2009-01-05 2.581121 2009-01-06 2.581121 2009-01-07 2.581121 2009-01-08 2.581121 2009-01-09 2.581121 ... 2009-12-25 2.944005 2009-12-28 2.944005 2009-12-29 2.944005 2009-12-30 2.944005 2009-12-31 2.944005 Name: net_value, Length: 244, dtype: float64), (Timestamp('2010-12-31 00:00:00', freq='A-DEC'), 日期 2010-01-04 2.944005 2010-01-05 2.944005 2010-01-06 2.944005 2010-01-07 2.944005 2010-01-08 2.944005 ... 2010-12-27 3.246186 2010-12-28 3.246186 2010-12-29 3.246186 2010-12-30 3.246186 2010-12-31 3.246186 Name: net_value, Length: 242, dtype: float64), (Timestamp('2011-12-31 00:00:00', freq='A-DEC'), 日期 2011-01-04 3.246186 2011-01-05 3.231918 2011-01-06 3.231918 2011-01-07 3.231918 2011-01-10 3.172283 ... 2011-12-26 3.534381 2011-12-27 3.534381 2011-12-28 3.534381 2011-12-29 3.534381 2011-12-30 3.534381 Name: net_value, Length: 244, dtype: float64), (Timestamp('2012-12-31 00:00:00', freq='A-DEC'), 日期 2012-01-04 3.534381 2012-01-05 3.534381 2012-01-06 3.556443 2012-01-09 3.556443 2012-01-10 3.674733 ... 2012-12-25 3.583772 2012-12-26 3.597267 2012-12-27 3.578195 2012-12-28 3.630099 2012-12-31 3.692892 Name: net_value, Length: 243, dtype: float64), (Timestamp('2013-12-31 00:00:00', freq='A-DEC'), 日期 2013-01-04 3.695029 2013-01-07 3.711979 2013-01-08 3.696376 2013-01-09 3.696376 2013-01-10 3.696376 ... 2013-12-25 4.111625 2013-12-26 4.111625 2013-12-27 4.111625 2013-12-30 4.111625 2013-12-31 4.111625 Name: net_value, Length: 238, dtype: float64), (Timestamp('2014-12-31 00:00:00', freq='A-DEC'), 日期 2014-01-02 4.111625 2014-01-03 4.111625 2014-01-06 4.111625 2014-01-07 4.111625 2014-01-08 4.111625 ... 2014-12-25 5.126517 2014-12-26 5.126517 2014-12-29 5.140830 2014-12-30 5.143939 2014-12-31 5.143939 Name: net_value, Length: 245, dtype: float64), (Timestamp('2015-12-31 00:00:00', freq='A-DEC'), 日期 2015-01-05 5.143939 2015-01-06 5.143261 2015-01-07 5.147117 2015-01-08 5.147117 2015-01-09 5.147117 ... 2015-12-25 6.774694 2015-12-28 6.774694 2015-12-29 6.774694 2015-12-30 6.774694 2015-12-31 6.774694 Name: net_value, Length: 244, dtype: float64), (Timestamp('2016-12-31 00:00:00', freq='A-DEC'), 日期 2016-01-04 6.774694 2016-01-05 6.774694 2016-01-06 6.774694 2016-01-07 6.774694 2016-01-08 6.774694 ... 2016-12-26 7.278410 2016-12-27 7.278410 2016-12-28 7.278410 2016-12-29 7.278410 2016-12-30 7.278410 Name: net_value, Length: 244, dtype: float64), (Timestamp('2017-12-31 00:00:00', freq='A-DEC'), 日期 2017-01-03 7.278410 2017-01-04 7.278410 2017-01-05 7.278410 2017-01-06 7.278410 2017-01-09 7.278410 ... 2017-12-25 7.636594 2017-12-26 7.636594 2017-12-27 7.636594 2017-12-28 7.636594 2017-12-29 7.636594 Name: net_value, Length: 244, dtype: float64), (Timestamp('2018-12-31 00:00:00', freq='A-DEC'), 日期 2018-01-02 7.636594 2018-01-03 7.681415 2018-01-04 7.713961 2018-01-05 7.713961 2018-01-08 7.753866 ... 2018-12-24 7.019752 2018-12-25 7.019752 2018-12-26 7.019752 2018-12-27 7.019752 2018-12-28 7.019752 Name: net_value, Length: 243, dtype: float64), (Timestamp('2019-12-31 00:00:00', freq='A-DEC'), 日期 2019-01-02 7.019752 2019-01-03 7.019752 2019-01-04 7.019752 2019-01-07 7.062367 2019-01-08 7.047106 ... 2019-12-25 7.986407 2019-12-26 7.986407 2019-12-27 7.978552 2019-12-30 8.096781 2019-12-31 8.126437 Name: net_value, Length: 244, dtype: float64), (Timestamp('2020-12-31 00:00:00', freq='A-DEC'), 日期 2020-01-02 8.236851 2020-01-03 8.222409 2020-01-06 8.191345 2020-01-07 8.252701 2020-01-08 8.157661 ... 2020-12-25 9.292844 2020-12-28 9.292844 2020-12-29 9.292844 2020-12-30 9.292844 2020-12-31 9.292844 Name: net_value, Length: 243, dtype: float64), (Timestamp('2021-12-31 00:00:00', freq='A-DEC'), 日期 2021-01-04 9.393471 2021-01-05 9.573183 2021-01-06 9.660863 2021-01-07 9.832034 2021-01-08 9.799526 ... 2021-05-25 9.077395 2021-05-26 9.080997 2021-05-27 9.111104 2021-05-28 9.111104 2021-05-31 9.129048 Name: net_value, Length: 97, dtype: float64)]
In [85]:
import numpy as np
df3 = df1.copy()
df3['日期'] = pd.to_datetime(df3['日期'])
group_data = df3.groupby(df3['日期'].dt.year)
annual_returns = {}
annual_vol = {}
max_down = {}
sharp={}
trade_winrate={}
trade_count={}
for year,group in group_data:
#年化收益率
annual_returns[year] = (group['net_value'].iloc[-1] - group['net_value'].iloc[0])/group['net_value'].iloc[0]
#年化波动率
annual_vol[year] = group['pct_change'].std()* (252 ** 0.5)
#最大回撤
max_rate = pd.Series.rolling(group['net_value'],window = len(group),min_periods=1).max()
max_down[year] = ((max_rate - group['net_value'])/max_rate).max()
#计算夏普比率
sharp[year] = annual_returns[year]/annual_vol[year]
#做多胜率
sell_trade = np.array(group[group['flag'] < 0].net_value)
buy_trade = np.array(group[group['flag'] > 0].net_value)
if group[group['flag']!=0]['flag'].iloc[0] < 0:
buy_trade = np.insert(buy_trade,0,group['net_value'].iloc[0])
if group[group['flag']!=0]['flag'].iloc[-1] == 1:
sell_trade = np.append(sell_trade,group['net_value'].iloc[-1])
trade_pct = (sell_trade - buy_trade)/buy_trade
trade_winrate[year] = len(trade_pct[trade_pct>0])/len(trade_pct)
#交易次数
trade_count[year] = len(buy_trade)*2
results_df = pd.DataFrame({
'年化收益率':annual_returns,
'年化波动率':annual_vol,
'最大回撤':max_down,
'夏普比率':sharp,
'做多胜率':trade_winrate,
'交易次数':trade_count
})
results_df
Out[85]:
年化收益率 | 年化波动率 | 最大回撤 | 夏普比率 | 做多胜率 | 交易次数 | |
---|---|---|---|---|---|---|
2005 | 0.052582 | 0.208093 | 0.050736 | 0.252685 | 0.562500 | 32 |
2006 | 0.651510 | 0.222871 | 0.047632 | 2.923258 | 0.782609 | 46 |
2007 | 0.308220 | 0.368108 | 0.162072 | 0.837309 | 0.714286 | 42 |
2008 | 0.089725 | 0.484516 | 0.167409 | 0.185185 | 0.478261 | 46 |
2009 | 0.140592 | 0.326617 | 0.115427 | 0.430448 | 0.739130 | 46 |
2010 | 0.102643 | 0.251362 | 0.087140 | 0.408347 | 0.590909 | 44 |
2011 | 0.088779 | 0.206636 | 0.035459 | 0.429643 | 0.608696 | 46 |
2012 | 0.044848 | 0.204371 | 0.133545 | 0.219447 | 0.481481 | 54 |
2013 | 0.112745 | 0.222203 | 0.055411 | 0.507397 | 0.458333 | 48 |
2014 | 0.251072 | 0.192548 | 0.068150 | 1.303947 | 0.520000 | 50 |
2015 | 0.317025 | 0.394255 | 0.128959 | 0.804110 | 0.846154 | 26 |
2016 | 0.074353 | 0.222238 | 0.050307 | 0.334562 | 0.521739 | 46 |
2017 | 0.049212 | 0.101465 | 0.033810 | 0.485013 | 0.464286 | 56 |
2018 | -0.080775 | 0.214243 | 0.162613 | -0.377022 | 0.500000 | 60 |
2019 | 0.157653 | 0.198537 | 0.053411 | 0.794073 | 0.476190 | 42 |
2020 | 0.128203 | 0.227709 | 0.078808 | 0.563015 | 0.785714 | 28 |
2021 | -0.028150 | 0.222799 | 0.114079 | -0.126345 | 0.714286 | 14 |
In [86]:
#循环n和s,计算年化收益和最大回测
import pandas as pd
S=[0.2,0.3,0.4,0.6,0.7,0.8]
N=[10,15,20,25,30,35,40,45,50,55,60]
annual_rate = pd.DataFrame(index=N,columns=S)
max_down = pd.DataFrame(index=N,columns=S)
# print(annual_rate.index)
# print(max_down)
#年化收益率
for i in N:
for j in S:
df = run_original_strategy(df,i,j)
dff = df.copy()
annual_rate.loc[i,j] = (dff['net_value'].iloc[-1]/dff['net_value'].iloc[0])**(1/17)-1
max_down.loc[i,j] = dff['pct_change'].std()*((252*17) ** 0.5)
print(annual_rate)
print(max_down)
0.2 0.3 0.4 0.6 0.7 0.8 10 0.142209 0.0956948 0.0827609 0.0661283 0.0672422 0.00470183 15 0.116233 0.106971 0.110715 0.0815936 0.0752791 0.045797 20 0.130223 0.13268 0.121639 0.104067 0.0852824 0.0526898 25 0.124471 0.108583 0.10413 0.0977906 0.0885462 0.0709694 30 0.108111 0.0934919 0.0735281 0.0817172 0.0793524 0.0825152 35 0.0687295 0.0787064 0.0604639 0.0800909 0.0751698 0.0880687 40 0.0693751 0.080896 0.0727802 0.0739836 0.0692706 0.0715929 45 0.058269 0.0690542 0.0679535 0.0656895 0.0686604 0.0632936 50 0.0676958 0.060232 0.0536289 0.0553279 0.0549616 0.0520086 55 0.0585059 0.06025 0.0575609 0.0523109 0.0543271 0.05069 60 0.0472388 0.0574737 0.0531075 0.0470148 0.0485322 0.0399418 0.2 0.3 0.4 0.6 0.7 0.8 10 1.11352 1.11358 1.11426 1.11487 1.116 1.11278 15 1.11337 1.11446 1.11555 1.11674 1.11811 1.1198 20 1.1216 1.1241 1.12618 1.1284 1.13086 1.13292 25 1.1315 1.13204 1.13236 1.13495 1.13764 1.13941 30 1.13621 1.12595 1.12818 1.11895 1.11207 1.10798 35 1.10809 1.10433 1.09652 1.08361 1.064 1.0478 40 1.03548 1.01029 0.991673 0.979649 0.976349 0.97763 45 0.960713 0.95712 0.95398 0.956324 0.951205 0.949798 50 0.948967 0.946349 0.947248 0.952209 0.954829 0.95465 55 0.950977 0.955286 0.960424 0.963298 0.967207 0.964548 60 0.964005 0.965237 0.972303 0.977945 0.988824 1.00102
In [ ]:
发表回复