首先,看看让人眼晕的一段代码,而且作者自己抱怨,非常耗时。
```
# Pick the stocks that closed at their daily limit-up price.
#
# NOTE: this is the unrefactored starting point that the rest of the article
# cleans up, so the commented-out experiments and the unused assignments are
# kept on purpose. Only the lost indentation and the broken `< =` operator
# have been repaired.
#
# stocks:      iterable of security code strings
# end_date:    passed to get_price() as the end of the 1-, 60- and 5-bar windows
# pre_date:    passed to get_price() as the end of the 2-bar window
# context, pre_date_10: not used in the body
# Returns the list of selected codes, in the order they appear in `stocks`.
def pick_high_limit(stocks,context,end_date,pre_date,pre_date_10):
    log.info('【pick_high_limit】')
    # Time-consuming
    df_panel = get_price(stocks, count = 1,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','pre_close','low'])
    df_close = df_panel['close']
    # log.info(df_close)
    df_open = df_panel['open']
    df_high_limit = df_panel['high_limit']
    df_low = df_panel['low']
    df_money = df_panel['money']
    df_pre_close = df_panel['pre_close']
    high_limit_stock = []
    for stock in (stocks):
        if(stock[0:2] == '30' or stock[0:2] == '68'):
            continue
        _high_limit = (df_high_limit[stock].values)
        _close = (df_close[stock].values)
        _open = (df_open[stock].values)
        _pre_close = (df_pre_close[stock].values)
        _low = (df_low[stock].values)
        if _close == _high_limit and _high_limit > _pre_close * 1.06:
            df_panel_50 = get_price(stock, count = 60,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            sum_plus_max_50 = (df_panel_50.loc[:,'close'] == df_panel_50.loc[:,'high_limit']).sum()
            # mean_60 = df_panel_50.loc[:,'close'].mean()
            # df_panel_30 = get_price(stock, count = 30,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            # mean_30 = df_panel_30.loc[:,'close'].mean()
            # df_panel_20 = get_price(stock, count = 20,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            # mean_20 = df_panel_20.loc[:,'close'].mean()
            df_panel_5 = get_price(stock, count = 5,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            sum_plus_max_5 = (df_panel_5.loc[:,'close'] == df_panel_5.loc[:,'high_limit']).sum()
            # df_panel_500 = get_price(stock, count = 500,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            # sum_plus_max_500 = (df_panel_500.loc[:,'close'] == df_panel_500.loc[:,'high_limit']).sum()
            # low_allday_500 = df_panel_500.loc[:,"close"].min()
            # high_alldayy_500 = df_panel_500.loc[:,"close"].max()
            # rate_500 = (high_alldayy_500 - low_allday_500) / low_allday_500
            # df_panel_10 = get_price(stock, count = 10,end_date=end_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            # sum_plus_max_10 = (df_panel_10.loc[:,'close'] == df_panel_10.loc[:,'high_limit']).sum()
            df_panel_3 = get_price(stock, count = 2,end_date=pre_date, frequency='daily', fields=['open', 'close','high_limit','money','low'],skip_paused=True)
            sum_plus_max_3 = (df_panel_3.loc[:,'close'] == df_panel_3.loc[:,'high_limit']).sum() # mean_10 * 0.0455 > mean_20 * 0.0181 and mean_20 * 0.0181 > mean_30 * 0.0158 and mean_30 * 0.0158 > mean_60 * 0.0094 and rate_500 < 2 and sum_plus_max_50 <= 4 and
            if sum_plus_max_3 == 0 and sum_plus_max_5 <= 2:
                high_limit_stock.append(stock)
    return high_limit_stock
```
那怎么去处理这看得人头晕目眩的代码?
### 一、去掉那些被注释掉的语句
1.1 在Pycharm中,可以通过正则表达式`^\s*#.*$`,去查找替换成空行。
1.2 Reformat Code(默认快捷键 Ctrl+Alt+L)重新格式化Py文件
### 二、去掉无用的赋值语句
无用的赋值语句,在Pycharm里面,看的非常清楚,
```
df_money = df_panel['money']
......
sum_plus_max_50 = (df_panel_50.loc[:, 'close'] == df_panel_50.loc[:, 'high_limit']).sum()
......
```
当删除掉`sum_plus_max_50`这一条语句后,会发现,它前面的一条赋值语句(`df_panel_50 = ...`),也成了无用的废语句了。
删完之后,再次ReFormat File
### 三、基本阅读理解,加上自己理解后的注释
变成了这样:
```
def pick_high_limit(stocks, context, end_date, pre_date, pre_date_10):
    """Pick stocks that closed at limit-up on end_date and were not recently hot.

    stocks: iterable of security code strings.
    end_date: end of the 1-bar and 5-bar get_price() windows.
    pre_date: end of the 2-bar get_price() window.
    context, pre_date_10: not used in the body.

    Returns the selected codes as a list, in the order they appear in stocks.
    """
    # Fetch the end_date bar for every candidate
    df_panel = get_price(stocks, count=1, end_date=end_date, frequency='daily',
                         fields=['open', 'close', 'high_limit', 'money', 'pre_close', 'low'])
    df_close = df_panel['close']
    df_open = df_panel['open']
    df_high_limit = df_panel['high_limit']
    df_low = df_panel['low']
    df_pre_close = df_panel['pre_close']

    high_limit_stock = []  # selected stocks
    for stock in stocks:
        if stock[0:2] == '30' or stock[0:2] == '68':  # skip ChiNext and STAR Market codes
            continue
        _high_limit = df_high_limit[stock].values
        _close = df_close[stock].values
        _open = df_open[stock].values
        _pre_close = df_pre_close[stock].values
        _low = df_low[stock].values
        # Closed at limit-up, and the limit price is more than 6% above the
        # previous close (the 6% threshold is evidently there to exclude ST stocks)
        if _close == _high_limit and _high_limit > _pre_close * 1.06:
            # Number of limit-up days within the 5 bars ending at end_date
            df_panel_5 = get_price(stock, count=5, end_date=end_date, frequency='daily',
                                   fields=['open', 'close', 'high_limit', 'money', 'low'], skip_paused=True)
            sum_plus_max_5 = (df_panel_5.loc[:, 'close'] == df_panel_5.loc[:, 'high_limit']).sum()
            # Number of limit-up days within the 2 bars ending at pre_date
            df_panel_3 = get_price(stock, count=2, end_date=pre_date, frequency='daily',
                                   fields=['open', 'close', 'high_limit', 'money', 'low'], skip_paused=True)
            sum_plus_max_3 = (df_panel_3.loc[:, 'close'] == df_panel_3.loc[:, 'high_limit']).sum()
            # Keep the stock only if neither of those 2 bars was a limit-up
            # and at most 2 of the 5 bars were
            if sum_plus_max_3 == 0 and sum_plus_max_5 <= 2:
                high_limit_stock.append(stock)  # selected
    return high_limit_stock
```
### 四、理清逻辑
通过阅读理解,了解了其选股逻辑是:
选出昨日(end_date)涨停的股票,再排除5个交易日内涨停天数大于2天的,以及前天(pre_date)或大前天涨停过的股票。(此外,代码还预先剔除了代码以30、68开头的创业板、科创板股票,并用"涨停价高于前收盘价6%"这一条件来规避ST。)
### 五、考虑效率
读取和处理数据,是策略程序的核心工作,这两方面也是运行效率的主要障碍,必须遵循:
1)`尽量减少读取的数据量`
1.1)减少取数的行数
- 比如其中过滤ST、创业板、科创板,应该在取数前完成,可以考虑放到基本过滤里面去;
- 逐步缩小取数的范围。比如可以先排除掉昨日未涨停的股票;在缩小的范围内再去排除5天2板以上的股票;最后在进一步缩小的范围内去排除前天、大前天涨停过的股票。
1.2)减少取数的字段
- 比如第一条取数语句,取了pre_close,仅仅是为了排除ST, 取open、low,后来根本没有用上,这都是不该取的数据。
2) `能成批处理的绝不用循环去one-by-one`
一般地,应尽量消灭循环。
比如第一步,筛选出昨日涨停的股票。怎么筛选?
```Python
# Naive version: compare close with the limit-up price one stock at a time
for stock in stocks:
    _high_limit = df_high_limit[stock].values
    _close = df_close[stock].values
    if _high_limit == _close:
        ...  # per-stock handling elided
```
这个写法,既不优雅,也不高效。换个写法:
```
# Condition 1: closed at the limit-up price yesterday and was not paused
stocks = (
    get_price(
        stocks,
        end_date=context.previous_date,
        count=1,
        fields=['close', 'high_limit', 'paused'],
        panel=False,
    )
    .query('close==high_limit and paused==0')['code']
    .tolist()
)
```
一条语句,就搞定了。这样写,不仅仅是优雅简洁,更重要的是不必去存储那些中间变量,直接一步得到需要的结果——这些中间变量,也是需要占内存的。
### 六、重写代码
```
def pick_high_limit(context, stocks):
    """Pick the stocks that closed at limit-up yesterday and were not recently hot.

    A stock is kept when all three conditions hold:
      1. it closed at its limit-up price on context.previous_date and was not paused;
      2. it has at most 2 such limit-up days in the 5 bars ending on that date;
      3. it has no such limit-up day in the 2 bars ending one trading day earlier.

    context: object exposing `previous_date`.
    stocks: security codes to screen.

    Returns a list of codes. Survivors keep the order in which condition 1
    returned them, so the result does not depend on set iteration order.
    """
    # len() rather than truthiness, so list / array / Index inputs all work;
    # an empty universe needs no data request at all.
    if len(stocks) == 0:
        return []

    yesterday = context.previous_date
    fields = ['close', 'high_limit', 'paused']
    limit_up = 'close==high_limit and paused==0'

    # Condition 1: limit-up yesterday
    stocks = get_price(stocks, end_date=yesterday, fields=fields,
                       count=1, panel=False).query(limit_up)['code'].tolist()
    if not stocks:
        return stocks

    # Condition 2: at most 2 limit-up days in the 5 bars ending yesterday
    hit_days = get_price(stocks, end_date=yesterday, fields=fields,
                         count=5, panel=False).query(limit_up).groupby('code')['close'].count()
    too_hot = set(hit_days[hit_days > 2].index)
    stocks = [code for code in stocks if code not in too_hot]
    if not stocks:
        return stocks

    # Condition 3: no limit-up on the two trading days before yesterday
    pre_date = get_trade_days(end_date=yesterday, count=2)[0]  # trading day before yesterday
    earlier_hits = get_price(stocks, end_date=pre_date, fields=fields,
                             count=2, panel=False).query(limit_up)
    hit_earlier = set(earlier_hits['code'])
    return [code for code in stocks if code not in hit_earlier]
```
怎么样,经过几个步骤,原来云山雾罩的程序,变得优雅简单、逻辑清晰、一目了然,是不是让人赏心悦目呢?修改后的程序运行起来,速度也是彻底秒杀原来的代码。