量化学习平台
文章
市场宽度
背离图
登录
注册
机器学习策略分享
策略
作者: 水滴
```python # 风险及免责提示:该策略由聚宽用户在聚宽社区分享,仅供学习交流使用。 # 原文一般包含策略说明,如有疑问请到原文和作者交流讨论。 # 原文网址:https://www.joinquant.com/post/33840 # 标题:机器学习策略分享 # 作者:leo713 from jqfactor import * from jqdata import * from jqlib.optimizer import * from datetime import date, timedelta import pandas as pd import talib as ta from functools import reduce from sklearn.linear_model import Lasso, ElasticNet from sklearn.svm import SVC from sklearn.preprocessing import scale, StandardScaler from sklearn.pipeline import Pipeline from sklearn.decomposition import PCA from sklearn.feature_selection import SelectKBest, f_classif import os.path ### 全局变量 # features = [] # 策略初始 def initialize(context): set_benchmark('000300.XSHG') set_option('use_real_price', True) log.set_level('order', 'error') set_option("avoid_future_data", True) set_order_cost(OrderCost(close_tax=0.001, open_commission=0.0003, close_commission=0.0003, min_commission=5), type='stock') run_daily(market_open, time='open', reference_security='000300.XSHG') g.timer = 0 ## 数据源 g.dataSource = DataSource() ''' ######################策略的交易逻辑###################### 每周计算因子值, 并买入前 20 支股票 ''' def tag_label(data): top_line = data.ret.quantile(0.7) bottom_line = data.ret.quantile(0.3) data = data[(data.ret > top_line) | (data.ret < bottom_line)] data['label'] = data.ret.apply(lambda x: 1 if x > top_line else 0) return data def backtest(backtest_dt, timer): dataSource = g.dataSource stocks = DataSource.get_stocks(backtest_dt) if (timer + 1) % 1 == 0: print('更新因子权重') dates = get_trade_days(end_date=backtest_dt, count=200)[::30] before_df = dataSource.get_range_features(dates) before_df = before_df.dropna() before_df = before_df.groupby('time').apply(tag_label) print('训练数据量:', len(before_df)) features = before_df.columns.difference(['ret', 'time', 'label']) model = Pipeline([ ('pca', PCA()), ('svc', SVC(C=0.1, probability=True)) ]).fit(before_df[features], before_df.label) current_df = dataSource.get(stocks, backtest_dt) current_df = current_df.dropna() print('当期数据量', len(current_df)) current_df['score'] = model.predict_proba(current_df[features])[:, 1] current_df['label'] = model.predict(current_df[features]) stock_to_buy = current_df[current_df.label == 1].sort_values('score').tail(10) stock_to_buy.loc[:, 'time'] = backtest_dt return stock_to_buy def market_open(context): g.timer += 1 if g.timer % 30 == 0: stock_to_buy = backtest(context.previous_date, g.timer) print('买入', stock_to_buy) rebalance_position(context, list(stock_to_buy.index)) def rebalance_position(context, stock_list): current_holding = context.portfolio.positions.keys() if len(stock_list) == 0: stocks_to_sell = stock_list else: stocks_to_sell = list(set(current_holding) - set(stock_list)) # 卖出 bulk_orders(stocks_to_sell, 0) total_value = context.portfolio.total_value if len(stock_list) > 0: bulk_optimize_orders(context, stock_list) # 批量买卖股票 def bulk_orders(stock_list,target_value): for i in stock_list: order_target_value(i, target_value) def bulk_optimize_orders(context, buy_list): optimized_weight = portfolio_optimizer(date=context.previous_date, securities = buy_list, target = MinVariance(count=250), constraints = [WeightConstraint(low=0.9, high=1.0), AnnualProfitConstraint(limit=0.20, count=250)], bounds=[], default_port_weight_range=[0., 1.0], ftol=1e-09, return_none_if_fail=True) total_value = context.portfolio.total_value # 获取总资产 if optimized_weight is None: return for stock in optimized_weight.keys(): value = total_value * optimized_weight[stock] # 确定每个标的的权重 order_target_value(stock, value) # 调整标的至目标权重 ''' ### 因子值 #### ''' from jqfactor import Factor class GROSSPROFITABILITY(Factor): # 设置因子名称 name = 'gross_profitability' # 设置获取数据的时间窗口长度 max_window = 1 # 设置依赖的数据 # 在策略中需要使用 get_fundamentals 获取的 income.total_operating_revenue, 在这里可以直接写做total_operating_revenue。 其他数据同理。 dependencies = ['total_operating_revenue','total_operating_cost','total_assets'] # 计算因子的函数, 需要返回一个 pandas.Series, index 是股票代码,value 是因子值 def calc(self, data): # 获取单季度的营业总收入数据 , index 是日期,column 是股票代码, value 是营业总收入 total_operating_revenue = data['total_operating_revenue'] # 获取单季度的营业总成本数据 total_operating_cost = data['total_operating_cost'] # 获取总资产 total_assets = data['total_assets'] # 计算 gross_profitability gross_profitability = (total_operating_revenue - total_operating_cost)/total_assets # 由于 gross_profitability 是一个一行 n 列的 dataframe,可以直接求 mean 转成 series return gross_profitability.mean() class MOM_N(Factor): dependencies = ['close'] def __init__(self, day): self.name = f'mom_{day}' self.max_window = day def calc(self, data): close = data['close'] return close.iloc[-1,:] / close.iloc[1,:] - 1 class CLOSE(Factor): name = 'close' max_window = 1 dependencies = ['close'] def calc(self, data): return data['close'].mean() class PCF(Factor): name = 'PCF' max_window = 1 dependencies = ['pcf_ratio'] def calc(self, data): return 1 / data['pcf_ratio'].mean() class BP(Factor): name = 'BP' max_window = 1 dependencies = ['pb_ratio'] def calc(self, data): return (1 / data['pb_ratio']).mean() class NET_PROFIT_INCREASE(Factor): name = 'NET_PROFIT_INCREASE' max_window = 1 dependencies = ['net_profit_1', 'net_profit_2', 'net_profit_3', 'net_profit_4'] def calc(self, data): df = data['net_profit_1'].transpose() df = df.merge(data['net_profit_2'].transpose(), how='inner', on='code') df = df.merge(data['net_profit_3'].transpose(), how='inner', on='code') df = df.merge(data['net_profit_4'].transpose(), how='inner', on='code') increase = df.pct_change(axis=1) factor = increase.mean(axis=1) / increase.std(axis=1) return factor class ROA_TTM(Factor): name = 'ROA_TTM' max_window = 1 dependencies = ['roa_1', 'roa_2', 'roa_3', 'roa_4'] def calc(self, data): df = data['roa_1'].transpose() df = df.merge(data['roa_2'].transpose(), how='inner', on='code') df = df.merge(data['roa_3'].transpose(), how='inner', on='code') df = df.merge(data['roa_4'].transpose(), how='inner', on='code') increase = df.pct_change(axis=1) factor = increase.mean(axis=1) / increase.std(axis=1) return factor class MARKET_CAP(Factor): name = 'MARKET_CAP' max_window = 1 dependencies = ['circulating_market_cap'] def calc(self, data): return np.log(data['circulating_market_cap'].mean()) class VSTD_20(Factor): name = 'VSTD_20' max_window = 20 dependencies = ['volume', 'close'] def calc(self, data): return data['volume'].std() class MOENY_MAIN_INCREASE(Factor): name = 'MOENY_MAIN_INCREASE' max_window = 20 dependencies = ['net_pct_main'] def calc(self, data): return data['net_pct_main'].mean() / data['net_pct_main'].std() class operating_revenue_increase(Factor): name = 'operating_revenue_increase' max_window = 1 dependencies = ['operating_revenue_1', 'operating_revenue_2', 'operating_revenue_3', 'operating_revenue_4'] def calc(self, data): df = data['operating_revenue_1'].transpose() df = df.merge(data['operating_revenue_2'].transpose(), how='inner', on='code') df = df.merge(data['operating_revenue_3'].transpose(), how='inner', on='code') df = df.merge(data['operating_revenue_4'].transpose(), how='inner', on='code') increase = df.pct_change(axis=1) return increase.mean(axis=1) class ROE(Factor): name = 'ROE' max_window = 1 dependencies = ['roe'] def calc(self, data): return data['roe'].mean() class ROA(Factor): name = 'ROA' max_window = 1 dependencies = ['roa'] def calc(self, data): return data['roa'].mean() class std_N(Factor): dependencies = ['close'] def __init__(self, window = 10): self.max_window = window self.name = f'std_{window}' def calc(self, data): return data['close'].std() class SHARE_HOLDERS_INCREASE(Factor): name = 'SHARE_HOLDERS_INCREASE' dependencies = ['a_share_holders'] max_window = 2 def calc(self, data): return data['a_share_holders'].pct_change() class CASH_RATIO(Factor): name = 'CASH_RATIO' dependencies = ['cash_to_current_liability'] max_window = 1 def calc(self, data): return data['cash_to_current_liability'].mean() class SP(Factor): name = 'SP' dependencies = ['ps_ratio'] max_window = 1 def calc(self, data): return (1 / data['ps_ratio']).mean() ''' ### DataSource ''' factor_custom = [ CASH_RATIO(), GROSSPROFITABILITY(), MOM_N(120), MOM_N(60), MOM_N(10), CLOSE(), PCF(), BP(), SP(), ROA(), ROA_TTM(), MARKET_CAP(), NET_PROFIT_INCREASE(), VSTD_20(), MOENY_MAIN_INCREASE(), operating_revenue_increase(), ROE(), std_N(20), std_N(40), std_N(120), # SHARE_HOLDERS_INCREASE() ] factor_custom_name = [i.name for i in factor_custom] class DataSource: cache_file = 'feature_cache.csv' def __init__(self): if os.path.isfile(self.cache_file): print('读取缓存文件') self.cache = pd.read_csv(self.cache_file, index_col=0) else: self.cache = pd.DataFrame(columns=['time']) def get(self, stocks, date): print('获取数据日期:', date) if len(self.cache) > 0 and len(self.cache[self.cache.time == str(date)]) > 0: print('命中缓存') feature = self.cache[self.cache.time == str(date)] else: feature = DataSource.get_features(stocks, date) self.update_cache(date, feature) return feature def update_cache(self, date, feature): feature.loc[:, 'time'] = str(date) self.cache = pd.concat([self.cache, feature], sort=False) print('更新数据源缓存') @staticmethod def get_features(stocks, date): factor_custom_df = calc_factors(stocks,factor_custom, start_date=date, end_date=date) result_df = pd.DataFrame(index=stocks) for i in factor_custom_name: factor_series = factor_custom_df[i].tail(1).transpose() factor_series = DataSource.format_factor(factor_series, date) result_df.loc[:, i] = factor_series return result_df @staticmethod def format_factor(factor_series, date, axis = 0): factor_series = winsorize_med(factor_series, scale=5, inclusive=True, inf2nan=True, axis=axis) factor_series = neutralize(factor_series, how=['jq_l1', 'market_cap'], fillna='sw_l1', date=date, axis=axis) factor_series = standardlize(factor_series, inf2nan=True, axis=axis) return factor_series def get_ret_df(self, pre, end, arr): stocks = self.get_stocks(pre) df = get_price(stocks, start_date=pre, end_date=end, panel=False, fq='post') df = df.groupby('code').apply(self.get_stock_ret) date = pre result_df = self.get(stocks, date) df = df.join(result_df) df.loc[:,'time'] = date arr.append(df) return end def get_range_features(self, dates): arr = [] reduce(lambda x,y: self.get_ret_df(x, y, arr), dates) return pd.concat(arr) def store(self): self.cache.to_csv(self.cache_file) @staticmethod def get_stocks(date): return get_index_stocks('000905.XSHG', date) @staticmethod def get_stock_ret(data): ret = data.close.pct_change() return pd.Series({ 'ret': ret.sum() }) ```
文章分类
关于作者
水滴
注册时间: