"""
``hierarchical_portfolio`` 模块旨在实现投资组合优化的最新进展之一分层聚类模型应用于分配。
所有的分层类都有类似于 ``EfficientFrontier`` 的 API,不过由于目前很多分层模型不支持不同的目标,所以实际的分配是通过调用 optimize() 来实现的。
目前已经实现:
- ``HRPOpt`` 实现了分层风险平价(HRP)组合。代码由 Marcos Lopez de Prado (2016) 许可转载。
"""
import collections
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
from . import base_optimizer, risk_models
class HRPOpt(base_optimizer.BaseOptimizer):
    """
    A HRPOpt object (inheriting from BaseOptimizer) constructs a hierarchical
    risk parity portfolio.

    Instance variables:

    - Inputs

        - ``n_assets`` - int
        - ``tickers`` - str list
        - ``returns`` - pd.DataFrame
        - ``cov_matrix`` - pd.DataFrame

    - Output:

        - ``weights`` - np.ndarray
        - ``clusters`` - linkage matrix corresponding to clustered assets.

    Public methods:

    - ``optimize()`` calculates weights using HRP
    - ``portfolio_performance()`` calculates the expected return, volatility
      and Sharpe ratio for the optimized portfolio.
    - ``set_weights()`` creates self.weights (np.ndarray) from a weights dict.
    - ``clean_weights()`` rounds the weights and clips near-zeros.
    - ``save_weights_to_file()`` saves the weights to csv, json, or txt.
    """

    def __init__(self, returns=None, cov_matrix=None):
        """
        :param returns: asset historical returns
        :type returns: pd.DataFrame
        :param cov_matrix: covariance of asset returns; its column labels are
                           used as tickers when ``returns`` is not given
        :type cov_matrix: pd.DataFrame
        :raises ValueError: if neither ``returns`` nor ``cov_matrix`` is given
        :raises TypeError: if ``returns`` is not a dataframe
        """
        if returns is None and cov_matrix is None:
            raise ValueError("Either returns or cov_matrix must be provided")
        if returns is not None and not isinstance(returns, pd.DataFrame):
            raise TypeError("returns are not a dataframe")

        self.returns = returns
        self.cov_matrix = cov_matrix
        self.clusters = None

        # Returns take precedence over the covariance matrix as the source
        # of ticker labels when both are supplied.
        if returns is None:
            tickers = list(cov_matrix.columns)
        else:
            tickers = list(returns.columns)
        super().__init__(len(tickers), tickers)

    @staticmethod
    def _get_cluster_var(cov, cluster_items):
        """
        Compute the variance of a cluster under inverse-variance weighting.

        :param cov: covariance matrix, labelled by ticker on both axes
        :type cov: pd.DataFrame
        :param cluster_items: tickers in the cluster
        :type cluster_items: list
        :return: the variance per cluster
        :rtype: float
        """
        cov_slice = cov.loc[cluster_items, cluster_items]
        weights = 1 / np.diag(cov_slice)  # Inverse variance weights
        weights /= weights.sum()  # normalise so the weights sum to one
        return np.linalg.multi_dot((weights, cov_slice, weights))

    @staticmethod
    def _get_quasi_diag(link):
        """
        Sort clustered items by distance, via a pre-order traversal of the
        cluster tree built from the linkage matrix.

        :param link: linkage matrix after clustering
        :type link: np.ndarray
        :return: sorted list of indices
        :rtype: list
        """
        return sch.to_tree(link, rd=False).pre_order()

    @staticmethod
    def _raw_hrp_allocation(cov, ordered_tickers):
        """
        Given the clusters, compute the portfolio that minimises risk by
        recursively traversing the hierarchical tree from the top.

        :param cov: covariance matrix, labelled by ticker on both axes
        :type cov: pd.DataFrame
        :param ordered_tickers: list of tickers ordered by distance
        :type ordered_tickers: str list
        :return: raw portfolio weights
        :rtype: pd.Series
        """
        # Start from a float Series: the weights are scaled by fractional
        # factors below, and assigning floats into an integer-dtype Series
        # relies on silent upcasting, which newer pandas deprecates.
        w = pd.Series(1.0, index=ordered_tickers)
        cluster_items = [ordered_tickers]  # initialize all items in one cluster

        while cluster_items:
            # Bi-section: split every cluster with more than one item into
            # two halves; single-item clusters are dropped. Each surviving
            # cluster contributes exactly two consecutive entries.
            cluster_items = [
                cluster[start:stop]
                for cluster in cluster_items
                for start, stop in (
                    (0, len(cluster) // 2),
                    (len(cluster) // 2, len(cluster)),
                )
                if len(cluster) > 1
            ]
            # For each (left half, right half) pair, optimize locally.
            for first_cluster, second_cluster in zip(
                cluster_items[::2], cluster_items[1::2]
            ):
                # Form the inverse variance portfolio for this pair
                first_variance = HRPOpt._get_cluster_var(cov, first_cluster)
                second_variance = HRPOpt._get_cluster_var(cov, second_cluster)
                alpha = 1 - first_variance / (first_variance + second_variance)
                w[first_cluster] *= alpha  # weight 1
                w[second_cluster] *= 1 - alpha  # weight 2
        return w

    def optimize(self, linkage_method="single"):
        """
        Construct a hierarchical risk parity portfolio, using Scipy hierarchical
        clustering (see `here <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html>`_)

        :param linkage_method: which scipy linkage method to use
        :type linkage_method: str
        :raises ValueError: if ``linkage_method`` is not recognised by scipy
        :return: weights for the HRP portfolio
        :rtype: OrderedDict
        """
        # NOTE: ``_LINKAGE_METHODS`` is a private scipy name; it is used here
        # only to validate the method before clustering.
        if linkage_method not in sch._LINKAGE_METHODS:
            raise ValueError("linkage_method must be one recognised by scipy")

        if self.returns is None:
            cov = self.cov_matrix
            corr = risk_models.cov_to_corr(self.cov_matrix).round(6)
        else:
            corr, cov = self.returns.corr(), self.returns.cov()

        # Compute distance matrix, with ClusterWarning fix as
        # per https://stackoverflow.com/questions/18952587/

        # this can avoid some nasty floating point issues
        matrix = np.sqrt(np.clip((1.0 - corr) / 2.0, a_min=0.0, a_max=1.0))
        dist = ssd.squareform(matrix, checks=False)

        self.clusters = sch.linkage(dist, linkage_method)
        sort_ix = HRPOpt._get_quasi_diag(self.clusters)
        ordered_tickers = corr.index[sort_ix].tolist()
        hrp = HRPOpt._raw_hrp_allocation(cov, ordered_tickers)
        # Return the weights keyed by ticker in sorted-label order.
        weights = collections.OrderedDict(hrp.sort_index())
        self.set_weights(weights)
        return weights