# pypfopt.hierarchical_portfolio 源代码

"""
``hierarchical_portfolio`` 模块旨在实现投资组合优化的最新进展之一:将分层聚类模型应用于资产配置。

所有的分层类都有类似于 ``EfficientFrontier`` 的 API,不过由于目前很多分层模型不支持不同的目标,所以实际的分配是通过调用 optimize() 来实现的。

目前已经实现:

- ``HRPOpt`` 实现了分层风险平价(HRP)组合。代码由 Marcos Lopez de Prado (2016) 许可转载。
"""

import collections

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd

from . import base_optimizer, risk_models


class HRPOpt(base_optimizer.BaseOptimizer):
    """
    A HRPOpt object (inheriting from BaseOptimizer) constructs a hierarchical
    risk parity portfolio.

    Instance variables:

    - Inputs

        - ``n_assets`` - int
        - ``tickers`` - str list
        - ``returns`` - pd.DataFrame
        - ``cov_matrix`` - pd.DataFrame

    - Output:

        - ``weights`` - np.ndarray
        - ``clusters`` - linkage matrix corresponding to clustered assets.

    Public methods:

    - ``optimize()`` calculates weights using HRP
    - ``portfolio_performance()`` calculates the expected return, volatility
      and Sharpe ratio for the optimized portfolio.
    - ``set_weights()`` creates self.weights (np.ndarray) from a weights dict.
    - ``clean_weights()`` rounds the weights and clips near-zeros.
    - ``save_weights_to_file()`` saves the weights to csv, json, or txt.
    """

    def __init__(self, returns=None, cov_matrix=None):
        """
        :param returns: asset historical returns
        :type returns: pd.DataFrame
        :param cov_matrix: covariance of asset returns
        :type cov_matrix: pd.DataFrame.
        :raises ValueError: if neither ``returns`` nor ``cov_matrix`` is given
        :raises TypeError: if ``returns`` is not a dataframe
        """
        if returns is None and cov_matrix is None:
            raise ValueError("Either returns or cov_matrix must be provided")

        if returns is not None and not isinstance(returns, pd.DataFrame):
            raise TypeError("returns are not a dataframe")

        self.returns = returns
        self.cov_matrix = cov_matrix
        self.clusters = None

        # Returns take precedence over the covariance matrix as the source of
        # ticker names when both are supplied.
        if returns is None:
            tickers = list(cov_matrix.columns)
        else:
            tickers = list(returns.columns)
        super().__init__(len(tickers), tickers)

    @staticmethod
    def _get_cluster_var(cov, cluster_items):
        """
        Compute the variance per cluster

        :param cov: covariance matrix (indexed by ticker on both axes, since it
                    is sliced with ``.loc``)
        :type cov: pd.DataFrame
        :param cluster_items: tickers in the cluster
        :type cluster_items: list
        :return: the variance per cluster
        :rtype: float
        """
        cov_slice = cov.loc[cluster_items, cluster_items]
        # Inverse-variance weights, normalised to sum to one.
        weights = 1 / np.diag(cov_slice)
        weights /= weights.sum()
        # w' * Sigma * w for the inverse-variance portfolio of this cluster.
        return np.linalg.multi_dot((weights, cov_slice, weights))

    @staticmethod
    def _get_quasi_diag(link):
        """
        Sort clustered items by distance

        :param link: linkage matrix after clustering
        :type link: np.ndarray
        :return: sorted list of indices
        :rtype: list
        """
        return sch.to_tree(link, rd=False).pre_order()

    @staticmethod
    def _raw_hrp_allocation(cov, ordered_tickers):
        """
        Given the clusters, compute the portfolio that minimises risk by
        recursively traversing the hierarchical tree from the top.

        :param cov: covariance matrix (indexed by ticker on both axes)
        :type cov: pd.DataFrame
        :param ordered_tickers: list of tickers ordered by distance
        :type ordered_tickers: str list
        :return: raw portfolio weights
        :rtype: pd.Series
        """
        # Seed with a float: the loop below scales slices by fractional
        # factors, and writing floats into an int64 Series is deprecated in
        # pandas (incompatible-dtype setitem).
        w = pd.Series(1.0, index=ordered_tickers)
        cluster_items = [ordered_tickers]  # initialize all items in one cluster

        while cluster_items:
            # Bi-section: split every cluster with more than one item into a
            # left and a right half; single-item clusters are dropped.
            cluster_items = [
                i[j:k]
                for i in cluster_items
                for j, k in ((0, len(i) // 2), (len(i) // 2, len(i)))
                if len(i) > 1
            ]
            # The halves of each parent are adjacent, so walk them pairwise
            # and optimize locally.
            for i in range(0, len(cluster_items), 2):
                first_cluster = cluster_items[i]
                second_cluster = cluster_items[i + 1]
                # Form the inverse variance portfolio for this pair
                first_variance = HRPOpt._get_cluster_var(cov, first_cluster)
                second_variance = HRPOpt._get_cluster_var(cov, second_cluster)
                alpha = 1 - first_variance / (first_variance + second_variance)
                w[first_cluster] *= alpha  # weight 1
                w[second_cluster] *= 1 - alpha  # weight 2
        return w

    def optimize(self, linkage_method="single"):
        """
        Construct a hierarchical risk parity portfolio, using Scipy hierarchical
        clustering (see `here <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html>`_)

        :param linkage_method: which scipy linkage method to use
        :type linkage_method: str
        :raises ValueError: if ``linkage_method`` is not recognised by scipy
        :return: weights for the HRP portfolio
        :rtype: OrderedDict
        """
        # NOTE: _LINKAGE_METHODS is a private scipy attribute.
        if linkage_method not in sch._LINKAGE_METHODS:
            raise ValueError("linkage_method must be one recognised by scipy")

        if self.returns is None:
            cov = self.cov_matrix
            corr = risk_models.cov_to_corr(self.cov_matrix).round(6)
        else:
            corr, cov = self.returns.corr(), self.returns.cov()

        # Compute distance matrix, with ClusterWarning fix as
        # per https://stackoverflow.com/questions/18952587/
        # The clip avoids some nasty floating point issues (e.g. a tiny
        # negative value under the square root).
        matrix = np.sqrt(np.clip((1.0 - corr) / 2.0, a_min=0.0, a_max=1.0))
        dist = ssd.squareform(matrix, checks=False)

        self.clusters = sch.linkage(dist, linkage_method)
        sort_ix = HRPOpt._get_quasi_diag(self.clusters)
        ordered_tickers = corr.index[sort_ix].tolist()
        hrp = HRPOpt._raw_hrp_allocation(cov, ordered_tickers)
        # The returned mapping is ordered by ticker label, not by cluster order.
        weights = collections.OrderedDict(hrp.sort_index())
        self.set_weights(weights)
        return weights

    def portfolio_performance(self, verbose=False, risk_free_rate=0.02, frequency=252):
        """
        After optimising, calculate (and optionally print) the performance of
        the optimal portfolio. Currently calculates expected return, volatility,
        and the Sharpe ratio, assuming returns are daily.

        When the object was built from a covariance matrix only, that matrix is
        passed through unchanged (it is not scaled by ``frequency``) and no
        expected returns are supplied.

        :param verbose: whether performance should be printed, defaults to False
        :type verbose: bool, optional
        :param risk_free_rate: risk-free rate of borrowing/lending, defaults to 0.02.
                               The period of the risk-free rate should correspond to the
                               frequency of expected returns.
        :type risk_free_rate: float, optional
        :param frequency: number of time periods in a year, defaults to 252 (the number
                            of trading days in a year)
        :type frequency: int, optional
        :raises ValueError: if weights have not been calculated yet (presumably
                            raised by ``base_optimizer.portfolio_performance``)
        :return: expected return, volatility, Sharpe ratio.
        :rtype: (float, float, float)
        """
        if self.returns is None:
            cov = self.cov_matrix
            mu = None
        else:
            # Scale per-period sample statistics up to annual figures.
            cov = self.returns.cov() * frequency
            mu = self.returns.mean() * frequency

        return base_optimizer.portfolio_performance(
            self.weights, mu, cov, verbose, risk_free_rate
        )