复合分位回归
与分位回归类似,给定分位数序列 $0<\tau_1<\tau_2<\cdots<\tau_K<1$,复合分位回归的目的不再是在单个分位点上最小化损失函数,而是在多个分位点上同时最小化 check function。回归系数 $\boldsymbol{\beta}$ 的估计通过最小化如下目标函数得到:
$$(\hat{b}_1,\dots,\hat{b}_K,\hat{\boldsymbol{\beta}}^{\mathrm{CQR}})=\underset{b_1,\dots,b_K,\boldsymbol{\beta}}{\arg\min}\sum_{k=1}^K\left\{\sum_{i=1}^n\rho_{\tau_k}\left(y_i-b_k-\mathbf{x}_i^{\top}\boldsymbol{\beta}\right)\right\}$$
其中 $\rho_{\tau}(u)=u\,\bigl(\tau-I(u<0)\bigr)$ 为 check function。
通常我们会取等距分位序列:$\tau_k=\frac{k}{K+1},\ k=1,2,\dots,K$。同时给出估计量的渐近分布:$\sqrt{n}\,(\hat{\boldsymbol{\beta}}^{\mathrm{CQR}}-\boldsymbol{\beta}^*)\to N(0,\boldsymbol{\Sigma}_{\mathrm{CQR}})$,其中
$$\boldsymbol{\Sigma}_{\mathrm{CQR}}=\mathbf{C}^{-1}\,\frac{\sum_{k,k'=1}^K\min(\tau_k,\tau_{k'})\bigl(1-\max(\tau_k,\tau_{k'})\bigr)}{\left(\sum_{k=1}^Kf(b^*_{\tau_k})\right)^2}$$
其中矩阵 $\mathbf{C}$ 由下式定义:
$$\lim_{n\to\infty}\frac{1}{n}\mathbf{X}^{\top}\mathbf{X}=\mathbf{C}$$
1.1 基于MM算法的Python实现
import numpy as np
import pandas as pd


def _interpolated_quantiles(sorted_values, tau):
    """Return linearly interpolated sample quantiles of an already sorted 1-D array.

    The position of level ``t`` is ``(n - 1) * t`` (0-based); the result blends
    the two neighbouring order statistics by the fractional part of that position.
    """
    position = (len(sorted_values) - 1) * tau
    lower = np.floor(position).astype(int)
    upper = np.ceil(position).astype(int)
    fraction = position - lower
    return fraction * sorted_values[upper] + (1 - fraction) * sorted_values[lower]


def cqr_cd(x, y, tau, maxit=1000, toler=0.0001):
    """Estimate composite quantile regression coefficients.

    Starting from the least-squares fit, every sweep recomputes the residual
    quantiles ``u`` at the levels ``tau`` and then replaces each coefficient in
    turn by a weighted median of the ratios ``z``.

    NOTE(review): a coordinate-wise weighted-median update looks like coordinate
    descent rather than an MM step -- confirm against the section title.

    Parameters
    ----------
    x : array-like, shape (n, p)
        Design matrix. Every entry is used as a divisor, so zeros in ``x``
        produce inf/nan ratios.
    y : array-like, shape (n,)
        Response vector.
    tau : array-like, shape (k,)
        Quantile levels in (0, 1).
    maxit : int
        Maximum number of sweeps over the coefficients.
    toler : float
        Stop once the L1 change of ``beta`` over one sweep is at most ``toler``.

    Returns
    -------
    numpy.ndarray, shape (p,)
        The estimated coefficient vector.
    """
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    tau = np.asarray(tau, dtype=np.float64)
    p = x.shape[1]

    # Least-squares starting value.
    beta = np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)

    error = 10000
    iteration = 1
    while iteration <= maxit and error > toler:
        betaold = beta.copy()

        # Residual quantiles at the current fit, one per level in tau.
        u = _interpolated_quantiles(np.sort(y - x.dot(beta), axis=0), tau)
        yh = x.dot(beta)

        # r[:, i] is the residual w.r.t. the i-th quantile; signw holds tau[i]
        # for positive residuals, 1 - tau[i] for negative ones and 0.5 at zero.
        r = y[:, None] - u - yh[:, None]
        sign_r = np.sign(r)
        signw = (1 - sign_r) / 2 * (1 - tau) + (sign_r + 1) * tau / 2

        for j in range(p):
            column = x[:, [j]]
            # r is not refreshed inside this loop: every coordinate update uses
            # the residuals computed at the start of the sweep.
            z = (r + beta[j] * column) / column
            weights = column * signw

            vz = z.flatten()
            order = vz.argsort()
            w = np.abs(weights.flatten()[order])
            # Weighted median: first sorted ratio whose cumulative weight
            # exceeds half of the total weight.
            index = np.where(np.cumsum(w) > (np.sum(w) / 2))[0][0]
            beta[j] = vz[order][index]

        error = np.sum(np.abs(beta - betaold))
        iteration = iteration + 1
    return beta


if __name__ == "__main__":
    data = pd.read_csv(r"C:\Users\beida\Desktop\sc\rent.csv")
    x = np.array(data[["cons", 'area']])
    y = np.array(data["rent_euro"], dtype=np.float64)
    tau = np.arange(1, 6)/6

    beta = cqr_cd(x, y, tau, maxit=1000, toler=0.0001)

    print("beta:", beta)
    print("tau:", tau)
    print("cons:", np.percentile((y-x.dot(beta)), tau*100))
结果
> beta: [135.63724592 4.3381923 ]
> tau: [0.16666667 0.33333333 0.5 0.66666667 0.83333333]
> cons: [-114.9981428 -40.30912765 16.70578506 75.41938784 165.26588296]
1.2 基于MM算法的CQR及其R实现
#' Composite quantile regression fitted by an MM-type Newton iteration
#'
#' Starting from the least-squares fit (unless `beta` is given), each iteration
#' recomputes the residual quantiles at the levels `tau`, builds the weights
#' `1 / (|r| + epsilon)` and takes one weighted least-squares step.
#'
#' @param x Numeric n x p design matrix.
#' @param y Numeric response (length-n vector or n x 1 matrix).
#' @param beta Optional starting value for the coefficients.
#' @param to Convergence tolerance on the L1 norm of the update (default 1e-3).
#' @param m Maximum number of iterations (default 200).
#' @param tau Quantile levels; when missing, a note is printed and 1:5/6 is used.
#' @return A list with `beta` (p x 1 coefficient matrix) and `b` (the sample
#'   quantiles of the final residuals at the levels `tau`).
cqrmm <- function(x, y, beta, to = 1e-3, m = 200, tau) {
  if (missing(tau)) {
    cat('no tau_k input', '\n')
    tau <- 1:5 / 6
  }
  if (missing(beta)) {
    beta <- solve(crossprod(x), crossprod(x, y))
  }

  k <- length(tau)
  epsilon <- 0.9999
  error <- 10000
  iteration <- 1

  while (iteration <= m && error > to) {
    res <- as.vector(y - x %*% beta)

    # Interpolated residual quantiles. type = 7 reproduces the
    # "(n - 1) * tau" interpolation rule with correct 1-based indexing; the
    # hand-written version indexed the sorted residuals one position too low
    # and failed with uv[0] whenever (n - 1) * tau < 1.
    u <- stats::quantile(res, probs = tau, names = FALSE, type = 7)

    r <- outer(res, u, "-")                 # r[i, j] = y_i - yhat_i - u_j
    denominator <- 1 / (abs(r) + epsilon)
    W <- rowSums(denominator)
    v <- k - 2 * sum(tau) - rowSums(r * denominator)

    # x * W scales row i of x by W[i], so crossprod(x, x * W) is X' diag(W) X.
    delta <- solve(crossprod(x, x * W), crossprod(x, v))
    beta <- beta - delta
    error <- sum(abs(delta))
    iteration <- iteration + 1
  }

  b <- stats::quantile(y - x %*% beta, tau)
  list(beta = beta, b = b)
}

# Demo on simulated data ----
set.seed(1)
n <- 100
p <- 2
a <- rnorm(n * p, mean = 1, sd = 1)
x <- matrix(a, n, p)
beta <- rnorm(p, 1, 1)
beta <- matrix(beta, p, 1)
y <- x %*% beta - matrix(rnorm(n, 0.1, 1), n, 1)
tau <- 1:5 / 6
# x is a 100 x 2 matrix, y is a 100 x 1 matrix, beta is a 2 x 1 matrix
print(cqrmm(x = x, y = y, tau = tau))

# Reference fit from the cqrReg package, run only when it is installed.
if (requireNamespace("cqrReg", quietly = TRUE)) {
  print(cqrReg::cqr.mm(x, y, tau))
}
1.3 基于EM算法的R语言实现
library(MASS)
library(cqrReg)
set.seed(NULL)
tau.k <- 1:5 / 6

#' Composite quantile regression fitted by an EM algorithm
#'
#' Uses the normal-exponential mixture form of the asymmetric Laplace
#' distribution: for every quantile level the residual is modelled as
#' `theta.1 * v + sqrt(theta.2 * sigma * v) * e` with a latent `v`.
#' The E-step computes `delta2 = E[1 / v]` and `delta3 = E[v]`; the M-step
#' updates the slopes by weighted least squares, then the per-level
#' intercepts `alpha` and scales `weight`.
#'
#' @param X Numeric n x p design matrix (without an intercept column).
#' @param y Numeric response of length n.
#' @param tau Quantile levels in (0, 1); defaults to 1:5/6.
#' @param betar Optional starting value for the slopes; when missing the
#'   least-squares fit is used (and printed).
#' @param weight Optional starting scales, one per level; defaults to all ones.
#' @param maxit Maximum number of EM iterations.
#' @param toler Stop once the L1 change of the slopes is at most `toler`.
#' @return A list with `beta` (p x 1 matrix of slopes) and `b` (the sample
#'   quantiles of the final residuals at the levels `tau`).
CQREM <- function(X, y, tau = 1:5 / 6, betar, weight, maxit = 1000,
                  toler = 1e-5) {
  X <- as.matrix(X)
  y <- as.vector(y)
  n <- length(y)
  K <- length(tau)

  if (missing(betar)) {
    beta <- solve(crossprod(X), crossprod(X, y))
    cat(beta, '\n')
  } else {
    beta <- matrix(betar, ncol = 1)
  }
  if (missing(weight)) {
    weight <- rep(1, times = K)
  }

  alpha <- tau
  theta.1 <- (1 - 2 * tau) / (tau * (1 - tau))
  theta.2 <- 2 / (tau * (1 - tau))
  kappa <- theta.1^2 + 2 * theta.2

  # Spread a length-K vector over the columns of an n x K matrix. Plain
  # `vector op matrix` arithmetic would recycle it down the rows instead.
  by_level <- function(v) matrix(v, n, K, byrow = TRUE)

  error <- 10000
  iteration <- 1
  while (iteration <= maxit && error > toler) {
    # E-step: elementwise residuals w.r.t. every level (n x K).
    res <- as.vector(y - X %*% beta)
    rbar <- outer(res, alpha, "-")
    # Guard against exact zero residuals, which would give infinite weights.
    abs_rbar <- pmax(abs(rbar), sqrt(.Machine$double.eps))
    scale <- theta.2 * weight

    delta2 <- by_level(sqrt(kappa)) / abs_rbar
    delta3 <- by_level(scale / kappa) + abs_rbar / by_level(sqrt(kappa))

    # M-step for the slopes: solve X' W X beta = X' (W y - d).
    w.i <- rowSums(delta2 / by_level(scale))
    d.i <- rowSums((by_level(theta.1) + by_level(alpha) * delta2) /
                     by_level(scale))
    beta_new <- solve(crossprod(X, X * w.i), crossprod(X, w.i * y - d.i))

    # M-step for the intercepts, given the new slopes.
    res_new <- as.vector(y - X %*% beta_new)
    alpha_new <- (colSums(res_new * delta2) - n * theta.1) / colSums(delta2)

    # M-step for the scales; dev[i, k] = y_i - x_i' beta_new - alpha_new[k].
    dev <- outer(res_new, alpha_new, "-")
    weight_new <- (2 / (3 * n)) * colSums(
      dev^2 / by_level(2 * theta.2) * delta2 +
        by_level(kappa / (2 * theta.2)) * delta3 -
        by_level(theta.1 / theta.2) * dev
    )

    # Converge on the change in beta, not on its magnitude.
    error <- sum(abs(beta_new - beta))
    beta <- beta_new
    alpha <- alpha_new
    weight <- weight_new
    iteration <- iteration + 1
  }

  b <- quantile(as.vector(y - X %*% beta), tau)
  list(beta = beta, b = b)
}

n <- 300
# Compare the EM fit with cqrReg's MM fit on pure-noise data:
# y is drawn independently of X, so the true slopes are zero.
p <- 3
X <- matrix(rnorm(n * p, 0, 1), n, p)
y <- rnorm(n)
# `toler` is spelled out; `tol = ` only worked through partial argument matching.
CQREM(X, y, tau = tau.k, maxit = 1000, toler = 1e-5)
cqr.mm(X, y, tau = tau.k, toler = 1e-5)

# Console output recorded in the original post, presumably in the order of the
# two calls above. The data are drawn after set.seed(NULL), so a rerun will
# not reproduce these numbers.
#> $beta
#>             [,1]
#> [1,] -0.01504609
#> [2,] -0.14649880
#> [3,]  0.23344365
#>
#> $b
#>    16.66667%    33.33333%          50%    66.66667%    83.33333%
#> -0.881195701 -0.428558273 -0.000221886  0.470748757  1.102072419
#>
#> $beta
#>             [,1]
#> [1,]  0.05258097
#> [2,] -0.05178250
#> [3,]  0.13393040
#>
#> $b
#>     16.66667%     33.33333%           50%     66.66667%     83.33333%
#> -0.9269618879 -0.3647195276 -0.0001248183  0.4284496629  1.0367478727
EM算法的速度显著慢于MM算法,同时在估计结果上也有略微的差异。
1.4 基于凸优化(CVX)的复合分位回归
实际上还有一些暴力的求解方法,比如直接凸优化
library(CVXR)
library(cqrReg)
# Simulated data ----
set.seed(1)
n <- 100
p <- 2
a <- rnorm(n * p, mean = 1, sd = 1)
x <- matrix(a, n, p)
beta <- rnorm(p, 1, 1)
beta <- matrix(beta, p, 1)
y <- x %*% beta - matrix(rnorm(n, 0.1, 1), n, 1)
tau <- 1:5 / 6
cqr.mm(x, y, tau)

# CQR as a convex program ----
Y <- y       # response
X <- x       # design matrix
taus <- tau  # quantile levels

# Decision variables: one shared slope vector and one intercept per level.
# Note that `beta` now refers to the CVXR variable, not the simulated truth.
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# Check loss written with positive parts:
# tau * max(r, 0) + (1 - tau) * max(-r, 0), summed over observations.
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}

# One loss term per quantile level, all sharing the same slopes.
objective_parts <- lapply(seq_along(taus), function(i) {
  quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# Add the terms up and solve the unconstrained problem.
objective <- Minimize(Reduce(`+`, objective_parts))
problem <- Problem(objective, list())
result <- solve(problem)

# Extract the estimates.
result$getValue(beta)
#>          [,1]
#> [1,] 1.470763
#> [2,] 2.758932
result$getValue(b)
#>            [,1]
#> [1,] -1.1859157
#> [2,] -0.7531299
#> [3,] -0.2539941
#> [4,]  0.1203188
#> [5,]  0.7931756

# Console transcript of the cqrReg reference fit, as recorded in the post:
#> > cqr.mm(x,y,tau)
#> $beta
#>          [,1]
#> [1,] 1.508830
#> [2,] 2.749125
#>
#> $b
#>   16.66667%   33.33333%         50%   66.66667%   83.33333%
#> -1.21780307 -0.78448530 -0.29961304  0.05043142  0.69900106
可以看到,与MM算法也存在一些差异
进一步思考
加权复合分位回归
复合分位回归是最小化一系列分位损失函数,但不难看出,CQR 赋予了每个 $\rho_{\tau_k}$ 完全相同的权重,那么就可以考虑使用不同的权重。这样得到的就是加权的 CQR(WCQR):
$$\left(\hat{\alpha}_{1},\ldots,\hat{\alpha}_{K},\hat{\beta}^{\mathrm{WCQR}}\right)=\underset{\alpha_{1},\ldots,\alpha_{K},\beta}{\arg\min}\sum_{i=1}^{N}\sum_{k=1}^{K}\omega_{k}\cdot\rho_{\tau_{k}}\left(Y_{i}-X_{i}^{T}\beta-\alpha_{k}\right).$$
求解原理与 CQR 类似。假如设定相同的权重 w,那么就应该得到与 CQR 完全相同的结果:
# Weighted CQR with equal weights ----
Y <- y       # response
X <- x       # design matrix
taus <- tau  # quantile levels
w <- c(0.2, 0.2, 0.2, 0.2, 0.2)

# Decision variables: one shared slope vector and one intercept per level.
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# Check loss written with positive parts:
# tau * max(r, 0) + (1 - tau) * max(-r, 0), summed over observations.
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}

# One weighted loss term per quantile level.
objective_parts <- lapply(seq_along(taus), function(i) {
  w[i] * quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# Add the terms up and solve the unconstrained problem.
objective <- Minimize(Reduce(`+`, objective_parts))
problem <- Problem(objective, list())
result <- solve(problem)

# Extract the estimates.
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)
beta_val
#>          [,1]
#> [1,] 1.470763
#> [2,] 2.758932
b_val
#>            [,1]
#> [1,] -1.1859157
#> [2,] -0.7531299
#> [3,] -0.2539941
#> [4,]  0.1203188
#> [5,]  0.7931756
现在设定不同的权重w:
# Weighted CQR with random weights ----
# Draw one random number per quantile level.
# Note: `n` is re-used here for the number of levels, not the sample size.
n <- length(taus)
random_numbers <- runif(n)

# Normalise so the weights sum to one, and check the sum.
normalized_numbers <- random_numbers / sum(random_numbers)
sum(normalized_numbers)

Y <- y       # response
X <- x       # design matrix
taus <- tau  # quantile levels
w <- normalized_numbers

# Decision variables: one shared slope vector and one intercept per level.
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# Check loss written with positive parts:
# tau * max(r, 0) + (1 - tau) * max(-r, 0), summed over observations.
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}

# One weighted loss term per quantile level.
objective_parts <- lapply(seq_along(taus), function(i) {
  w[i] * quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# Add the terms up and solve the unconstrained problem.
objective <- Minimize(Reduce(`+`, objective_parts))
problem <- Problem(objective, list())
result <- solve(problem)

# Extract the estimates (values below are those recorded in the post).
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)
#> > beta_val
#>          [,1]
#> [1,] 1.525781
#> [2,] 2.736585
#> > b_val
#>             [,1]
#> [1,] -1.23440023
#> [2,] -0.79443834
#> [3,] -0.32373464
#> [4,]  0.05565281
#> [5,]  0.79790504
可见估计值有了差异。