#-------------------------------------------------# # # # Week10: Maximum Likelihood Estimation des # # ARCH(d)-Modells # # # #-------------------------------------------------# Letzts Mal hatten wir uns mit der Likelihood-Methode vertaut gemacht und hatten folgenden Ausdruck fuer die log-Likelihood Funktion hergeleitet: log(tilde L) = (1) - sum_{k=1}^N { log[vol(t_{k-1})] + 1/2 * y_k^2 / vol^2(t_{k-1}) } Dabei konnten und hatten wir additive Konstanten wie etwa N*log(2*pi) weggelassen, weshalb wir eine Schlange oder tilde ueber das L gesetzt hatten. Die y_k waren die durch die Zeitreihendaten gegebenen returns, y_k = ret(t_k) = [ S(t_k)-S(t_{k-1}) ] / S(t_{k-1}) (2) und die genaue Form der Volatilitaet hing ab vom be- trachteten Modell, fuer das ARCH(1)-Modell war das vol^2(t_{k-1}) = w0*bsvol^2 + (1-w0)*ret^2(t_{k-1}) (3) und die Modell-Parameter waren dann das w0 und die bsvol. Fuer die DAX-Zeitreihe hatten wir dann DAX: w0 = 0.7 und bsvol = 1.4% (4) gefunden, und die bsvol war im wesentlichen identisch mit der realisierten Volatilitaet sd(ret) der DAX-Zeit- reihe, ueber den gesamten 10-jahres Zeitraum der Zeitreihe genommen. Fuer das realistischere ARCH(d)-Modell lautet die Vol-Spezifikation vol^2(t_{k-1}) = w0*bsvol^2 + (1-w0)*[ret^2(t_{k-1})+...+ret^2(t_{k-d})] / d (5) und die Modell-Parameter sind das w0, die bsvol und der Zeithorizont d. Wir wollen jetzt also dieses Modell an Zeitreihendaten fitten: ### Start R-Session: ### # before we fit to DAX-data, we check on an artificially # generated ARCH(d) return set: let's use the pathArchd # function from week8.txt to generate these artificial # return data: pathArchd = function( N , bsvol , w0 , d , vol0 ) { S = rep(0,N) ret = rep(0,N) phi = rnorm(N) S0 = 100 vol = rep(vol0,N) # ARCH-vol sumretd = 0 # the first step, k=1, is special: S[1] = S0*(1+vol0*phi[1]) ret[1] = (S[1]-S0)/S0 vol[1] = sqrt( w0*bsvol^2 + (1-w0)*ret[1]^2 ) sumretd = sumretd + ret[1]^2 for(k in 2:N) { if(k<=d) { S[k] = S[k-1]*(1+vol[k-1]*phi[k]) ret[k] = (S[k]-S[k-1])/S[k-1] sumretd = sumretd + ret[k]^2 vol[k] = sqrt( w0*bsvol^2 + (1-w0)*sumretd/k ) } else { S[k] = S[k-1]*(1+vol[k-1]*phi[k]) ret[k] = (S[k]-S[k-1])/S[k-1] sumretd = sumretd + ret[k]^2 sumretd = sumretd - ret[k-d]^2 vol[k] = sqrt( w0*bsvol^2 + (1-w0)*abs(sumretd)/d ) } }#next k par(mfrow=c(1,2)) plot(S,type="l",ylim=c(0,250)) plot(ret,type="l",ylim=c(-0.08,0.08)) result = list(S,ret) names(result) = c("S","ret") return(result) } # we generate an artificial ARCH(d) return set where we # know the actual model parameters (bsvol,w0,d): N = 2500 bsvol = 0.01 w0 = 0.3 d = 20 res = pathArchd(N,bsvol,w0,d,bsvol) ret = res$ret summary(ret) # we have to calculate the log-Likelihood function: # we use 'vectorized' calculation logic of R to make the code # a bit more performant -> code looks a bit less intuitive: logL = function( bsvol , w0 , d ) { w1 = 1 - w0 n = length(ret) cumsum_retsquared = cumsum(ret^2) # remove last d elements: cumsum_retsquared_shifted = cumsum_retsquared[-((n-d+1):n)] # fill in d zeros at the first d places: cumsum_retsquared_shifted = c(rep(0,d),cumsum_retsquared_shifted) sum_dretsquared = cumsum_retsquared - cumsum_retsquared_shifted # now we have to divide this sum by d, but the first d elements # are special: divisor1 = 1:d divisor2 = rep(d,n-d) divisor = c(divisor1,divisor2) sum_dretsquared = sum_dretsquared/divisor # now we can calculate ARCH(d)-vol: vol = sqrt( w0*bsvol^2 + w1*sum_dretsquared ) # shift vol-vector by 1 element ( there is a vol(t_{k-1}) in # the likelihood-function, not a vol(t_k) ) vol = vol[-n] # remove last one vol = c(bsvol,vol) # add a new first one # the actual calculation: terms = log(vol) + 1/2*ret^2/vol^2 result = -sum(terms) return(result) } # let's check: logL( bsvol=0.01, w0=0.2, d=20 ) # alright, some number # let's look for the maximum: # first, we fix bsvol to its natural value: bsvol = sd(ret) bsvol # we make a contour-plot in the (w0,d)-plane: w0 = seq(from=0.02,to=0.98,by=0.02) dd = 1:50 nw = length(w0) nd = length(dd) # matrix z = logL for contour-plot: z = matrix( 0 , nrow=nd , ncol=nw ) for(i in 1:nd) { for(j in 1:nw ) { z[i,j] = logL( bsvol , w0[j] , dd[i] ) } }# still very fast evaluation of logL! # let's look at the result: contour( dd , w0 , z ) contour( dd , w0 , z , nlevels = 50 ) contour( dd , w0 , z , nlevels = 50 , zlim=c(10000,11000) ) contour( dd , w0 , z , nlevels = 100 , zlim=c(10000,10600) ) # we recover correct parameter values: # w0 around 0.3 and d around 20 # differences between d = 15, 20 and 25 not that big, # depending on the particular realization of random numbers; # d below 10 clearly less favorable -> redo calculation # for dd = 10:40 (-> resolution on pictures more nice) # some more pictures, put dd to 10 to 40: image( dd , w0 , z ) persp( dd , w0 , z ) require(plot3D) persp3D( dd , w0 , z ) # let's fix d=20 and check that bsvol=sd(ret) corresponds # to the maximum: bsvol = seq(from=0.1/100,to=4/100,by=0.1/100) nbs = length(bsvol) z = matrix( 0 , nrow=nbs , ncol=nw ) for(i in 1:nbs) { for(j in 1:nw ) { z[i,j]=logL( bsvol[i] , w0[j] , d=20 ) } } # let's look at the result: contour( bsvol , w0 , z ) contour( bsvol , w0 , z , nlevels = 50 ) contour( bsvol , w0 , z , nlevels = 50 , zlim=c(10000,11000) ) contour( bsvol , w0 , z , nlevels = 100 , zlim=c(10000,10500) ) # -> looks all good. # now lets fit to DAX-data: dax = read.table("C:/Users/detlef/OneDrive/hochschule/Vorlesungen/WS2223/Datenanalyse-mit-R/DAX.txt",header=TRUE,sep=";") dax = na.omit(dax) summary(dax) rm(ret) S = dax$index n=length(S) ret = rep(0,n) for(i in 2:n) { ret[i] = (S[i]-S[i-1])/S[i-1] } plot(ret,type="l") w0 = seq(from=0.02,to=0.98,by=0.02) dd = 1:50 bsvol = sd(ret) bsvol nw = length(w0) nd = length(dd) z = matrix( 0 , nrow=nd , ncol=nw ) for(i in 1:nd) { for(j in 1:nw ) { z[i,j] = logL( bsvol , w0[j] , dd[i] ) } } # let's look at the result: contour( dd , w0 , z ) contour( dd , w0 , z , nlevels = 50 ) contour( dd , w0 , z , nlevels = 50 , zlim=c(9500,10500) ) contour( dd , w0 , z , nlevels = 80 , zlim=c(9800,10000) ) # apparently w0 less than 0.4 and d between 8 and 28: w0 = seq(from=0.01,to=0.4,by=0.01) dd = 8:28 bsvol = sd(ret) # # -> reevaluate z with above code # contour( dd , w0 , z , nlevels = 80 , zlim=c(9800,10000) ) # -> d = 14 and w0 around 0.15 # some more pictures: image( dd , w0 , z ) persp( dd , w0 , z ) require(plot3D) persp3D( dd , w0 , z ) # finally we check for bsvol: w0 = seq(from=0.01,to=0.4,by=0.01) bsvol = seq(from=0.1/100,to=4/100,by=0.1/100) nbs = length(bsvol) z = matrix( 0 , nrow=nbs , ncol=nw ) for(i in 1:nbs) { for(j in 1:nw ) { z[i,j]=logL( bsvol[i] , w0[j] , d=14 ) } } contour( bsvol , w0 , z ) contour( bsvol , w0 , z , nlevels = 50) contour( bsvol , w0 , z , nlevels = 80 , zlim=c(9800,10000)) # -> Maximum at w0 around 0.15, bsvol around 1.4% # thus: Maximum at approximately logL( bsvol=0.014 , w0=0.15, d=14 ) max(z) # das passt # direktes Bestimmen des Maximums, # ohne die Bilder anzuschauen: w0 = seq(from=0.02,to=0.4,by=0.02) dd = 10:40 nw = length(w0) nd = length(dd) bsvol = seq(from=0.1/100,to=4/100,by=0.05/100) nbs = length(bsvol) # matrix z = logL for contour-plot: z = array( 0 , dim=c(nbs,nw,nd) ) for(i in 1:nbs) { for(j in 1:nw ) { for(k in 1:nd) { z[i,j,k] = logL( bsvol[i] , w0[j] , dd[k] ) } } }# ok, dauert jetzt ein bischen.. max(z) # wo wird das Maximum angenommen? which(z==max(z), arr.ind=TRUE ) ijk = which(z==max(z), arr.ind=TRUE ) bsvolmax = bsvol[ ijk[1] ] w0max = w0[ ijk[2] ] dmax = dd[ ijk[3] ] bsvolmax w0max dmax logL( bsvol=0.014 , w0=0.15, d=14 ) logL( bsvolmax, w0max, dmax ) # ok, noch ein kleines bischen mehr..