------------------------------------------------------
#                                                    #
#     Chapter 11: Simulated Likelihood Inference     # 
#         for Stochastic Volatility Models           #
#                                                    #
------------------------------------------------------


Consider the model 

 S(t_k) = S(t_{k-1}) * [ 1 + vol(t_{k-1}) * phi_k ]               (1)

or 

  ret(t_k) = vol(t_{k-1}) * phi_k                                 (2)

with a general stochastic volatility process 

  vol_k = vol_k( vol_{k-1} , eps_k )                              (3)

where the (phi_k,eps_k) are mutually independent 
standard normal distributed random numbers. Let's 
abbreviate

            r_k := ret(t_k)
            v_k := vol(t_k)

and assume that the time series under consideration 
has length N+1 (instead of N). Then the likelihood 
function for the model (1-3) is given by 

L = int_{R^N} prod_{k=1}^N 1/sqrt{2pi} 1/v_k *                     

    prod_{k=1}^N exp( -1/2*r_{k+1}^2/v_k^2 ) * 

    prod_{k=1}^N 1/sqrt{2pi} exp( -1/2*eps_k^2 ) deps_k           (4)
 
where in the expression above v_k is a function of 
eps_k and v_{k-1}, 

          v_k = v_k( eps_k , v_{k-1} )                            (5)   

For example, for the GARCH-Diffusion model we had

 v_k^2 = v_{k-1}^2 + kappa * [ bsvol^2 - v_{k-1}^2 ]

                   + beta * v_{k-1}^2 * eps_k                     (6)


If we look at the expression (4), we see that it con-
tains a factor  

           prod_{k=1}^N 1/sqrt{2pi} 1/v_k                         (7)

Thus, if we would choose

              v_k^2 = sigma^2 * eps_k^2                           (8)

this factor looks somehow similar to the quantities 

           pi_N := phi_1^2 * ... * phi_N^2                        (9)

which we considered in Uebungsblatt 1-3 where we saw 
that we can't calculate these quantities with simple 
Monte Carlo. That is, for N = 2500, the following simple 
Monte Carlo sum does not converge for any reasonable 
M <= 10^6 or even M <= 10^9 simulations:

mcsum = 1/M sum_{k=1}^M phi_{1,k}^2 * ... * phi_{N,k}^2          (10)

Now, for the pi_N's we know that all factors phi_i^2 
are independent and we can calculate 

  E[pi_N] = E[phi_1^2] * ... * E[phi_N^2]                        (11)        

          = 1 .  

The Monte Carlo analog of the calculation (11) would be 

[1/M sum_{k_1} phi_{1,k_1}^2 ] * ... * [1/M sum_{k_N} phi_{1,k_N}^2 ]

 = 1/M^N sum_{k_1,...,k_N} phi_{1,k_1}^2 * ... * phi_{1,k_N}^2   (12)

Observe that in (10) and (12) the same set of random 
numbers can be used, namely N*M phi's. However, not too 
surprisingly, the calculation (12) has much better conver-
gence properties. Let's make a quick check:


#---------------------------------------------------------
# warmup excercise: improved MC-Calculation of the pi_N's:
N = 2500                      # length of time series
M = 40000                     # number of MC simulations
phi = matrix(rnorm(N*M),nrow=M,ncol=N)  # 100 Mio random numbers!

# old method:
piN = apply(phi*phi,1,cumprod)
piN = t(piN)
EpiN = colMeans(piN)
par(mfrow=c(1,2))
plot(EpiN,ylim=c(0,3))
plot(log(EpiN))

# new method:
EpiN = rep(0,N)
EpiN[1] = mcsum = sum(phi[,1]^2)/M
for(k in 2:N)
{
  mcsum = sum(phi[,k]^2)/M
  EpiN[k] = EpiN[k-1]*mcsum
}
par(mfrow=c(1,2))
plot(EpiN,ylim=c(0,3))
plot(log(EpiN),ylim=c(-1,1))

#
# end warmup excercise
#---------------------------------------------------------


THUS:    It is critical to perform an MC-sum at every factor, 
         at every probability density factor for the r_k's, 

     factor := 1/sqrt{2pi*v_k^2} * exp( -1/2*r_{k+1}^2/v_k^2 )   (13)

         and not just one sum at the very end.

HOWEVER: For a stochvol model, it is not clear how to do this, 
         since the factors (13) in the likelihood function (4) 
         are not completely independent, the vol-function v_k 
         depends, besides on the random number eps_k, also on 
         the vol v_{k-1} of the previous factor, and this vol 
         depends on the eps_{k-1}..

The following algorithm, which is taken from the very nice and 
very transparent publication of Pitt, Malik and Doucet: "Simu-
lated Likelihood Inference for Stochastic Volatility Models" 
of 2014, is quite performant and pretty general, such that it 
can be applied to a large class of models. To be specific, we 
implement it for the GARCH-Diffusion model (6):



logL_withComments = function( bsvol , w0 , d )
{
  N = length(ret)             # length of time series
  M = length(eps[,1])         # number of MC-simus; the M x N matrix eps 
                              # will be precalculated, such that, when 
                              # varying model parameters, always the same 
                              # random numbers will be taken.
  w = 1 - 1/d
  kappa = (1-w)*w0
  beta = (1-w)*(1-w0)*sqrt(2)
  vol0sq = bsvol^2            # we put start-vol = mean-vol, vol0 = volbar
  
  vol = rep(0,N)
  logL = 0
  
  volsq_withinfo    = rep( vol0sq , M )     # with    return information
  volsq_withoutinfo = rep( vol0sq , M )     # without return information

  for( k in 1:(N-1) )         # same loop as in new method of warmup exercise above: 
  {                           # for each factor_k we perform an MC-sum
    
    # GARCH-Diffusion vol-specification: v_k^2 =: volsq_k =>
    #
    # volsq_k = volsq_{k-1}  + kappa * [ vol0sq - volsq_{k-1} ] 
    #                        + beta * volsq_{k-1} * eps_k 
    #
    # this is a vector of length M = number MC-simus:

    #              [k]            [k-1]                              [k-1]
    volsq_withoutinfo = volsq_withinfo + kappa * ( vol0sq - volsq_withinfo ) + 
                                         beta * volsq_withinfo *  eps[,k] 
    volsq_withoutinfo = abs( volsq_withoutinfo ) 


    # the probability for the occurence of a given return ret_{k+1}, given 
    # the value of the volatility v_k of the previous day, is given by the 
    # probability density which we simply define as a quantity factor: 
    #
    #      factor :=  1/sqrt{2pi*v_k^2} * exp( -1/2*r_{k+1}^2/v_k^2 ) 
    #
    # this is also a vector of length M = number MC-simus:

    factor = dnorm( ret[k+1] , mean=0 , sd=sqrt(volsq_withoutinfo) )
    # factor = prob( r_{k+1} | v_k )  
    # v_k without return information, but with eps_k and with v_{k-1} information


    # to make the algorithm more transparent, we now use two different names, 
    # mcsum and normalization, for nearly the same quantity; the reason for this 
    # will become more clear below where we define the quantity prob_for_vol:

    mcsum_factor =  sum( factor ) / M     # this is a sum over MC-simus and this 
                                          # goes directly into the calculation of logL
    normalization = sum( factor )         # a normalization factor for the probabilities
                                          # prob( v_k | r_{k+1} ), defined below

    # the actual calculation of the log-Likelihood-function:
    logL = logL + log( mcsum_factor )


    # now we have to calculate the v_k's, the volsq_withinfo, which we will 
    # use for the calculation of the v_{k+1}'s in the GARCH_Diffusion equation 
    # at the very top:
    # instead of considering the factor as a probability that a given return
    # ret_{k+1} realizes, that is, factor = prob( r_{k+1} | v_k ), we consider 
    # it as a probability that, given this return ret_{k+1}, the squared vola-
    # tility vol(t_k)^2 will be equal to v_k^2 = volsq_withinfo :

    prob_for_vol = factor / normalization
    # prob_for_vol = prob( v_k | r_{k+1} )

    # now, to take these probabilities into account, the v_k's which are used 
    # for v_{k+1} = v_{k+1}( v_k , eps_{k+1} ) are simply drawn from the v_k's 
    # without return information, the quantities volsq_withoutinfo, where each 
    # volatility volsq_withoutinfo[j] in that vector comes with a probability 
    # prob_for_vol[j]. Fortunately, R has the built-in function sample() which 
    # does exactly this:

    volsq_withinfo = sample( volsq_withoutinfo, M , replace=TRUE , prob_for_vol )
    #          [k]


    # actually the algorithm does not only estimate the log-Likelihood function, 
    # but it can also estimate the whole vol-process itself: we do this purely 
    # for information and checking purposes, the quantity is not needed 
    # in the algorithm:

    mcsum_volsq = sum( volsq_withinfo ) / M 
    vol[k] = sqrt( mcsum_volsq )

  }#next k

  result = list( logL , vol )
  names(result) = c( "logL" , "vol" )
  return(result)
}
# This completes the algorithm. Let's copy and paste the code without all the 
# comments to see that it is actually quite short:

logL_withVol = function( bsvol , w0 , d )
{
  N = length(ret)             
  M = length(eps[,1])         

  w = 1 - 1/d
  kappa = (1-w)*w0
  beta = (1-w)*(1-w0)*sqrt(2)
  vol0sq = bsvol^2            
  
  vol = rep(0,N)
  logL = 0
  
  volsq_withinfo    = rep( vol0sq , M )     
  volsq_withoutinfo = rep( vol0sq , M )     

  for( k in 1:(N-1) )          
  {  
    # GARCH-Diffusion:                         
    volsq_withoutinfo = abs( volsq_withinfo + kappa * ( vol0sq - volsq_withinfo ) + 
                                         beta * volsq_withinfo *  eps[,k] )

    factor = dnorm( ret[k+1] , mean=0 , sd=sqrt(volsq_withoutinfo) )     
                                          
    normalization = sum( factor ) 
    mcsum_factor =  normalization / M         

    logL = logL + log( mcsum_factor )

    prob_for_vol = factor / normalization
    volsq_withinfo = sample( volsq_withoutinfo, M , replace=TRUE , prob_for_vol )

    mcsum_volsq = sum( volsq_withinfo ) / M 
    vol[k] = sqrt( mcsum_volsq )

  }#next k

  result = list( logL , vol )
  names(result) = c( "logL" , "vol" )
  return(result)
}

# we check the algorithm on an artificially generated return set with 
# GARCH-Diffusion dynamics. We modify the GenerateStochVolPaths()-function 
# of Loesung9.R:


GenerateGARCHDiffusionPath = function( N , bsvol , w0 , d )
{
  S0 = 100
  S   = rep(0,N)
  vol = rep(0,N)
  ret = rep(0,N)
  nu  = rep(0,N)    # nu := vol^2

  phi = rnorm(N)
  eps = rnorm(N)

  w = 1 - 1/d
  kappa = (1-w)*w0
  beta = (1-w)*(1-w0)*sqrt(2)
  vol0sq = bsvol^2   

  # the first step, as usual, is special:
  S[1] = S0 * ( 1 + bsvol * phi[1] )
  ret[1] = (S[1] - S0)/S0
  nu[1] = bsvol^2 + kappa*0 + beta*bsvol^2*eps[1]
  vol[1] = sqrt(abs(nu[1]))

  # now the actual simulation:
  for(k in 2:N)
  {
    S[k] = S[k-1] * ( 1 + vol[k-1] * phi[k] )
    ret[k] = (S[k]-S[k-1])/S[k-1]
    # GARCH-Diffusion:
    nu[k] = nu[k-1] + kappa*(bsvol^2-nu[k-1]) + beta*nu[k-1]*eps[k]
    vol[k] = sqrt(abs(nu[k]))
  }

  par(mfrow=c(1,2))
  plot(S,type="l",ylim=c(0,300),main="GARCH-Diffusion Preis")  
  plot(vol,type="l",ylim=c(-0.02,0.06),main="GARCH-Diffusion Vol")
  abline(a=bsvol,b=0,col=2)

  result = list( S , vol , ret )
  names(result) = c("S","vol","ret")
  return(result)
}

res = GenerateGARCHDiffusionPath( N=2500 , bsvol=1.5/100 , w0=0.15 , d=10 )
ret = res$ret
plot(ret,type="l")

# Now, let's try to recover the model parameters
# bsvol=1.5/100 , w0=0.15 , d=10
# from the return data set:

M = 500
N = length(ret)
eps = matrix( rnorm(N*M) , ncol=N , nrow=M)

bsvol = sd(ret)
bsvol

# first let's see whether we get a number at all:
resL = logL_withVol( bsvol , w0=0.15 , d=10 )
resL$logL                      # ok, looks good

# let's take a look at the estimated vol and compare to the exact vol 
# (of course, an exact vol we only have for artificially generated data; 
# for DAX or SPX we only have the return data ret): 
plot(res$vol,type="l",ylim=c(0,0.06))
lines(resL$vol,col="red")      # quite okay!

# to maximize logL, we don't need the estimated vols, so we just skip them 
# from the code. Since the sample()-function uses random numbers, we set a 
# seed at the beginning to make sure that when maximizing logL always the 
# same random numbers are used: 

logL = function( bsvol , w0 , d , seed=123 )
{
  set.seed(seed)
  N = length(ret)             
  M = length(eps[,1])         
  w = 1 - 1/d
  kappa = (1-w)*w0
  beta = (1-w)*(1-w0)*sqrt(2)
  vol0sq = bsvol^2            
  vol = rep(0,N)
  logL = 0
  volsq_withinfo    = rep( vol0sq , M )     
  volsq_withoutinfo = rep( vol0sq , M )     

  for( k in 1:(N-1) )          
  {                           
    volsq_withoutinfo = abs( volsq_withinfo + kappa * ( vol0sq - volsq_withinfo ) + 
                                         beta * volsq_withinfo *  eps[,k] )
    factor = dnorm( ret[k+1] , mean=0 , sd=sqrt(volsq_withoutinfo) )                                              
    normalization = sum( factor ) 
    mcsum_factor =  normalization / M         
    logL = logL + log( mcsum_factor )
    prob_for_vol = factor / normalization
    volsq_withinfo = sample( volsq_withoutinfo, M , replace=TRUE , prob_for_vol )
  }
  return(logL)
}

logL( bsvol=sd(ret), w0=0.15 , d=10 )


# we make a contour-plot in the (w0,d)-plane:
# takes about 2:40 min for M=500, N=2500:
w0 = seq(from=0.05,to=0.5,by=0.05)
dd = 1:40
nw = length(w0)
nd = length(dd)

z = matrix( 0 , nrow=nd , ncol=nw )
for(i in 1:nd)
{
  for(j in 1:nw )
  {
    z[i,j] = logL( bsvol=sd(ret) , w0[j] , dd[i] )
  }
}
# let's look at the result:
contour( dd , w0 , z )
contour( dd , w0 , z , nlevels = 50 )
contour( dd , w0 , z , nlevels = 100 ,zlim=c(7300,7500))

# maximum close to w0 = 0.15, d=10 !!
# we are able to recover model parameters!

# more refined:
# takes about 2:40 min for M=1000, N=2500: (half the number of points, 
#                                           double number of MC-simus)
w0 = seq(from=0.025,to=0.25,by=0.025)  
dd = 1:20
nw = length(w0)
nd = length(dd)

M = 1000
N = length(ret)
eps = matrix( rnorm(N*M) , ncol=N , nrow=M)

z = matrix( 0 , nrow=nd , ncol=nw )
for(i in 1:nd)
{
  for(j in 1:nw )
  {
    z[i,j] = logL( bsvol=sd(ret) , w0[j] , dd[i] )
  }
}

contour( dd , w0 , z , nlevels = 100 ,zlim=c(7300,7500))
contour( dd , w0 , z , nlevels = 100 ,zlim=c(7400,7500))
max(z)                               # 7441.77
logL( sd(ret) , w0=0.15 , d=10 )     # 7438.83
logL( sd(ret) , w0=0.2 , d=10 )      # 7441.77
# okay, w0 a bit larger..