Chapters
Other Information
|
###################################################
### The Available Data
###################################################
library(DMwR)
data(GSPC)
###################################################
### Handling time dependent data in R
###################################################
library(xts)
x1 <- xts(rnorm(100),seq(as.POSIXct("2000-01-01"),len=100,by="day"))
x1[1:5]
x2 <- xts(rnorm(100),seq(as.POSIXct("2000-01-01 13:00"),len=100,by="min"))
x2[1:4]
x3 <- xts(rnorm(3),as.Date(c('2005-01-01','2005-01-10','2005-01-12')))
x3
x1[as.POSIXct("2000-01-04")]
x1["2000-01-05"]
x1["20000105"]
x1["2000-04"]
x1["2000-03-27/"]
x1["2000-02-26/2000-03-03"]
x1["/20000103"]
mts.vals <- matrix(round(rnorm(25),2),5,5)
colnames(mts.vals) <- paste('ts',1:5,sep='')
mts <- xts(mts.vals,as.POSIXct(c('2003-01-01','2003-01-04',
'2003-01-05','2003-01-06','2003-02-16')))
mts
mts["2003-01",c("ts2","ts5")]
index(mts)
coredata(mts)
###################################################
### Reading the data from the CSV file
###################################################
GSPC <- as.xts(read.zoo('sp500.csv',header=T))
###################################################
### Getting the data from the Web
###################################################
library(tseries)
GSPC <- as.xts(get.hist.quote("^GSPC",start="1970-01-02",
quote=c("Open", "High", "Low", "Close","Volume","AdjClose")))
head(GSPC)
GSPC <- as.xts(get.hist.quote("^GSPC",
start="1970-01-02",end='2009-09-15',
quote=c("Open", "High", "Low", "Close","Volume","AdjClose")))
library(quantmod)
getSymbols('^GSPC')
getSymbols('^GSPC',from='1970-01-01',to='2009-09-15')
colnames(GSPC) <- c("Open", "High", "Low", "Close","Volume","AdjClose")
setSymbolLookup(IBM=list(name='IBM',src='yahoo'),
USDEUR=list(name='USD/EUR',src='oanda',
from=as.Date('2009-01-01')))
getSymbols(c('IBM','USDEUR'))
head(IBM)
head(USDEUR)
###################################################
### Reading the data from a MySQL database
###################################################
library(RODBC)
ch <- odbcConnect("QuotesDSN",uid="myusername",pwd="mypassword")
allQuotes <- sqlFetch(ch,"gspc")
GSPC <- xts(allQuotes[,-1],order.by=as.Date(allQuotes[,1]))
head(GSPC)
odbcClose(ch)
library(DBI)
library(RMySQL)
drv <- dbDriver("MySQL")
ch <- dbConnect(drv,dbname="Quotes","myusername","mypassword")
allQuotes <- dbGetQuery(ch,"select * from gspc")
GSPC <- xts(allQuotes[,-1],order.by=as.Date(allQuotes[,1]))
head(GSPC)
dbDisconnect(ch)
dbUnloadDriver(drv)
setSymbolLookup(GSPC=list(name='gspc',src='mysql',
db.fields=c('Index','Open','High','Low','Close','Volume','AdjClose'),
user='xpto',password='ypto',dbname='Quotes'))
getSymbols('GSPC')
###################################################
### Defining the Prediction Tasks
###################################################
T.ind <- function(quotes,tgt.margin=0.025,n.days=10) {
v <- apply(HLC(quotes),1,mean)
r <- matrix(NA,ncol=n.days,nrow=NROW(quotes))
## The following statment is wrong in the book (page 109)!
for(x in 1:n.days) r[,x] <- Next(Delt(Cl(quotes),v,k=x),x)
x <- apply(r,1,function(x) sum(x[x > tgt.margin | x < -tgt.margin]))
if (is.xts(quotes)) xts(x,time(quotes)) else x
}
candleChart(last(GSPC,'3 months'),theme='white',TA=NULL)
avgPrice <- function(p) apply(HLC(p),1,mean)
addAvgPrice <- newTA(FUN=avgPrice,col=1,legend='AvgPrice')
addT.ind <- newTA(FUN=T.ind,col='red',legend='tgtRet')
addAvgPrice(on=1)
addT.ind()
myATR <- function(x) ATR(HLC(x))[,'atr']
mySMI <- function(x) SMI(HLC(x))[,'SMI']
myADX <- function(x) ADX(HLC(x))[,'ADX']
myAroon <- function(x) aroon(x[,c('High','Low')])$oscillator
myBB <- function(x) BBands(HLC(x))[,'pctB']
myChaikinVol <- function(x) Delt(chaikinVolatility(x[,c("High","Low")]))[,1]
myCLV <- function(x) EMA(CLV(HLC(x)))[,1]
myEMV <- function(x) EMV(x[,c('High','Low')],x[,'Volume'])[,2]
myMACD <- function(x) MACD(Cl(x))[,2]
myMFI <- function(x) MFI(x[,c("High","Low","Close")], x[,"Volume"])
mySAR <- function(x) SAR(x[,c('High','Close')]) [,1]
myVolat <- function(x) volatility(OHLC(x),calc="garman")[,1]
library(randomForest)
data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1:10) +
myATR(GSPC) + mySMI(GSPC) + myADX(GSPC) + myAroon(GSPC) +
myBB(GSPC) + myChaikinVol(GSPC) + myCLV(GSPC) +
CMO(Cl(GSPC)) + EMA(Delt(Cl(GSPC))) + myEMV(GSPC) +
myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) + RSI(Cl(GSPC)) +
mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))
set.seed(1234)
rf <- buildModel(data.model,method='randomForest',
training.per=c(start(GSPC),index(GSPC["1999-12-31"])),
ntree=50, importance=T)
ex.model <- specifyModel(T.ind(IBM) ~ Delt(Cl(IBM),k=1:3))
data <- modelData(ex.model,data.window=c('2009-01-01','2009-08-10'))
varImpPlot(rf@fitted.model,type=1)
imp <- importance(rf@fitted.model,type=1)
rownames(imp)[which(imp > 10)]
data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1) + myATR(GSPC)
+ myADX(GSPC) + myEMV(GSPC) + myVolat(GSPC) + myMACD(GSPC)
+ mySAR(GSPC) + runMean(Cl(GSPC)) )
Tdata.train <- as.data.frame(modelData(data.model,
data.window=c('1970-01-02','1999-12-31')))
Tdata.eval <- na.omit(as.data.frame(modelData(data.model,
data.window=c('2000-01-01','2009-09-15'))))
Tform <- as.formula('T.ind.GSPC ~ .')
###################################################
### The Prediction Models
###################################################
set.seed(1234)
library(nnet)
norm.data <- scale(Tdata.train)
nn <- nnet(Tform,norm.data[1:1000,],size=10,decay=0.01,maxit=1000,linout=T,trace=F)
norm.preds <- predict(nn,norm.data[1001:2000,])
preds <- unscale(norm.preds,norm.data)
sigs.nn <- trading.signals(preds,0.1,-0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
sigs.PR(sigs.nn,true.sigs)
set.seed(1234)
library(nnet)
signals <- trading.signals(Tdata.train[,'T.ind.GSPC'],0.1,-0.1)
norm.data <- data.frame(signals=signals,scale(Tdata.train[,-1]))
nn <- nnet(signals ~ .,norm.data[1:1000,],size=10,decay=0.01,maxit=1000,trace=F)
preds <- predict(nn,norm.data[1001:2000,],type='class')
sigs.PR(preds,norm.data[1001:2000,1])
library(e1071)
sv <- svm(Tform,Tdata.train[1:1000,],gamma=0.001,cost=100)
s.preds <- predict(sv,Tdata.train[1001:2000,])
sigs.svm <- trading.signals(s.preds,0.1,-0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
sigs.PR(sigs.svm,true.sigs)
library(kernlab)
data <- cbind(signals=signals,Tdata.train[,-1])
ksv <- ksvm(signals ~ .,data[1:1000,],C=10)
ks.preds <- predict(ksv,data[1001:2000,])
sigs.PR(ks.preds,data[1001:2000,1])
library(earth)
e <- earth(Tform,Tdata.train[1:1000,])
e.preds <- predict(e,Tdata.train[1001:2000,])
sigs.e <- trading.signals(e.preds,0.1,-0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
sigs.PR(sigs.e,true.sigs)
###################################################
### From Predictions into Actions
###################################################
policy.1 <- function(signals,market,opened.pos,money,
bet=0.2,hold.time=10,
exp.prof=0.025, max.loss= 0.05
)
{
d <- NROW(market) # this is the ID of today
orders <- NULL
nOs <- NROW(opened.pos)
# nothing to do!
if (!nOs && signals[d] == 'h') return(orders)
# First lets check if we can open new positions
# i) long positions
if (signals[d] == 'b' && !nOs) {
quant <- round(bet*money/market[d,'Close'],0)
if (quant > 0)
orders <- rbind(orders,
data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
val = c(quant,
market[d,'Close']*(1+exp.prof),
market[d,'Close']*(1-max.loss)
),
action = c('open','close','close'),
posID = c(NA,NA,NA)
)
)
# ii) short positions
} else if (signals[d] == 's' && !nOs) {
# this is the nr of stocks we already need to buy
# because of currently opened short positions
need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
"N.stocks"])*market[d,'Close']
quant <- round(bet*(money-need2buy)/market[d,'Close'],0)
if (quant > 0)
orders <- rbind(orders,
data.frame(order=c(-1,1,1),order.type=c(1,2,3),
val = c(quant,
market[d,'Close']*(1-exp.prof),
market[d,'Close']*(1+max.loss)
),
action = c('open','close','close'),
posID = c(NA,NA,NA)
)
)
}
# Now lets check if we need to close positions
# because their holding time is over
if (nOs)
for(i in 1:nOs) {
if (d - opened.pos[i,'Odate'] >= hold.time)
orders <- rbind(orders,
data.frame(order=-opened.pos[i,'pos.type'],
order.type=1,
val = NA,
action = 'close',
posID = rownames(opened.pos)[i]
)
)
}
orders
}
policy.2 <- function(signals,market,opened.pos,money,
bet=0.2,exp.prof=0.025, max.loss= 0.05
)
{
d <- NROW(market) # this is the ID of today
orders <- NULL
nOs <- NROW(opened.pos)
# nothing to do!
if (!nOs && signals[d] == 'h') return(orders)
# First lets check if we can open new positions
# i) long positions
if (signals[d] == 'b') {
quant <- round(bet*money/market[d,'Close'],0)
if (quant > 0)
orders <- rbind(orders,
data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
val = c(quant,
market[d,'Close']*(1+exp.prof),
market[d,'Close']*(1-max.loss)
),
action = c('open','close','close'),
posID = c(NA,NA,NA)
)
)
# ii) short positions
} else if (signals[d] == 's') {
# this is the money already committed to buy stocks
# because of currently opened short positions
need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
"N.stocks"])*market[d,'Close']
quant <- round(bet*(money-need2buy)/market[d,'Close'],0)
if (quant > 0)
orders <- rbind(orders,
data.frame(order=c(-1,1,1),order.type=c(1,2,3),
val = c(quant,
market[d,'Close']*(1-exp.prof),
market[d,'Close']*(1+max.loss)
),
action = c('open','close','close'),
posID = c(NA,NA,NA)
)
)
}
orders
}
# Train and test periods
start <- 1
len.tr <- 1000
len.ts <- 500
tr <- start:(start+len.tr-1)
ts <- (start+len.tr):(start+len.tr+len.ts-1)
# getting the quotes for the testing period
data(GSPC)
date <- rownames(Tdata.train[start+len.tr,])
market <- GSPC[paste(date,'/',sep='')][1:len.ts]
# learning the model and obtaining its signal predictions
library(e1071)
s <- svm(Tform,Tdata.train[tr,],cost=10,gamma=0.01)
p <- predict(s,Tdata.train[ts,])
sig <- trading.signals(p,0.1,-0.1)
# now using the simulated trader
t1 <- trading.simulator(market,sig,
'policy.1',list(exp.prof=0.05,bet=0.2,hold.time=30))
t1
summary(t1)
tradingEvaluation(t1)
plot(t1,market,theme='white',name='SP500')
t2 <- trading.simulator(market,sig,'policy.2',list(exp.prof=0.05,bet=0.3))
summary(t2)
tradingEvaluation(t2)
start <- 2000
len.tr <- 1000
len.ts <- 500
tr <- start:(start+len.tr-1)
ts <- (start+len.tr):(start+len.tr+len.ts-1)
s <- svm(Tform,Tdata.train[tr,],cost=10,gamma=0.01)
p <- predict(s,Tdata.train[ts,])
sig <- trading.signals(p,0.1,-0.1)
t2 <- trading.simulator(market,sig,'policy.2',list(exp.prof=0.05,bet=0.3))
summary(t2)
tradingEvaluation(t2)
###################################################
### Model Evaluation and Selection
###################################################
MC.svmR <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
require(e1071)
t <- svm(form,train,...)
p <- predict(t,test)
trading.signals(p,b.t,s.t)
}
MC.svmC <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
require(e1071)
tgtName <- all.vars(form)[1]
train[,tgtName] <- trading.signals(train[,tgtName],b.t,s.t)
t <- svm(form,train,...)
p <- predict(t,test)
factor(p,levels=c('s','h','b'))
}
MC.nnetR <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
require(nnet)
t <- nnet(form,train,...)
p <- predict(t,test)
trading.signals(p,b.t,s.t)
}
MC.nnetC <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
require(nnet)
tgtName <- all.vars(form)[1]
train[,tgtName] <- trading.signals(train[,tgtName],b.t,s.t)
t <- nnet(form,train,...)
p <- predict(t,test,type='class')
factor(p,levels=c('s','h','b'))
}
MC.earth <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
require(earth)
t <- earth(form,train,...)
p <- predict(t,test)
trading.signals(p,b.t,s.t)
}
singleModel <- function(form,train,test,learner,policy.func,...) {
p <- do.call(paste('MC',learner,sep='.'),list(form,train,test,...))
eval.stats(form,train,test,p,policy.func=policy.func)
}
slide <- function(form,train,test,learner,relearn.step,policy.func,...) {
real.learner <- learner(paste('MC',learner,sep='.'),pars=list(...))
p <- slidingWindowTest(real.learner,form,train,test,relearn.step)
p <- factor(p,levels=1:3,labels=c('s','h','b'))
eval.stats(form,train,test,p,policy.func=policy.func)
}
grow <- function(form,train,test,learner,relearn.step,policy.func,...) {
real.learner <- learner(paste('MC',learner,sep='.'),pars=list(...))
p <- growingWindowTest(real.learner,form,train,test,relearn.step)
p <- factor(p,levels=1:3,labels=c('s','h','b'))
eval.stats(form,train,test,p,policy.func=policy.func)
}
eval.stats <- function(form,train,test,preds,b.t=0.1,s.t=-0.1,...) {
# Signals evaluation
tgtName <- all.vars(form)[1]
test[,tgtName] <- trading.signals(test[,tgtName],b.t,s.t)
st <- sigs.PR(preds,test[,tgtName])
dim(st) <- NULL
names(st) <- paste(rep(c('prec','rec'),each=3),
c('s','b','sb'),sep='.')
# Trading evaluation
date <- rownames(test)[1]
market <- GSPC[paste(date,"/",sep='')][1:length(preds),]
trade.res <- trading.simulator(market,preds,...)
c(st,tradingEvaluation(trade.res))
}
pol1 <- function(signals,market,op,money)
policy.1(signals,market,op,money,
bet=0.2,exp.prof=0.025,max.loss=0.05,hold.time=10)
pol2 <- function(signals,market,op,money)
policy.1(signals,market,op,money,
bet=0.2,exp.prof=0.05,max.loss=0.05,hold.time=20)
pol3 <- function(signals,market,op,money)
policy.2(signals,market,op,money,
bet=0.5,exp.prof=0.05,max.loss=0.05)
# The list of learners we will use
TODO <- c('svmR','svmC','earth','nnetR','nnetC')
# The data sets used in the comparison
DSs <- list(dataset(Tform,Tdata.train,'SP500'))
# Monte Carlo (MC) settings used
MCsetts <- mcSettings(20, # 20 repetitions of the MC exps
2540, # ~ 10 years for training
1270, # ~ 5 years for testing
1234) # random number generator seed
# Variants to try for all learners
VARS <- list()
VARS$svmR <- list(cost=c(10,150),gamma=c(0.01,0.001),
policy.func=c('pol1','pol2','pol3'))
VARS$svmC <- list(cost=c(10,150),gamma=c(0.01,0.001),
policy.func=c('pol1','pol2','pol3'))
VARS$earth <- list(nk=c(10,17),degree=c(1,2),thresh=c(0.01,0.001),
policy.func=c('pol1','pol2','pol3'))
VARS$nnetR <- list(linout=T,maxit=750,size=c(5,10),
decay=c(0.001,0.01),
policy.func=c('pol1','pol2','pol3'))
VARS$nnetC <- list(maxit=750,size=c(5,10),decay=c(0.001,0.01),
policy.func=c('pol1','pol2','pol3'))
# main loop
for(td in TODO) {
assign(td,
experimentalComparison(
DSs,
c(
do.call('variants',
c(list('singleModel',learner=td),VARS[[td]],
varsRootName=paste('single',td,sep='.'))),
do.call('variants',
c(list('slide',learner=td,
relearn.step=c(60,120)),
VARS[[td]],
varsRootName=paste('slide',td,sep='.'))),
do.call('variants',
c(list('grow',learner=td,
relearn.step=c(60,120)),
VARS[[td]],
varsRootName=paste('grow',td,sep='.')))
),
MCsetts)
)
# save the results
save(list=td,file=paste(td,'Rdata',sep='.'))
}
load('svmR.Rdata')
load('svmC.Rdata')
load('earth.Rdata')
load('nnetR.Rdata')
load('nnetC.Rdata')
tgtStats <- c('prec.sb','Ret','PercProf',
'MaxDD','SharpeRatio')
allSysRes <- join(subset(svmR,stats=tgtStats),
subset(svmC,stats=tgtStats),
subset(nnetR,stats=tgtStats),
subset(nnetC,stats=tgtStats),
subset(earth,stats=tgtStats),
by = 'variants')
rankSystems(allSysRes,5,maxs=c(T,T,T,F,T))
summary(subset(svmC,
stats=c('Ret','RetOverBH','PercProf','NTrades'),
vars=c('slide.svmC.v5','slide.svmC.v6')))
fullResults <- join(svmR,svmC,earth,nnetC,nnetR,by='variants')
nt <- statScores(fullResults,'NTrades')[[1]]
rt <- statScores(fullResults,'Ret')[[1]]
pp <- statScores(fullResults,'PercProf')[[1]]
s1 <- names(nt)[which(nt > 20)]
s2 <- names(rt)[which(rt > 0.5)]
s3 <- names(pp)[which(pp > 40)]
namesBest <- intersect(intersect(s1,s2),s3)
compAnalysis(subset(fullResults,
stats=tgtStats,
vars=namesBest))
plot(subset(fullResults,
stats=c('Ret','PercProf','MaxDD'),
vars=namesBest))
getVariant('single.nnetR.v12',nnetR)
###################################################
### The Trading System
###################################################
data <- tail(Tdata.train,2540)
results <- list()
for(name in namesBest) {
sys <- getVariant(name,fullResults)
results[[name]] <- runLearner(sys,Tform,data,Tdata.eval)
}
results <- t(as.data.frame(results))
results[,c('Ret','RetOverBH','MaxDD','SharpeRatio','NTrades','PercProf')]
getVariant('grow.nnetR.v12',fullResults)
model <- learner('MC.nnetR',list(maxit=750,linout=T,trace=F,size=10,decay=0.001))
preds <- growingWindowTest(model,Tform,data,Tdata.eval,relearn.step=120)
signals <- factor(preds,levels=1:3,labels=c('s','h','b'))
date <- rownames(Tdata.eval)[1]
market <- GSPC[paste(date,"/",sep='')][1:length(signals),]
trade.res <- trading.simulator(market,signals,policy.func='pol2')
plot(trade.res,market,theme='white',name='SP500 - final test')
library(PerformanceAnalytics)
rets <- Return.calculate(trade.res@trading$Equity)
chart.CumReturns(rets,main='Cumulative returns of the strategy',ylab='returns')
yearlyReturn(trade.res@trading$Equity)
plot(100*yearlyReturn(trade.res@trading$Equity),
main='Yearly percentage returns of the trading system')
abline(h=0,lty=2)
table.CalendarReturns(rets)
table.DownsideRisk(rets)
|