Update Comparaison_of_methods.rmd

This commit is contained in:
elisaduz 2022-05-06 09:27:44 +02:00
commit 365fbf1dcd
1 changed files with 165 additions and 42 deletions

View File

@ -16,11 +16,15 @@ library("caret")
## 1. Proposition for simulations under $\mathcal{H}_1$ ## 1. Proposition for simulations under $\mathcal{H}_1$
In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence. In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence.
The function `PoissonProcess` creates a sequence of Poisson process of a parameter lambda
```{r} ```{r}
PoissonProcess <- function(lambda,T) { PoissonProcess <- function(lambda,T) {
return(sort(runif(rpois(1,lambda*T),0,T))) return(sort(runif(rpois(1,lambda*T),0,T)))
} }
```
The following function creates a sequence under H0 and add a sequence under H1.
```{r}
SimulationH1 <- function(lambda0, lambda1,T,tau){ SimulationH1 <- function(lambda0, lambda1,T,tau){
ppH0=PoissonProcess(lambda0,T) ppH0=PoissonProcess(lambda0,T)
ppH1.segt=PoissonProcess(lambda1,tau) ppH1.segt=PoissonProcess(lambda1,tau)
@ -35,6 +39,7 @@ SimulationH1 <- function(lambda0, lambda1,T,tau){
``` ```
`TimeBetweenEvent` compute Time Between Event for a `pp` interval.
```{r} ```{r}
TimeBetweenEvent <- function(pp){ TimeBetweenEvent <- function(pp){
n=length(pp) n=length(pp)
@ -49,7 +54,9 @@ DataFrame <- function(pp,tbe){
``` ```
## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method ## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method
In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$. In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$.
The function `ScanStat` compute the scan statistic for a sequence, given some parameters $T$ and $\tau$. This function returns the value of the scan stat, and the index of the sequence where it happens
```{r} ```{r}
ScanStat <- function(pp, T, tau){ ScanStat <- function(pp, T, tau){
n=length(pp) n=length(pp)
@ -58,13 +65,15 @@ ScanStat <- function(pp, T, tau){
for (i in (1:stop)) { for (i in (1:stop)) {
x=which((pp>=pp[i])&(pp<=(pp[i]+tau))) x=which((pp>=pp[i])&(pp<=(pp[i]+tau)))
scan=length(x) scan=length(x)
if (scan>ScanStat) {ScanStat=scan} if (scan>ScanStat) {ScanStat=scan
max=i}
} }
return (c(i,ScanStat)) return (c(max,ScanStat))
} }
``` ```
We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence. We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence.
This function `EmpDistrib` compute the empirical distribution using a Monte Carlo estimator for the scan statistic method. It returns a Data Frame, containing the value of the scan, its probability and the value of its cumulative distribution function.
```{r} ```{r}
EmpDistrib <- function(lambda, n_sample,T,tau){ EmpDistrib <- function(lambda, n_sample,T,tau){
pp=PoissonProcess(lambda,T) pp=PoissonProcess(lambda,T)
@ -83,6 +92,8 @@ EmpDistrib <- function(lambda, n_sample,T,tau){
return(EmpDis) return(EmpDis)
} }
``` ```
This function plot the cumulative distribution function associated to an empirical distribution function
```{r} ```{r}
Plot_CDF <- function(lambda,n_sample,T,tau){ Plot_CDF <- function(lambda,n_sample,T,tau){
Emp=EmpDistrib(lambda,n_sample,T,tau) Emp=EmpDistrib(lambda,n_sample,T,tau)
@ -91,7 +102,7 @@ Plot_CDF <- function(lambda,n_sample,T,tau){
return(Emp) return(Emp)
} }
``` ```
### 2.1 Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_0: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$ ### 2.1. Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_0: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$
In this part, we will test different values for $\lambda_0$ and $\lambda_1$, and compute the probability of occurrence of a certain scan statistic. In this part, we will test different values for $\lambda_0$ and $\lambda_1$, and compute the probability of occurrence of a certain scan statistic.
```{r} ```{r}
@ -103,19 +114,12 @@ tau=1
ppH0=PoissonProcess(lambda0,T) ppH0=PoissonProcess(lambda0,T)
CDF=Plot_CDF(lambda0,n_sample,T,tau) CDF=Plot_CDF(lambda0,n_sample,T,tau)
``` ```
```{r}
n_sample=10**4
lambda1=4
T=10
tau=1
ppH0=PoissonProcess(lambda1,T)
CDF=Plot_CDF(lambda1,n_sample,T,tau)
```
Compute $p$-value for scan statistic of `ppH1` with `Emp`:
```{r} ```{r}
PValue <- function(Emp,ppH1, T, tau){ PValue <- function(Emp,ppH, T, tau){
scanH1=ScanStat(ppH1,T,tau)[2] scanH1=ScanStat(ppH,T,tau)[2]
index_scanH1=ScanStat(ppH1,T,tau)[1] index_scanH1=ScanStat(ppH,T,tau)[1]
index=Emp$index_scan index=Emp$index_scan
n=length(index) n=length(index)
if (scanH1< min(Emp$index_scan)){ if (scanH1< min(Emp$index_scan)){
@ -134,8 +138,8 @@ NbSeqH0=10000
NbSeqH1=NbSeqH0 NbSeqH1=NbSeqH0
DataH0=vector("list") DataH0=vector("list")
DataH1=vector("list") DataH1=vector("list")
lambda0=4 lambda0=2
lambda1=10 lambda1=5
T=10 T=10
tau=1 tau=1
@ -165,7 +169,7 @@ TimeBetweenEventList <- function(list,n_list){
} }
tbe0=TimeBetweenEventList(DataH0,NbSeqH0) tbe0=TimeBetweenEventList(DataH0,NbSeqH0)
``` ```
We compute the p-value associated to all 5 sequences, and stock them in a vector. We compute the p-value associated to all 10000 sequences, and stock them in a vector.
```{r} ```{r}
#We start by computing the empirical distribution for lambda0 #We start by computing the empirical distribution for lambda0
@ -183,8 +187,9 @@ for (i in 1:NbSeqH0){
index_scan = c(index_scan,result[3]) index_scan = c(index_scan,result[3])
} }
ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)) ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)*1)
sum(ScS_H0$class[which(ScS_H0$class==TRUE)])/NbSeqH0 ScS_H0
sum(ScS_H0$class[which(ScS_H0$class=='1')])/NbSeqH0
``` ```
```{r} ```{r}
@ -201,11 +206,13 @@ for (i in 1:NbSeqH1){
pvalue=c(pvalue,result[2]) pvalue=c(pvalue,result[2])
index_scan=c(index_scan,result[3]) index_scan=c(index_scan,result[3])
} }
ScS_H1=data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05), begin_scan=index_scan) ScS_H1 = data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05)*1, begin_scan=index_scan)
sum(ScS_H1$class[which(ScS_H1$class==TRUE)])/NbSeqH1 ScS_H1
sum(ScS_H1$class[which(ScS_H1$class=='1')])/NbSeqH1
``` ```
`ScanStatMC` compute local score for `Emp`:
```{r} ```{r}
ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
scan=c() scan=c()
@ -226,7 +233,8 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
``` ```
## 3. Local score ## 3. Local score
### Distribution of scores via Monte Carlo ### 3.1. Distribution of scores via Monte Carlo
`ComputeE` compute `E` coefficient:
```{r} ```{r}
ComputeE <- function(lambda0, lambda1){ ComputeE <- function(lambda0, lambda1){
E = 1 E = 1
@ -240,6 +248,7 @@ ComputeE <- function(lambda0, lambda1){
} }
``` ```
`ScoreDistribEmpiric` compute score for empiric distribution:
```{r} ```{r}
ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){ ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){
E = ComputeE(lambda0, lambda1) E = ComputeE(lambda0, lambda1)
@ -306,6 +315,7 @@ ScoreDistribElisa <- function(lambda0, lambda1, T){
``` ```
```{r} ```{r}
distrib_score_mc=ScoreDistribEmpiric(2,3,10000,T) distrib_score_mc=ScoreDistribEmpiric(2,3,10000,T)
distrib_score_theo=ScoreDistribElisa(2,3,T) distrib_score_theo=ScoreDistribElisa(2,3,T)
@ -314,17 +324,30 @@ distrib_score_mc
distrib_score_theo distrib_score_theo
#par(mfrow = c(1,2)) distrib_score_mc = ScoreDistribEmpiric(2,3,10000,T)
barplot(distrib_score_mc[,2],col="blue",axes=F) distrib_score_theo = ScoreDistribElisa(2,3,T)
mtext("Distribution des scores via Monte_Carlo",side=1,line=2.5,col="blue")
axis(2, ylim=c(0,10)) plot_graph_distrib_score <- function(distrib_score_theo, distrib_score_mc){
par(new = T) # length(distrib_score_mc[,2])
barplot(distrib_score_theo[,2],col="red",axes=F) # length(distrib_score_theo[,2])
mtext("Distribution des scores via la méthode théorique",side=1,line=4,col="red")
#diff_distrib_score=abs(distrib_score_mc[,2]-distrib_score_theo[,2])
#par(mfrow = c(1,2))
barplot(distrib_score_mc[,2],col="blue",axes=F)
mtext("Distribution of scores via Monte Carlo",side=1,line=2.5,col="blue")
axis(2, ylim=c(0,10))
par(new = T)
barplot(distrib_score_theo[,2],col="red",axes=F)
mtext("Distribution of scores using the theoretical method",side=1,line=4,col="red")
}
plot_graph_distrib_score(distrib_score_theo, distrib_score_mc)
``` ```
### Local score calculation ### 3.2. Local score calculation
```{r} ```{r}
LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){ LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){
E = ComputeE(lambda0, lambda1) E = ComputeE(lambda0, lambda1)
@ -352,26 +375,41 @@ LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){
## 4. Experience plan for comparaison ## 4. Experience plan for comparaison
```{r} ```{r}
NbSeq = 10**3 NbSeq = 10**2
T = 10 T = 10
for (lambda0 in (2:5)){ for (lambda0 in (2)){
Sensitivity = c() Sensitivity = c()
Specificity = c() Specificity = c()
accepted_lambda = c() accepted_lambda = c()
for (lambda1 in c(3:8)){ for (lambda1 in c(3)){
if (lambda0 < lambda1){ if (lambda0 < lambda1){
accepted_lambda=c(accepted_lambda,lambda1)
accepted_lambda = c(accepted_lambda,lambda1)
cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "") cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
tbe0=vector("list",length=NbSeq) tbe0 = vector("list",length=NbSeq)
pp0 = vector("list", length = NbSeq) pp0 = vector("list", length = NbSeq)
pp1 = vector("list", length = NbSeq)
tbe1 = vector("list", length = NbSeq)
theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
for (i in (1:NbSeq)) { for (i in (1:NbSeq)) {
#Simulation for sequences under H0
ppi = PoissonProcess(lambda0,T) ppi = PoissonProcess(lambda0,T)
ni=length(ppi) ni=length(ppi)
pp0[[i]] = ppi pp0[[i]] = ppi
tbei=ppi[2:ni]-ppi[1:ni-1] tbei = ppi[2:ni]-ppi[1:ni-1]
tbe0[[i]]=tbei tbe0[[i]] = tbei
}
#Simulation for sequences under H1
ppj1 = SimulationH1(lambda0, lambda1, T, tau)
nj = length(ppj1)
pp1[[i]] = ppj1
tbej = ppj1[2:nj]-ppj1[1:nj-1]
tbe1[[i]] = tbej
}
#cat("- Empiric version:\n") #cat("- Empiric version:\n")
Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T) Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T)
@ -383,6 +421,9 @@ for (lambda0 in (2:5)){
LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0) LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
options(warn = -1) # Disable warnings print options(warn = -1) # Disable warnings print
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0) SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
SS_expected = c(SS_H0$class, SS_H1$class)
#cat("Local Score:\n") #cat("Local Score:\n")
#print(summary(LS_H0)) #print(summary(LS_H0))
@ -408,9 +449,9 @@ for (lambda0 in (2:5)){
#cat("Scan Statistics:\n") #cat("Scan Statistics:\n")
#print(summary(SS_H0)) #print(summary(SS_H0))
#cat("Confusion Matrix:\n") #cat("Confusion Matrix:\n")
print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$table) print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
Sensitivity = c(Sensitivity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[1]) #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
Specificity = c(Specificity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[2]) #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
cat("---\n") cat("---\n")
@ -425,3 +466,85 @@ for (lambda0 in (2:5)){
} }
``` ```
```{r}
theo = c(0,0,0,1,1,1)
exp = c(0,1,1,1,1,0)
confusionMatrix(factor(exp), factor(theo), positive = '1') #prédiction puis théorique
```
```{r}
NbSeq = 10**2
T = 10
lambda0 = 2
lambda1 = 5
n_sample=10**4
cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
tbe0 = vector("list",length=NbSeq)
pp0 = vector("list", length = NbSeq)
pp1 = vector("list", length = NbSeq)
tbe1 = vector("list", length = NbSeq)
theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
for (i in (1:NbSeq)) {
#Simulation for sequences under H0
ppi = PoissonProcess(lambda0,T)
ni=length(ppi)
pp0[[i]] = ppi
tbei = ppi[2:ni]-ppi[1:ni-1]
tbe0[[i]] = tbei
#Simulation for sequences under H1
ppj1 = SimulationH1(lambda0, lambda1, T, tau)
nj = length(ppj1)
pp1[[i]] = ppj1
tbej = ppj1[2:nj]-ppj1[1:nj-1]
tbe1[[i]] = tbej
}
Emp = EmpDistrib(lambda0,n_sample,T,tau)
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
SS_expected = c(SS_H0$class, SS_H1$class)
#cat("Local Score:\n")
#print(summary(LS_H0))
#cat("Scan Statistics:\n")
#print(summary(SS_H0))
#cat("Confusion Matrix:\n")
#print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class)))
#cat("- Elisa version:\n")
Score = ScoreDistribElisa(lambda0, lambda1, T)
Emp = EmpDistrib(lambda0,n_sample,T,tau)
X_seq = Score$Score_X
P_X = Score$P_X
LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
options(warn = -1) # Disable warnings print
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
#cat("Local Score:\n")
#print(summary(LS_H0))
#cat("Scan Statistics:\n")
#print(summary(SS_H0))
#cat("Confusion Matrix:\n")
print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
#Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
#Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
cat("---\n")
titleSens=TeX(paste(r'(Sensitivity for $\lambda_0=$)', lambda0))
plot(x=accepted_lambda,y=Sensitivity, type='l', main = titleSens)
titleSpec=TeX(paste(r'(Specificity for $\lambda_0=$)', lambda0))
plot(x=accepted_lambda,y=Specificity, type='l', main = titleSpec)
```