From ff0f6cf253f3e5565d65ea638eeecb97050693ac Mon Sep 17 00:00:00 2001
From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com>
Date: Tue, 19 Apr 2022 08:10:45 +0200
Subject: [PATCH 1/3] Update titles

---
 Comparaison_of_methods.rmd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd
index b0f0a00..b4f38ad 100644
--- a/Comparaison_of_methods.rmd
+++ b/Comparaison_of_methods.rmd
@@ -91,7 +91,7 @@ Plot_CDF <- function(lambda,n_sample,T,tau){
     return(Emp)
 }
 ```
-### 2.1 Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_0: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$ 
+### 2.1. Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_0: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$ 
 In this part, we will test different values for $\lambda_0$ and $\lambda_1$, and compute the probability of occurrence of a certain scan statistic.
 
 ```{r}
@@ -226,7 +226,7 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
 ```
 
 ## 3. Local score
-### Distribution of scores via Monte Carlo
+### 3.1. Distribution of scores via Monte Carlo
 ```{r}
 ComputeE <- function(lambda0, lambda1){
     E = 1
@@ -305,7 +305,7 @@ mtext("Distribution des scores via la méthode théorique",side=1,line=4,col="re
 ```
 
 
-### Local score calculation
+### 3.2. Local score calculation
 ```{r}
 LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){
   E = ComputeE(lambda0, lambda1)

From b7e079a55e4e7cc5793f628595576764dea2d6af Mon Sep 17 00:00:00 2001
From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com>
Date: Tue, 19 Apr 2022 10:30:49 +0200
Subject: [PATCH 2/3] Passing to function plot_graph_distrib_score

---
 Comparaison_of_methods.rmd | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd
index b4f38ad..dc66d70 100644
--- a/Comparaison_of_methods.rmd
+++ b/Comparaison_of_methods.rmd
@@ -285,23 +285,25 @@ ScoreDistribElisa <- function(lambda0, lambda1, T){
 ```
 
 ```{r}
-distrib_score_mc=ScoreDistribEmpiric(2,3,10000,T)
+distrib_score_mc = ScoreDistribEmpiric(2,3,10000,T)
+distrib_score_theo = ScoreDistribElisa(2,3,T)
 
-distrib_score_theo=ScoreDistribElisa(2,3,T)
+plot_graph_distrib_score <- function(distrib_score_theo, distrib_score_mc){
+    # length(distrib_score_mc[,2])
+    # length(distrib_score_theo[,2])
 
+    #diff_distrib_score=abs(distrib_score_mc[,2]-distrib_score_theo[,2])
 
-length(distrib_score_mc[,2])
-length(distrib_score_theo[,2])
+    #par(mfrow = c(1,2))
+    barplot(distrib_score_mc[,2],col="blue",axes=F)
+    mtext("Distribution of scores via Monte Carlo",side=1,line=2.5,col="blue")
+    axis(2, ylim=c(0,10))
+    par(new = T)
+    barplot(distrib_score_theo[,2],col="red",axes=F)
+    mtext("Distribution of scores using the theoretical method",side=1,line=4,col="red") 
+}
 
-#diff_distrib_score=abs(distrib_score_mc[,2]-distrib_score_theo[,2])
-
-#par(mfrow = c(1,2))
-barplot(distrib_score_mc[,2],col="blue",axes=F)
-mtext("Distribution des scores via Monte_Carlo",side=1,line=2.5,col="blue")
-axis(2, ylim=c(0,10))
-par(new = T)
-barplot(distrib_score_theo[,2],col="red",axes=F)
-mtext("Distribution des scores via la méthode théorique",side=1,line=4,col="red") 
+plot_graph_distrib_score(distrib_score_theo, distrib_score_mc)
 ```
 
 

From 6e3480d478b75dcf117e3bc0162cb9695931a893 Mon Sep 17 00:00:00 2001
From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com>
Date: Tue, 19 Apr 2022 11:13:31 +0200
Subject: [PATCH 3/3] Merge codes and comments

---
 Comparaison_of_methods.rmd | 173 ++++++++++++++++++++++++++++++-------
 1 file changed, 141 insertions(+), 32 deletions(-)

diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd
index dc66d70..315310e 100644
--- a/Comparaison_of_methods.rmd
+++ b/Comparaison_of_methods.rmd
@@ -16,11 +16,15 @@ library("caret")
 ## 1. Proposition for simulations under $\mathcal{H}_1$
 
 In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence. 
+The function `PoissonProcess` creates a sequence of Poisson process of a parameter lambda
 ```{r}
 PoissonProcess <- function(lambda,T) {
   return(sort(runif(rpois(1,lambda*T),0,T)))
 }
+```
 
+The following function creates a sequence under H0 and add a sequence under H1.
+```{r}
 SimulationH1 <- function(lambda0, lambda1,T,tau){
     ppH0=PoissonProcess(lambda0,T)
     ppH1.segt=PoissonProcess(lambda1,tau)
@@ -35,6 +39,7 @@ SimulationH1 <- function(lambda0, lambda1,T,tau){
 ```
 
 
+`TimeBetweenEvent` compute Time Between Event for a `pp` interval.
 ```{r}
 TimeBetweenEvent <- function(pp){
     n=length(pp)
@@ -49,7 +54,9 @@ DataFrame <- function(pp,tbe){
 ```
 
 ## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method
-In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$.  
+In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$. 
+
+The function `ScanStat` compute the scan statistic for a sequence, given some parameters $T$ and $\tau$. This function returns the value of the scan stat, and the index of the sequence where it happens
 ```{r}
 ScanStat <- function(pp, T, tau){
     n=length(pp)
@@ -58,13 +65,15 @@ ScanStat <- function(pp, T, tau){
     for (i in (1:stop)) {
         x=which((pp>=pp[i])&(pp<=(pp[i]+tau)))
         scan=length(x)
-        if (scan>ScanStat) {ScanStat=scan}
+        if (scan>ScanStat) {ScanStat=scan
+        max=i}
   }   
-    return (c(i,ScanStat))
+    return (c(max,ScanStat))
 }
 ```
 
 We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence. 
+This function `EmpDistrib` compute the empirical distribution using a Monte Carlo estimator for the scan statistic method. It returns a Data Frame, containing the value of the scan, its probability and the value of its cumulative distribution function. 
 ```{r}
 EmpDistrib <- function(lambda, n_sample,T,tau){
     pp=PoissonProcess(lambda,T)
@@ -83,6 +92,8 @@ EmpDistrib <- function(lambda, n_sample,T,tau){
     return(EmpDis)
     }
 ```
+
+This function plot the cumulative distribution function associated to an empirical distribution function
 ```{r}
 Plot_CDF <- function(lambda,n_sample,T,tau){
     Emp=EmpDistrib(lambda,n_sample,T,tau)
@@ -103,19 +114,12 @@ tau=1
 ppH0=PoissonProcess(lambda0,T)
 CDF=Plot_CDF(lambda0,n_sample,T,tau)
 ```
-```{r}
-n_sample=10**4
-lambda1=4
-T=10
-tau=1
-ppH0=PoissonProcess(lambda1,T)
-CDF=Plot_CDF(lambda1,n_sample,T,tau)
-```
 
+Compute $p$-value for scan statistic of `ppH1` with `Emp`:
 ```{r}
-PValue <- function(Emp,ppH1, T, tau){
-    scanH1=ScanStat(ppH1,T,tau)[2]
-    index_scanH1=ScanStat(ppH1,T,tau)[1]
+PValue <- function(Emp,ppH, T, tau){
+    scanH1=ScanStat(ppH,T,tau)[2]
+    index_scanH1=ScanStat(ppH,T,tau)[1]
     index=Emp$index_scan
     n=length(index)
     if (scanH1< min(Emp$index_scan)){
@@ -134,8 +138,8 @@ NbSeqH0=10000
 NbSeqH1=NbSeqH0
 DataH0=vector("list")
 DataH1=vector("list")
-lambda0=4
-lambda1=10
+lambda0=2
+lambda1=5
 T=10
 tau=1
 
@@ -165,7 +169,7 @@ TimeBetweenEventList <- function(list,n_list){
 }
 tbe0=TimeBetweenEventList(DataH0,NbSeqH0)
 ```
-We compute the p-value associated to all 5 sequences, and stock them in a vector. 
+We compute the p-value associated to all 10000 sequences, and stock them in a vector. 
 
 ```{r}
 #We start by computing the empirical distribution for lambda0
@@ -183,8 +187,9 @@ for (i in 1:NbSeqH0){
     index_scan = c(index_scan,result[3])
 }
 
-ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)) 
-sum(ScS_H0$class[which(ScS_H0$class==TRUE)])/NbSeqH0
+ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)*1) 
+ScS_H0
+sum(ScS_H0$class[which(ScS_H0$class=='1')])/NbSeqH0
 ```
 
 ```{r}
@@ -201,11 +206,13 @@ for (i in 1:NbSeqH1){
     pvalue=c(pvalue,result[2])
     index_scan=c(index_scan,result[3])
 }
-ScS_H1=data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05), begin_scan=index_scan)
-sum(ScS_H1$class[which(ScS_H1$class==TRUE)])/NbSeqH1
+ScS_H1 = data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05)*1, begin_scan=index_scan)
+ScS_H1
+sum(ScS_H1$class[which(ScS_H1$class=='1')])/NbSeqH1
 
 ```
 
+`ScanStatMC` compute local score for `Emp`:
 ```{r}
 ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
     scan=c()
@@ -227,6 +234,7 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
 
 ## 3. Local score
 ### 3.1. Distribution of scores via Monte Carlo
+`ComputeE` compute `E` coefficient:
 ```{r}
 ComputeE <- function(lambda0, lambda1){
     E = 1
@@ -240,6 +248,7 @@ ComputeE <- function(lambda0, lambda1){
 }
 ```
 
+`ScoreDistribEmpiric` compute score for empiric distribution:
 ```{r}
 ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){
     E = ComputeE(lambda0, lambda1)
@@ -335,26 +344,41 @@ LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){
 
 ## 4. Experience plan for comparaison
 ```{r}
-NbSeq = 10**3
+NbSeq = 10**2
 T = 10
-for (lambda0 in (2:5)){
+for (lambda0 in (2)){
   Sensitivity = c()
   Specificity = c()
   accepted_lambda = c()
 
-  for (lambda1 in c(3:8)){
+  for (lambda1 in c(3)){
     if (lambda0 < lambda1){
-      accepted_lambda=c(accepted_lambda,lambda1)
+      
+      accepted_lambda = c(accepted_lambda,lambda1)
       cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
-      tbe0=vector("list",length=NbSeq)
+      tbe0 = vector("list",length=NbSeq)
       pp0 =  vector("list", length = NbSeq)
+      pp1 =  vector("list", length = NbSeq)
+      tbe1 = vector("list", length =  NbSeq)
+      
+      theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
+      
+      
       for (i in (1:NbSeq)) {
+        #Simulation for sequences under H0
         ppi = PoissonProcess(lambda0,T)
         ni=length(ppi)
         pp0[[i]] = ppi
-        tbei=ppi[2:ni]-ppi[1:ni-1]
-        tbe0[[i]]=tbei
-        }
+        tbei = ppi[2:ni]-ppi[1:ni-1]
+        tbe0[[i]] = tbei
+        
+        #Simulation for sequences under H1
+        ppj1 = SimulationH1(lambda0, lambda1, T, tau)
+        nj = length(ppj1)
+        pp1[[i]] = ppj1
+        tbej = ppj1[2:nj]-ppj1[1:nj-1]
+        tbe1[[i]] = tbej
+      }
             
       #cat("- Empiric version:\n")
       Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T)
@@ -366,6 +390,9 @@ for (lambda0 in (2:5)){
       LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
       options(warn = -1) # Disable warnings print
       SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
+      SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
+      
+      SS_expected = c(SS_H0$class, SS_H1$class)
             
       #cat("Local Score:\n")
       #print(summary(LS_H0))
@@ -391,9 +418,9 @@ for (lambda0 in (2:5)){
       #cat("Scan Statistics:\n")
       #print(summary(SS_H0))
       #cat("Confusion Matrix:\n")
-      print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$table)
-      Sensitivity = c(Sensitivity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[1])
-      Specificity = c(Specificity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[2])
+      print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
+      #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
+      #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
 
       cat("---\n")
       
@@ -408,3 +435,85 @@ for (lambda0 in (2:5)){
 }
 ```
 
+```{r}
+theo = c(0,0,0,1,1,1)
+exp = c(0,1,1,1,1,0)
+
+confusionMatrix(factor(exp), factor(theo), positive = '1') #prédiction puis théorique
+```
+
+```{r}
+NbSeq = 10**2
+T = 10
+lambda0 = 2
+lambda1 = 5
+n_sample=10**4
+
+cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
+tbe0 = vector("list",length=NbSeq)
+pp0 =  vector("list", length = NbSeq)
+pp1 =  vector("list", length = NbSeq)
+tbe1 = vector("list", length =  NbSeq)
+      
+theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
+      
+      
+for (i in (1:NbSeq)) {
+  #Simulation for sequences under H0
+  ppi = PoissonProcess(lambda0,T)
+  ni=length(ppi)
+  pp0[[i]] = ppi
+  tbei = ppi[2:ni]-ppi[1:ni-1]
+  tbe0[[i]] = tbei
+        
+  #Simulation for sequences under H1
+  ppj1 = SimulationH1(lambda0, lambda1, T, tau)
+  nj = length(ppj1)
+  pp1[[i]] = ppj1
+  tbej = ppj1[2:nj]-ppj1[1:nj-1]
+  tbe1[[i]] = tbej
+  }
+            
+      Emp = EmpDistrib(lambda0,n_sample,T,tau)
+
+      SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
+      SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
+      
+      SS_expected = c(SS_H0$class, SS_H1$class)
+            
+      #cat("Local Score:\n")
+      #print(summary(LS_H0))
+      #cat("Scan Statistics:\n")
+      #print(summary(SS_H0))
+      #cat("Confusion Matrix:\n")
+      #print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class)))
+        
+      #cat("- Elisa version:\n")
+      Score = ScoreDistribElisa(lambda0, lambda1, T)
+      Emp = EmpDistrib(lambda0,n_sample,T,tau)
+
+      X_seq = Score$Score_X
+      P_X = Score$P_X
+            
+      LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
+      options(warn = -1) # Disable warnings print
+
+      SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
+        
+      #cat("Local Score:\n")
+      #print(summary(LS_H0))
+      #cat("Scan Statistics:\n")
+      #print(summary(SS_H0))
+      #cat("Confusion Matrix:\n")
+      print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
+      #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
+      #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
+
+      cat("---\n")
+      
+  titleSens=TeX(paste(r'(Sensitivity for $\lambda_0=$)', lambda0))
+  plot(x=accepted_lambda,y=Sensitivity, type='l', main = titleSens)
+  
+  titleSpec=TeX(paste(r'(Specificity for $\lambda_0=$)', lambda0))
+  plot(x=accepted_lambda,y=Specificity, type='l', main = titleSpec)
+```