From ff0f6cf253f3e5565d65ea638eeecb97050693ac Mon Sep 17 00:00:00 2001 From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com> Date: Tue, 19 Apr 2022 08:10:45 +0200 Subject: [PATCH 1/3] Update titles --- Comparaison_of_methods.rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd index b0f0a00..b4f38ad 100644 --- a/Comparaison_of_methods.rmd +++ b/Comparaison_of_methods.rmd @@ -91,7 +91,7 @@ Plot_CDF <- function(lambda,n_sample,T,tau){ return(Emp) } ``` -### 2.1 Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_0: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$ +### 2.1. Test of $\mathcal{H}_0: \lambda=\lambda_0$ against $\mathcal{H}_1: \lambda=\lambda_1$, where $\lambda_1 > \lambda_0$ In this part, we will test different values for $\lambda_0$ and $\lambda_1$, and compute the probability of occurrence of a certain scan statistic. ```{r} @@ -226,7 +226,7 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){ ``` ## 3. Local score -### Distribution of scores via Monte Carlo +### 3.1. Distribution of scores via Monte Carlo ```{r} ComputeE <- function(lambda0, lambda1){ E = 1 @@ -305,7 +305,7 @@ mtext("Distribution des scores via la méthode théorique",side=1,line=4,col="re ``` -### Local score calculation +### 3.2. 
Local score calculation ```{r} LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){ E = ComputeE(lambda0, lambda1) From b7e079a55e4e7cc5793f628595576764dea2d6af Mon Sep 17 00:00:00 2001 From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com> Date: Tue, 19 Apr 2022 10:30:49 +0200 Subject: [PATCH 2/3] Passing to function plot_graph_distrib_score --- Comparaison_of_methods.rmd | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd index b4f38ad..dc66d70 100644 --- a/Comparaison_of_methods.rmd +++ b/Comparaison_of_methods.rmd @@ -285,23 +285,25 @@ ScoreDistribElisa <- function(lambda0, lambda1, T){ ``` ```{r} -distrib_score_mc=ScoreDistribEmpiric(2,3,10000,T) +distrib_score_mc = ScoreDistribEmpiric(2,3,10000,T) +distrib_score_theo = ScoreDistribElisa(2,3,T) -distrib_score_theo=ScoreDistribElisa(2,3,T) +plot_graph_distrib_score <- function(distrib_score_theo, distrib_score_mc){ + # length(distrib_score_mc[,2]) + # length(distrib_score_theo[,2]) + #diff_distrib_score=abs(distrib_score_mc[,2]-distrib_score_theo[,2]) -length(distrib_score_mc[,2]) -length(distrib_score_theo[,2]) + #par(mfrow = c(1,2)) + barplot(distrib_score_mc[,2],col="blue",axes=F) + mtext("Distribution of scores via Monte Carlo",side=1,line=2.5,col="blue") + axis(2, ylim=c(0,10)) + par(new = T) + barplot(distrib_score_theo[,2],col="red",axes=F) + mtext("Distribution of scores using the theoretical method",side=1,line=4,col="red") +} -#diff_distrib_score=abs(distrib_score_mc[,2]-distrib_score_theo[,2]) - -#par(mfrow = c(1,2)) -barplot(distrib_score_mc[,2],col="blue",axes=F) -mtext("Distribution des scores via Monte_Carlo",side=1,line=2.5,col="blue") -axis(2, ylim=c(0,10)) -par(new = T) -barplot(distrib_score_theo[,2],col="red",axes=F) -mtext("Distribution des scores via la méthode théorique",side=1,line=4,col="red") +plot_graph_distrib_score(distrib_score_theo, 
distrib_score_mc) ``` From 6e3480d478b75dcf117e3bc0162cb9695931a893 Mon Sep 17 00:00:00 2001 From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com> Date: Tue, 19 Apr 2022 11:13:31 +0200 Subject: [PATCH 3/3] Merge codes and comments --- Comparaison_of_methods.rmd | 173 ++++++++++++++++++++++++++++++------- 1 file changed, 141 insertions(+), 32 deletions(-) diff --git a/Comparaison_of_methods.rmd b/Comparaison_of_methods.rmd index dc66d70..315310e 100644 --- a/Comparaison_of_methods.rmd +++ b/Comparaison_of_methods.rmd @@ -16,11 +16,15 @@ library("caret") ## 1. Proposition for simulations under $\mathcal{H}_1$ In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence. +The function `PoissonProcess` simulates the event times of a Poisson process of parameter `lambda` on $[0, T]$. ```{r} PoissonProcess <- function(lambda,T) { return(sort(runif(rpois(1,lambda*T),0,T))) } +``` +The following function creates a sequence under $\mathcal{H}_0$ and adds a sequence under $\mathcal{H}_1$. +```{r} SimulationH1 <- function(lambda0, lambda1,T,tau){ ppH0=PoissonProcess(lambda0,T) ppH1.segt=PoissonProcess(lambda1,tau) @@ -35,6 +39,7 @@ SimulationH1 <- function(lambda0, lambda1,T,tau){ ``` +`TimeBetweenEvent` computes the time between events for a `pp` sequence. ```{r} TimeBetweenEvent <- function(pp){ n=length(pp) @@ -49,7 +54,9 @@ DataFrame <- function(pp,tbe){ ``` ## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method -In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$. 
+In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independent samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$. + +The function `ScanStat` computes the scan statistic for a sequence, given some parameters $T$ and $\tau$. This function returns the index of the event at which the maximal window starts, followed by the value of the scan statistic. ```{r} ScanStat <- function(pp, T, tau){ n=length(pp) @@ -58,13 +65,15 @@ ScanStat <- function(pp, T, tau){ for (i in (1:stop)) { x=which((pp>=pp[i])&(pp<=(pp[i]+tau))) scan=length(x) - if (scan>ScanStat) {ScanStat=scan} + if (scan>ScanStat) {ScanStat=scan + max=i} } - return (c(i,ScanStat)) + return (c(max,ScanStat)) } ``` We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence. +The function `EmpDistrib` computes the empirical distribution of the scan statistic using a Monte Carlo estimator. It returns a data frame containing the value of the scan statistic, its probability and the value of its cumulative distribution function. 
```{r} EmpDistrib <- function(lambda, n_sample,T,tau){ pp=PoissonProcess(lambda,T) @@ -83,6 +92,8 @@ EmpDistrib <- function(lambda, n_sample,T,tau){ return(EmpDis) } ``` + +This function plots the cumulative distribution function associated with an empirical distribution. ```{r} Plot_CDF <- function(lambda,n_sample,T,tau){ Emp=EmpDistrib(lambda,n_sample,T,tau) @@ -103,19 +114,12 @@ tau=1 ppH0=PoissonProcess(lambda0,T) CDF=Plot_CDF(lambda0,n_sample,T,tau) ``` -```{r} -n_sample=10**4 -lambda1=4 -T=10 -tau=1 -ppH0=PoissonProcess(lambda1,T) -CDF=Plot_CDF(lambda1,n_sample,T,tau) -``` +Compute the $p$-value for the scan statistic of `ppH` with `Emp`: ```{r} -PValue <- function(Emp,ppH1, T, tau){ - scanH1=ScanStat(ppH1,T,tau)[2] - index_scanH1=ScanStat(ppH1,T,tau)[1] +PValue <- function(Emp,ppH, T, tau){ + scanH1=ScanStat(ppH,T,tau)[2] + index_scanH1=ScanStat(ppH,T,tau)[1] index=Emp$index_scan n=length(index) if (scanH1< min(Emp$index_scan)){ @@ -134,8 +138,8 @@ NbSeqH0=10000 NbSeqH1=NbSeqH0 DataH0=vector("list") DataH1=vector("list") -lambda0=4 -lambda1=10 +lambda0=2 +lambda1=5 T=10 tau=1 @@ -165,7 +169,7 @@ TimeBetweenEventList <- function(list,n_list){ } tbe0=TimeBetweenEventList(DataH0,NbSeqH0) ``` -We compute the p-value associated to all 5 sequences, and stock them in a vector. +We compute the p-values associated with all 10000 sequences, and store them in a vector. 
```{r} #We start by computing the empirical distribution for lambda0 @@ -183,8 +187,9 @@ for (i in 1:NbSeqH0){ index_scan = c(index_scan,result[3]) } -ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)) -sum(ScS_H0$class[which(ScS_H0$class==TRUE)])/NbSeqH0 +ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)*1) +ScS_H0 +sum(ScS_H0$class[which(ScS_H0$class=='1')])/NbSeqH0 ``` ```{r} @@ -201,11 +206,13 @@ for (i in 1:NbSeqH1){ pvalue=c(pvalue,result[2]) index_scan=c(index_scan,result[3]) } -ScS_H1=data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05), begin_scan=index_scan) -sum(ScS_H1$class[which(ScS_H1$class==TRUE)])/NbSeqH1 +ScS_H1 = data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05)*1, begin_scan=index_scan) +ScS_H1 +sum(ScS_H1$class[which(ScS_H1$class=='1')])/NbSeqH1 ``` +`ScanStatMC` computes the scan statistics of the sequences in `pp0` using `Emp`: ```{r} ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){ scan=c() @@ -227,6 +234,7 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){ ## 3. Local score ### 3.1. Distribution of scores via Monte Carlo +`ComputeE` computes the `E` coefficient: ```{r} ComputeE <- function(lambda0, lambda1){ E = 1 @@ -240,6 +248,7 @@ ComputeE <- function(lambda0, lambda1){ } ``` +`ScoreDistribEmpiric` computes the empirical distribution of scores: ```{r} ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){ E = ComputeE(lambda0, lambda1) @@ -335,26 +344,41 @@ LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){ ## 4. 
Experience plan for comparaison ```{r} -NbSeq = 10**3 +NbSeq = 10**2 T = 10 -for (lambda0 in (2:5)){ +for (lambda0 in (2)){ Sensitivity = c() Specificity = c() accepted_lambda = c() - for (lambda1 in c(3:8)){ + for (lambda1 in c(3)){ if (lambda0 < lambda1){ - accepted_lambda=c(accepted_lambda,lambda1) + + accepted_lambda = c(accepted_lambda,lambda1) cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "") - tbe0=vector("list",length=NbSeq) + tbe0 = vector("list",length=NbSeq) pp0 = vector("list", length = NbSeq) + pp1 = vector("list", length = NbSeq) + tbe1 = vector("list", length = NbSeq) + + theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq)) + + for (i in (1:NbSeq)) { + #Simulation for sequences under H0 ppi = PoissonProcess(lambda0,T) ni=length(ppi) pp0[[i]] = ppi - tbei=ppi[2:ni]-ppi[1:ni-1] - tbe0[[i]]=tbei - } + tbei = ppi[2:ni]-ppi[1:ni-1] + tbe0[[i]] = tbei + + #Simulation for sequences under H1 + ppj1 = SimulationH1(lambda0, lambda1, T, tau) + nj = length(ppj1) + pp1[[i]] = ppj1 + tbej = ppj1[2:nj]-ppj1[1:nj-1] + tbe1[[i]] = tbej + } #cat("- Empiric version:\n") Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T) @@ -366,6 +390,9 @@ for (lambda0 in (2:5)){ LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0) options(warn = -1) # Disable warnings print SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0) + SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1) + + SS_expected = c(SS_H0$class, SS_H1$class) #cat("Local Score:\n") #print(summary(LS_H0)) @@ -391,9 +418,9 @@ for (lambda0 in (2:5)){ #cat("Scan Statistics:\n") #print(summary(SS_H0)) #cat("Confusion Matrix:\n") - print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$table) - Sensitivity = c(Sensitivity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[1]) - Specificity = c(Specificity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[2]) + print(confusionMatrix(factor(theoretical_results), 
factor(SS_expected))) + #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1]) + #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2]) cat("---\n") @@ -408,3 +435,85 @@ for (lambda0 in (2:5)){ } ``` +```{r} +theo = c(0,0,0,1,1,1) +exp = c(0,1,1,1,1,0) + +confusionMatrix(factor(exp), factor(theo), positive = '1') #prédiction puis théorique +``` + +```{r} +NbSeq = 10**2 +T = 10 +lambda0 = 2 +lambda1 = 5 +n_sample=10**4 + +cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "") +tbe0 = vector("list",length=NbSeq) +pp0 = vector("list", length = NbSeq) +pp1 = vector("list", length = NbSeq) +tbe1 = vector("list", length = NbSeq) + +theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq)) + + +for (i in (1:NbSeq)) { + #Simulation for sequences under H0 + ppi = PoissonProcess(lambda0,T) + ni=length(ppi) + pp0[[i]] = ppi + tbei = ppi[2:ni]-ppi[1:ni-1] + tbe0[[i]] = tbei + + #Simulation for sequences under H1 + ppj1 = SimulationH1(lambda0, lambda1, T, tau) + nj = length(ppj1) + pp1[[i]] = ppj1 + tbej = ppj1[2:nj]-ppj1[1:nj-1] + tbe1[[i]] = tbej + } + + Emp = EmpDistrib(lambda0,n_sample,T,tau) + + SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0) + SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1) + + SS_expected = c(SS_H0$class, SS_H1$class) + + #cat("Local Score:\n") + #print(summary(LS_H0)) + #cat("Scan Statistics:\n") + #print(summary(SS_H0)) + #cat("Confusion Matrix:\n") + #print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))) + + #cat("- Elisa version:\n") + Score = ScoreDistribElisa(lambda0, lambda1, T) + Emp = EmpDistrib(lambda0,n_sample,T,tau) + + X_seq = Score$Score_X + P_X = Score$P_X + + LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0) + options(warn = -1) # Disable warnings print + + SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0) + + #cat("Local Score:\n") + #print(summary(LS_H0)) + #cat("Scan 
Statistics:\n") + #print(summary(SS_H0)) + #cat("Confusion Matrix:\n") + print(confusionMatrix(factor(theoretical_results), factor(SS_expected))) + #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1]) + #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2]) + + cat("---\n") + + titleSens=TeX(paste(r'(Sensitivity for $\lambda_0=$)', lambda0)) + plot(x=accepted_lambda,y=Sensitivity, type='l', main = titleSens) + + titleSpec=TeX(paste(r'(Specificity for $\lambda_0=$)', lambda0)) + plot(x=accepted_lambda,y=Specificity, type='l', main = titleSpec) +```