Merge codes and comments

This commit is contained in:
Paul-Corbalan 2022-04-19 11:13:31 +02:00
parent b7e079a55e
commit 6e3480d478
1 changed files with 141 additions and 32 deletions

View File

@ -16,11 +16,15 @@ library("caret")
## 1. Proposition for simulations under $\mathcal{H}_1$ ## 1. Proposition for simulations under $\mathcal{H}_1$
In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence. In this part, we propose a method that simulates a Poisson process under the hypothesis $\mathcal{H}_1$. The idea is to simulate a sample under $\mathcal{H}_0$, and add randomly a subsequence under the alternative hypothesis in this sequence.
The function `PoissonProcess` creates a sequence of Poisson process of a parameter lambda
```{r} ```{r}
PoissonProcess <- function(lambda,T) { PoissonProcess <- function(lambda,T) {
return(sort(runif(rpois(1,lambda*T),0,T))) return(sort(runif(rpois(1,lambda*T),0,T)))
} }
```
The following function creates a sequence under H0 and add a sequence under H1.
```{r}
SimulationH1 <- function(lambda0, lambda1,T,tau){ SimulationH1 <- function(lambda0, lambda1,T,tau){
ppH0=PoissonProcess(lambda0,T) ppH0=PoissonProcess(lambda0,T)
ppH1.segt=PoissonProcess(lambda1,tau) ppH1.segt=PoissonProcess(lambda1,tau)
@ -35,6 +39,7 @@ SimulationH1 <- function(lambda0, lambda1,T,tau){
``` ```
`TimeBetweenEvent` compute Time Between Event for a `pp` interval.
```{r} ```{r}
TimeBetweenEvent <- function(pp){ TimeBetweenEvent <- function(pp){
n=length(pp) n=length(pp)
@ -49,7 +54,9 @@ DataFrame <- function(pp,tbe){
``` ```
## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method ## 2. Simulation of the sequences under $\mathcal{H}_0$ via a Monte Carlo Method
In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$. In this part, we will try to simulate, using a Monte Carlo method, a set of $10^5$ independant samples, under the assumption that $\lambda=\lambda_0$, hence, that we are under the null hypothesis $\mathcal{H}_0$.
The function `ScanStat` compute the scan statistic for a sequence, given some parameters $T$ and $\tau$. This function returns the value of the scan stat, and the index of the sequence where it happens
```{r} ```{r}
ScanStat <- function(pp, T, tau){ ScanStat <- function(pp, T, tau){
n=length(pp) n=length(pp)
@ -58,13 +65,15 @@ ScanStat <- function(pp, T, tau){
for (i in (1:stop)) { for (i in (1:stop)) {
x=which((pp>=pp[i])&(pp<=(pp[i]+tau))) x=which((pp>=pp[i])&(pp<=(pp[i]+tau)))
scan=length(x) scan=length(x)
if (scan>ScanStat) {ScanStat=scan} if (scan>ScanStat) {ScanStat=scan
max=i}
} }
return (c(i,ScanStat)) return (c(max,ScanStat))
} }
``` ```
We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence. We test the scan statistic method for different values of $\lambda_0$. The method of scan statistic we implemented will allow us to have access to the scan test statistic and where it happens in the sequence.
This function `EmpDistrib` compute the empirical distribution using a Monte Carlo estimator for the scan statistic method. It returns a Data Frame, containing the value of the scan, its probability and the value of its cumulative distribution function.
```{r} ```{r}
EmpDistrib <- function(lambda, n_sample,T,tau){ EmpDistrib <- function(lambda, n_sample,T,tau){
pp=PoissonProcess(lambda,T) pp=PoissonProcess(lambda,T)
@ -83,6 +92,8 @@ EmpDistrib <- function(lambda, n_sample,T,tau){
return(EmpDis) return(EmpDis)
} }
``` ```
This function plot the cumulative distribution function associated to an empirical distribution function
```{r} ```{r}
Plot_CDF <- function(lambda,n_sample,T,tau){ Plot_CDF <- function(lambda,n_sample,T,tau){
Emp=EmpDistrib(lambda,n_sample,T,tau) Emp=EmpDistrib(lambda,n_sample,T,tau)
@ -103,19 +114,12 @@ tau=1
ppH0=PoissonProcess(lambda0,T) ppH0=PoissonProcess(lambda0,T)
CDF=Plot_CDF(lambda0,n_sample,T,tau) CDF=Plot_CDF(lambda0,n_sample,T,tau)
``` ```
```{r}
n_sample=10**4
lambda1=4
T=10
tau=1
ppH0=PoissonProcess(lambda1,T)
CDF=Plot_CDF(lambda1,n_sample,T,tau)
```
Compute $p$-value for scan statistic of `ppH1` with `Emp`:
```{r} ```{r}
PValue <- function(Emp,ppH1, T, tau){ PValue <- function(Emp,ppH, T, tau){
scanH1=ScanStat(ppH1,T,tau)[2] scanH1=ScanStat(ppH,T,tau)[2]
index_scanH1=ScanStat(ppH1,T,tau)[1] index_scanH1=ScanStat(ppH,T,tau)[1]
index=Emp$index_scan index=Emp$index_scan
n=length(index) n=length(index)
if (scanH1< min(Emp$index_scan)){ if (scanH1< min(Emp$index_scan)){
@ -134,8 +138,8 @@ NbSeqH0=10000
NbSeqH1=NbSeqH0 NbSeqH1=NbSeqH0
DataH0=vector("list") DataH0=vector("list")
DataH1=vector("list") DataH1=vector("list")
lambda0=4 lambda0=2
lambda1=10 lambda1=5
T=10 T=10
tau=1 tau=1
@ -165,7 +169,7 @@ TimeBetweenEventList <- function(list,n_list){
} }
tbe0=TimeBetweenEventList(DataH0,NbSeqH0) tbe0=TimeBetweenEventList(DataH0,NbSeqH0)
``` ```
We compute the p-value associated to all 5 sequences, and stock them in a vector. We compute the p-value associated to all 10000 sequences, and stock them in a vector.
```{r} ```{r}
#We start by computing the empirical distribution for lambda0 #We start by computing the empirical distribution for lambda0
@ -183,8 +187,9 @@ for (i in 1:NbSeqH0){
index_scan = c(index_scan,result[3]) index_scan = c(index_scan,result[3])
} }
ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)) ScS_H0=data.frame(num=(1:NbSeqH0), scan_stat=scan, pvalue_scan=pvalue,class=c(pvalue<0.05)*1)
sum(ScS_H0$class[which(ScS_H0$class==TRUE)])/NbSeqH0 ScS_H0
sum(ScS_H0$class[which(ScS_H0$class=='1')])/NbSeqH0
``` ```
```{r} ```{r}
@ -201,11 +206,13 @@ for (i in 1:NbSeqH1){
pvalue=c(pvalue,result[2]) pvalue=c(pvalue,result[2])
index_scan=c(index_scan,result[3]) index_scan=c(index_scan,result[3])
} }
ScS_H1=data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05), begin_scan=index_scan) ScS_H1 = data.frame(num=1:NbSeqH1, scan_stat=scan, pvalue_scan=pvalue, class=(pvalue<0.05)*1, begin_scan=index_scan)
sum(ScS_H1$class[which(ScS_H1$class==TRUE)])/NbSeqH1 ScS_H1
sum(ScS_H1$class[which(ScS_H1$class=='1')])/NbSeqH1
``` ```
`ScanStatMC` compute local score for `Emp`:
```{r} ```{r}
ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
scan=c() scan=c()
@ -227,6 +234,7 @@ ScanStatMC <- function(NbSeq, T, tau, Emp, pp0){
## 3. Local score ## 3. Local score
### 3.1. Distribution of scores via Monte Carlo ### 3.1. Distribution of scores via Monte Carlo
`ComputeE` compute `E` coefficient:
```{r} ```{r}
ComputeE <- function(lambda0, lambda1){ ComputeE <- function(lambda0, lambda1){
E = 1 E = 1
@ -240,6 +248,7 @@ ComputeE <- function(lambda0, lambda1){
} }
``` ```
`ScoreDistribEmpiric` compute score for empiric distribution:
```{r} ```{r}
ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){ ScoreDistribEmpiric <- function(lambda0, lambda1, n_sample, T){
E = ComputeE(lambda0, lambda1) E = ComputeE(lambda0, lambda1)
@ -335,26 +344,41 @@ LocalScoreMC <- function(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0){
## 4. Experience plan for comparaison ## 4. Experience plan for comparaison
```{r} ```{r}
NbSeq = 10**3 NbSeq = 10**2
T = 10 T = 10
for (lambda0 in (2:5)){ for (lambda0 in (2)){
Sensitivity = c() Sensitivity = c()
Specificity = c() Specificity = c()
accepted_lambda = c() accepted_lambda = c()
for (lambda1 in c(3:8)){ for (lambda1 in c(3)){
if (lambda0 < lambda1){ if (lambda0 < lambda1){
accepted_lambda=c(accepted_lambda,lambda1)
accepted_lambda = c(accepted_lambda,lambda1)
cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "") cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
tbe0=vector("list",length=NbSeq) tbe0 = vector("list",length=NbSeq)
pp0 = vector("list", length = NbSeq) pp0 = vector("list", length = NbSeq)
pp1 = vector("list", length = NbSeq)
tbe1 = vector("list", length = NbSeq)
theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
for (i in (1:NbSeq)) { for (i in (1:NbSeq)) {
#Simulation for sequences under H0
ppi = PoissonProcess(lambda0,T) ppi = PoissonProcess(lambda0,T)
ni=length(ppi) ni=length(ppi)
pp0[[i]] = ppi pp0[[i]] = ppi
tbei=ppi[2:ni]-ppi[1:ni-1] tbei = ppi[2:ni]-ppi[1:ni-1]
tbe0[[i]]=tbei tbe0[[i]] = tbei
}
#Simulation for sequences under H1
ppj1 = SimulationH1(lambda0, lambda1, T, tau)
nj = length(ppj1)
pp1[[i]] = ppj1
tbej = ppj1[2:nj]-ppj1[1:nj-1]
tbe1[[i]] = tbej
}
#cat("- Empiric version:\n") #cat("- Empiric version:\n")
Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T) Score = ScoreDistribEmpiric(lambda0, lambda1, NbSeq, T)
@ -366,6 +390,9 @@ for (lambda0 in (2:5)){
LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0) LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
options(warn = -1) # Disable warnings print options(warn = -1) # Disable warnings print
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0) SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
SS_expected = c(SS_H0$class, SS_H1$class)
#cat("Local Score:\n") #cat("Local Score:\n")
#print(summary(LS_H0)) #print(summary(LS_H0))
@ -391,9 +418,9 @@ for (lambda0 in (2:5)){
#cat("Scan Statistics:\n") #cat("Scan Statistics:\n")
#print(summary(SS_H0)) #print(summary(SS_H0))
#cat("Confusion Matrix:\n") #cat("Confusion Matrix:\n")
print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$table) print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
Sensitivity = c(Sensitivity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[1]) #Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
Specificity = c(Specificity,confusionMatrix(factor(LS_H0$class), factor(SS_H0$class))$byClass[2]) #Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
cat("---\n") cat("---\n")
@ -408,3 +435,85 @@ for (lambda0 in (2:5)){
} }
``` ```
```{r}
theo = c(0,0,0,1,1,1)
exp = c(0,1,1,1,1,0)
confusionMatrix(factor(exp), factor(theo), positive = '1') #prédiction puis théorique
```
```{r}
NbSeq = 10**2
T = 10
lambda0 = 2
lambda1 = 5
n_sample=10**4
cat("For T = ", T, ", Nb = ", NbSeq, ", lambda0 = ", lambda0, " and lambda1 = ", lambda1, ":\n", sep = "")
tbe0 = vector("list",length=NbSeq)
pp0 = vector("list", length = NbSeq)
pp1 = vector("list", length = NbSeq)
tbe1 = vector("list", length = NbSeq)
theoretical_results = c(rep(0,NbSeq), rep(1,NbSeq))
for (i in (1:NbSeq)) {
#Simulation for sequences under H0
ppi = PoissonProcess(lambda0,T)
ni=length(ppi)
pp0[[i]] = ppi
tbei = ppi[2:ni]-ppi[1:ni-1]
tbe0[[i]] = tbei
#Simulation for sequences under H1
ppj1 = SimulationH1(lambda0, lambda1, T, tau)
nj = length(ppj1)
pp1[[i]] = ppj1
tbej = ppj1[2:nj]-ppj1[1:nj-1]
tbe1[[i]] = tbej
}
Emp = EmpDistrib(lambda0,n_sample,T,tau)
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
SS_H1 = ScanStatMC(NbSeq, T, tau, Emp, pp1)
SS_expected = c(SS_H0$class, SS_H1$class)
#cat("Local Score:\n")
#print(summary(LS_H0))
#cat("Scan Statistics:\n")
#print(summary(SS_H0))
#cat("Confusion Matrix:\n")
#print(confusionMatrix(factor(LS_H0$class), factor(SS_H0$class)))
#cat("- Elisa version:\n")
Score = ScoreDistribElisa(lambda0, lambda1, T)
Emp = EmpDistrib(lambda0,n_sample,T,tau)
X_seq = Score$Score_X
P_X = Score$P_X
LS_H0 = LocalScoreMC(lambda0, lambda1, NbSeq, T, X_seq, P_X, tbe0)
options(warn = -1) # Disable warnings print
SS_H0 = ScanStatMC(NbSeq, T, tau, Emp, pp0)
#cat("Local Score:\n")
#print(summary(LS_H0))
#cat("Scan Statistics:\n")
#print(summary(SS_H0))
#cat("Confusion Matrix:\n")
print(confusionMatrix(factor(theoretical_results), factor(SS_expected)))
#Sensitivity = c(Sensitivity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[1])
#Specificity = c(Specificity,confusionMatrix(factor(theoretical_results), factor(SS_expected))$byClass[2])
cat("---\n")
titleSens=TeX(paste(r'(Sensitivity for $\lambda_0=$)', lambda0))
plot(x=accepted_lambda,y=Sensitivity, type='l', main = titleSens)
titleSpec=TeX(paste(r'(Specificity for $\lambda_0=$)', lambda0))
plot(x=accepted_lambda,y=Specificity, type='l', main = titleSpec)
```