From 850684d213bed6cb560934d06d95f2f30d8d90a0 Mon Sep 17 00:00:00 2001 From: Paul-Corbalan <58653590+Paul-Corbalan@users.noreply.github.com> Date: Sat, 7 Jan 2023 07:30:24 +0100 Subject: [PATCH] Upload of code --- .gitignore | 67 ++ README.md | 23 + compare_data.py | 98 ++ compare_original_synthetic.ipynb | 130 +++ create_fake_data.ipynb | 133 +++ data_treatment.py | 100 ++ discriminator.py | 60 ++ environment.yml | Bin 0 -> 16094 bytes fake_data_analysis.ipynb | 127 +++ format_data.py | 18 + generator.py | 68 ++ original_data/diabetes.csv | 769 ++++++++++++++ train_generator_discriminator.py | 156 +++ utils.py | 24 + visualisation/Prog_R_IJB_2022.Rmd | 1643 +++++++++++++++++++++++++++++ 15 files changed, 3416 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 compare_data.py create mode 100644 compare_original_synthetic.ipynb create mode 100644 create_fake_data.ipynb create mode 100644 data_treatment.py create mode 100644 discriminator.py create mode 100644 environment.yml create mode 100644 fake_data_analysis.ipynb create mode 100644 format_data.py create mode 100644 generator.py create mode 100644 original_data/diabetes.csv create mode 100644 train_generator_discriminator.py create mode 100644 utils.py create mode 100644 visualisation/Prog_R_IJB_2022.Rmd diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..741bbe7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +__pycache__ +creditcard.csv +*.txt +*.dot +data/models/ +runs/ +fake_data/* +.ipynb_checkpoints/ +*.csv +images/ +*.pt + +*.RData +*.Rhistory diff --git a/README.md b/README.md new file mode 100644 index 0000000..914f9c1 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# Use of "Generative Adversarial Networks" for the generation of virtual patients +## Abstract +The use of Generative Adversarial Networks (GANs) for the generation of virtual patients is a promising approach for healthcare applications. GANs are a type of deep learning model that can generate new, realistic data that is similar to existing data. In this report, we discuss the use of GANs for the generation of virtual patients. We review the current state of the art in GANs and their applications with the Pima Indians Diabetes Database. +We compare this work with what has been done previously with this dataset for the copulas from article *Agent-Based modeling in Medical Research. Example in Health Economics*[1]. +A practical case is discussed with the training of the Generative Adversarial Network, for several possible configurations and taking into account what is done in similar works (*Data augmentation using GANs*[2]). We then generated data with the GAN and copulas for comparison. +Through this comparison, we observe different generated data in terms of distribution for those generated with Generative Adversarial Networks, compared to the original data. The data generated by the copulas are much closer in terms of the spread. 
We also conclude that both methods are currently limited in generating atypical patient data, but still efficient in generating more conventional data. + +[1] Philippe Saint-Pierre, Romain Demeulemeester, Nadège Costa, and Nicolas Savy. Agent-based modeling in medical research. example in health economics. *arXiv preprint arXiv:2205.10131*, 2022. +[2] Fabio Henrique Kiyoiti dos Santos Tanaka and Claus Aranha. Data augmentation using gans. *arXiv preprint arXiv:1904.09135*, 2019. + +**Keywords:** GAN, copula, PIMA, synthetic data, virtual patients, ABM, healthcare, Artificial Intelligence (AI). + +## Dependencies +### Python libraries +To run Python scripts and Jupyter notebooks, please use the following command in terminal once [Anaconda](https://www.anaconda.com/) is install: +``` +conda env create --file environment.yml +``` +### R libraries +To run and compile the code [Prog_R_IJB_2022](./visualisation/Prog_R_IJB_2022.rmd) in PDF format, please use the following command in R terminal before running the file: +``` +install.packages(c("rvinecopulib", "e1071", "GGally", "caret", "MASS", "tidyverse", "corrr", "lsr", "cowplot", "EnvStats", "ggraph", "fitdistrplus", "truncdist", "truncnorm")) +``` diff --git a/compare_data.py b/compare_data.py new file mode 100644 index 0000000..6fefb42 --- /dev/null +++ b/compare_data.py @@ -0,0 +1,98 @@ +import pandas as pd +import seaborn as sns +import numpy as np +from scipy.stats import norm +import ipywidgets as widgets +import matplotlib.pyplot as plt +import glob +from data_treatment import DataAtts +from IPython.display import display +from sklearn.tree import DecisionTreeClassifier as DT +from sklearn.tree import export_graphviz # Decision tree from sklearn +import pydotplus # Decision tree plotting + +def compare_data (original_data, fake_data, size_of_fake, mode="save"): + dataAtts = DataAtts(original_data) + + data = pd.read_csv(original_data) + fake_data = pd.read_csv(fake_data).tail(size_of_fake) + print(dataAtts.message, "\n") + print(dataAtts.values_names[0], round(data[dataAtts.class_name].value_counts()[0]/len(data) * 100,2), '% of the dataset') + print(dataAtts.values_names[1], round(data[dataAtts.class_name].value_counts()[1]/len(data) * 100,2), '% of the dataset') + + classes = list(data) + + for name in classes: + if name=="Unnamed: 32": + continue + + plt.xlabel('Values') + plt.ylabel('Probability') + plt.title(name + " distribution") + real_dist = data[name].values + fake_dist = fake_data[name].values + plt.hist(real_dist, 50, density=True, alpha=0.5) + plt.hist(fake_dist, 50, density=True, alpha=0.5, facecolor='r') + if mode=="save": + plt.savefig('fake_data/'+ dataAtts.fname + "/"+name+'_distribution.png') + elif mode=="show": + plt.show() + plt.clf() + +def create_comparing_table(original_data_name, fake_data_name): + + dataAtts = DataAtts(original_data_name) + data = pd.read_csv(original_data_name) + fake_data = pd.read_csv(fake_data_name) + fake_data.loc[getattr(fake_data, dataAtts.class_name) >= 0.5, dataAtts.class_name] = 1 + fake_data.loc[getattr(fake_data, dataAtts.class_name) < 0.5, dataAtts.class_name] = 0 + + # Creates the training set + training_data = [["original", data.head(int(data.shape[0]*0.7))]] + fake_name = "fake" + str(fake_data_name).split("/")[2][0] + training_data.append([fake_name, fake_data.head(int(fake_data.shape[0]*0.7))]) + + test = data.tail(int(data.shape[0]*0.3)) + + print("| Database \t| Proportion \t| Test Error \t|") + print("| ---------\t| ---------: \t| :--------- \t|") + + for episode in 
training_data: + name = episode[0] + train = episode[1] + try: + positive=str(round(train[dataAtts.class_name].value_counts()[0]/len(train) * 100,2)) + except: + positive="0" + try: + negative=str(round(train[dataAtts.class_name].value_counts()[1]/len(train) * 100,2)) + except: + negative="0" + + + trainX = train.drop(dataAtts.class_name, 1) + testX = test.drop(dataAtts.class_name, 1) + y_train = train[dataAtts.class_name] + y_test = test[dataAtts.class_name] + #trainX = pd.get_dummies(trainX) + + clf1 = DT(max_depth = 3, min_samples_leaf = 1) + clf1 = clf1.fit(trainX,y_train) + export_graphviz(clf1, out_file="models/tree.dot", feature_names=trainX.columns, class_names=["0","1"], filled=True, rounded=True) + g = pydotplus.graph_from_dot_file(path="models/tree.dot") + + pred = clf1.predict_proba(testX) + if pred.shape[1] > 1: + pred = np.argmax(pred, axis=1) + else: + pred = pred.reshape((pred.shape[0])) + if negative=="0": + pred = pred-1 + + mse = round(((pred - y_test.values)**2).mean(axis=0), 4) + + string="| " + name + " \t| " + positive + "/" + negative + " \t| " + str(mse) + " \t|" + print(string) + + + \ No newline at end of file diff --git a/compare_original_synthetic.ipynb b/compare_original_synthetic.ipynb new file mode 100644 index 0000000..60d5741 --- /dev/null +++ b/compare_original_synthetic.ipynb @@ -0,0 +1,130 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results Tables\n", + "Proportion: outcome=0/outcome=1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from data_treatment import DataAtts\n", + "from matplotlib import pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "from sklearn.tree import DecisionTreeClassifier as DT\n", + "from sklearn.tree import export_graphviz # Decision tree from sklearn\n", + "\n", + "import pydotplus # Decision tree plotting\n", + "from IPython.display import Image\n", + "\n", + "import ipywidgets as widgets\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_name = 'original_data/diabetes.csv'\n", + "dataAtts = DataAtts(file_name) \n", + "data = pd.read_csv(file_name)\n", + "folder_name = file_name[14:-4]\n", + "\n", + "# Creates the training set\n", + "training_data = [[\"original\", data.head(int(data.shape[0]*0.7))]]\n", + "test = data.tail(int(data.shape[0]*0.3))\n", + "for file in glob.glob(\"fake_data\\\\\" + folder_name + \"\\\\*.csv\"):\n", + " name = \"fake\" + str(file).split(\"\\\\\")[2][0]\n", + " fake_data = pd.read_csv(file)\n", + " fake_data.loc[getattr(fake_data, dataAtts.class_name) >= 0.5, dataAtts.class_name] = 1\n", + " fake_data.loc[getattr(fake_data, dataAtts.class_name) < 0.5, dataAtts.class_name] = 0\n", + " fake_training=fake_data.head(int(fake_data.shape[0]*0.7))\n", + " training_data.append([name, fake_training])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"| Database \\t| Proportion \\t| Test Error \\t|\")\n", + "print(\"| ---------\\t| ---------: \\t| :--------- \\t|\")\n", + "\n", + "for episode in training_data:\n", + " name = episode[0]\n", + " train = episode[1]\n", + " try:\n", + " positive=str(round(train[dataAtts.class_name].value_counts()[0]/len(train) * 100,2))\n", + " except:\n", + " positive=\"0\"\n", + " try:\n", + " 
negative=str(round(train[dataAtts.class_name].value_counts()[1]/len(train) * 100,2))\n", + " except:\n", + " negative=\"0\"\n", + " \n", + " \n", + " trainX = train.drop(dataAtts.class_name, 1)\n", + " testX = test.drop(dataAtts.class_name, 1)\n", + " y_train = train[dataAtts.class_name]\n", + " y_test = test[dataAtts.class_name]\n", + " #trainX = pd.get_dummies(trainX)\n", + "\n", + " clf1 = DT(max_depth = 3, min_samples_leaf = 1)\n", + " clf1 = clf1.fit(trainX,y_train)\n", + " export_graphviz(clf1, out_file=\"models/tree.dot\", feature_names=trainX.columns, class_names=[\"0\",\"1\"], filled=True, rounded=True)\n", + " g = pydotplus.graph_from_dot_file(path=\"models/tree.dot\")\n", + "\n", + " pred = clf1.predict_proba(testX)\n", + " if pred.shape[1] > 1:\n", + " pred = np.argmax(pred, axis=1)\n", + " else:\n", + " pred = pred.reshape((pred.shape[0]))\n", + " if negative==\"0\":\n", + " pred = pred-1\n", + " \n", + " mse = round(((pred - y_test.values)**2).mean(axis=0), 4)\n", + " \n", + " string=\"| \" + name + \" \\t| \" + positive + \"/\" + negative + \" \\t| \" + str(mse) + \" \\t|\"\n", + " print(string)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tabular_gan", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8 | packaged by conda-forge | (main, Nov 24 2022, 14:07:00) [MSC v.1916 64 bit (AMD64)]" + }, + "vscode": { + "interpreter": { + "hash": "2f1136a7f15cd1225735fd9261403f7c342baa42a12d30e4630e4cfef11f2512" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/create_fake_data.ipynb b/create_fake_data.ipynb new file mode 100644 index 0000000..18e1275 --- /dev/null +++ b/create_fake_data.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import pandas as pd\n", + "from torch import nn, optim\n", + "from torch.autograd.variable import Variable\n", + "from torchvision import transforms, datasets\n", + "from data_treatment import DataSet, DataAtts\n", + "from discriminator import *\n", + "from generator import *\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "import matplotlib.pyplot as plt\n", + "import glob\n", + "from format_data import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "folder_name = \"models/diabetes\"+\"/generator*.pt\"\n", + "model_widget = widgets.Dropdown(\n", + " options=glob.glob(folder_name),\n", + " description='Generator:',\n", + " disabled=False,\n", + ")\n", + "display(model_widget)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "original_db_name = \"models/diabetes\"[7:]\n", + "original_db_path = \"original_data/\" + original_db_name + \".csv\"\n", + "original_db = pd.read_csv(original_db_path)\n", + "original_db_size=original_db.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " checkpoint= torch.load(model_widget.value, map_location='cuda')\n", + "except:\n", + " checkpoint= torch.load(model_widget.value, map_location='cpu')\n", + "checkpoint['model_attributes']['out_features'] = 
len(original_db.columns)\n", + "generator = GeneratorNet(**checkpoint['model_attributes'])\n", + "generator.load_state_dict(checkpoint['model_state_dict'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "size = original_db_size\n", + "new_data = generator.create_data(size)\n", + "df = pd.DataFrame(new_data, columns=original_db.columns)\n", + "#Changes the name to be easier to read\n", + "name = model_widget.value.split(\"/\")[-1][9:-4] + \"_size-\" + str(size)\n", + "df.to_csv( \"fake_data/\" + original_db_name + \"/\" + name + \".csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Do the same thing as the cells above but for all the files in the directory\n", + "import glob\n", + "for file in glob.glob(folder_name):\n", + " name = file.split(\"/\")[-1][9:-4]\n", + " print(name)\n", + " try:\n", + " checkpoint= torch.load(file, map_location='cuda')\n", + " except:\n", + " checkpoint= torch.load(file, map_location='cpu')\n", + " generator = GeneratorNet(**checkpoint['model_attributes'])\n", + " generator.load_state_dict(checkpoint['model_state_dict'])\n", + " size = original_db_size\n", + " new_data = generator.create_data(size)\n", + " new_data = format_output(new_data)\n", + " df = pd.DataFrame(new_data, columns=original_db.columns)\n", + " df = format_output_db(df)\n", + " name = name + \"_size-\" + str(size)\n", + " df.to_csv( \"fake_data/\" + original_db_name + \"/\" + name + \".csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tabular_gan", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8 | packaged by conda-forge | (main, Nov 24 2022, 14:07:00) [MSC v.1916 64 bit (AMD64)]" + }, + "vscode": { + "interpreter": { + "hash": "2f1136a7f15cd1225735fd9261403f7c342baa42a12d30e4630e4cfef11f2512" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data_treatment.py b/data_treatment.py new file mode 100644 index 0000000..d0df8a8 --- /dev/null +++ b/data_treatment.py @@ -0,0 +1,100 @@ +import torch +import pandas as pd +import os +from sklearn import preprocessing +from sklearn.utils import shuffle +from torch import nn, optim +from torch.autograd.variable import Variable +from torchvision import transforms, datasets, utils +from torch.utils.data import Dataset, DataLoader + +class ToTensor(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, sample): + # print (sample.values[2]) + # print (torch.from_numpy(sample.values)[2].item()) + return torch.from_numpy(sample.values) + +class DataSet(Dataset): + """Face Landmarks dataset.""" + + def __init__(self, csv_file, root_dir, transform=transforms.Compose([ToTensor()]), training_porcentage=0.7, shuffle_db=False): + """ + Args: + csv_file (string): Path to the csv file with annotations. + root_dir (string): Directory with all the images. + transform (callable, optional): Optional transform to be applied + on a sample. 
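+            training_porcentage (float, optional): Fraction of the rows kept
+                as the training split; the remaining rows are held out as
+                the test split.
+            shuffle_db (bool, optional): Whether to shuffle the rows before
+                splitting them into training and test data.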
+ """ + # self.data = pd.read_csv(csv_file).head(100000) + self.file = pd.read_csv(csv_file) + if (shuffle): + self.file = shuffle(self.file) + self.data = self.file.head(int(self.file.shape[0]*training_porcentage)) + self.test_data = self.file.tail(int(self.file.shape[0]*(1-training_porcentage))) + self.root_dir = root_dir + self.transform = transform + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + item = self.data.iloc[idx] + if self.transform: + item = self.transform(item) + return item + + def get_columns(self): + return self.data.columns + +class DataAtts(): + def __init__(self, file_name): + if file_name == "original_data/data.csv": + self.message = "Breast Cancer Wisconsin (Diagnostic) Data Set" + self.class_name = "diagnosis" + self.values_names = {0: "Benign", 1: "Malignant"} + self.class_len = 32 + self.fname="data" + elif file_name == "original_data/creditcard.csv": + self.message = "Credit Card Fraud Detection" + self.class_name = "Class" + self.values_names = {0: "No Frauds", 1: "Frauds"} + self.class_len = 31 + self.fname="creditcard" + elif file_name == "original_data/diabetes.csv": + self.message="Pima Indians Diabetes Database" + self.class_name = "Outcome" + self.values_names = {0: "Normal", 1: "Diabets"} + self.class_len = 9 + self.fname="diabetes" + + elif file_name == "original_data/data_escalonated.csv": + self.message = "Breast Cancer Wisconsin (Diagnostic) Data Set eSCALONATED" + self.class_name = "diagnosis" + self.values_names = {0: "Benign", 1: "Malignant"} + self.class_len = 32 + self.fname="data_escalonated" + elif file_name == "original_data/creditcard_escalonated.csv": + self.message = "Credit Card Fraud Detection eSCALONATED" + self.class_name = "Class" + self.values_names = {0: "No Frauds", 1: "Frauds"} + self.class_len = 31 + self.fname="creditcard_escalonated" + elif file_name == "original_data/diabetes_escalonated.csv": + self.message="Pima Indians Diabetes Database eSCALONATED" + self.class_name = "Outcome" + self.values_names = {0: "Normal", 1: "Diabets"} + self.class_len = 9 + self.fname="diabetes_escalonated" + elif file_name == "original_data/creditcard_1s_escalonated.csv": + self.message = "Credit Card Fraud Detection eSCALONATED" + self.class_name = "Class" + self.values_names = {0: "No Frauds", 1: "Frauds"} + self.class_len = 31 + self.fname="creditcard_1s_escalonated" + else: + print("File not found, exiting") + exit(1) + + diff --git a/discriminator.py b/discriminator.py new file mode 100644 index 0000000..276e0e3 --- /dev/null +++ b/discriminator.py @@ -0,0 +1,60 @@ +import torch +from torch import nn, optim +from torch.autograd.variable import Variable +from torchvision import transforms, datasets +from utils import real_data_target, fake_data_target + +class DiscriminatorNet(torch.nn.Module): + """ + A three hidden-layer discriminative neural network + """ + def __init__(self, in_features, leakyRelu=0.2, dropout=0.3, hidden_layers=[1024, 512, 256]): + super(DiscriminatorNet, self).__init__() + + out_features = 1 + self.layers = hidden_layers.copy() + self.layers.insert(0, in_features) + + for count in range(0, len(self.layers)-1): + self.add_module("hidden_" + str(count), + nn.Sequential( + nn.Linear(self.layers[count], self.layers[count+1]), + nn.LeakyReLU(leakyRelu), + nn.Dropout(dropout) + ) + ) + + self.add_module("out", + nn.Sequential( + nn.Linear(self.layers[-1], out_features), + torch.nn.Sigmoid() + ) + ) + + def forward(self, x): + for name, module in self.named_children(): + x = module(x) + return x 
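+
+# Minimal usage sketch (illustrative only, not wired into the training
+# pipeline): in_features must match the width of the input table, e.g. 9
+# columns for the diabetes data.
+#
+#     net = DiscriminatorNet(in_features=9)
+#     scores = net(torch.randn(16, 9))  # shape (16, 1), values in (0, 1)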
+ +# train_discriminator(d_optimizer, discriminator, loss, real_data, fake_data) +def train_discriminator(optimizer, discriminator, loss, real_data, fake_data): + # Reset gradients + optimizer.zero_grad() + + # 1.1 Train on Real Data + prediction_real = discriminator(real_data) + # Calculate error and backpropagate + error_real = loss(prediction_real, real_data_target(real_data.size(0))) + error_real.backward() + + # 1.2 Train on Fake Data + prediction_fake = discriminator(fake_data) + # Calculate error and backpropagate + error_fake = loss(prediction_fake, fake_data_target(real_data.size(0))) + error_fake.backward() + + # 1.3 Update weights with gradients + optimizer.step() + + # Return error + return error_real + error_fake, prediction_real, prediction_fake \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..cf5e94c0947f07653470901064fbaf2b8fb78852 GIT binary patch literal 16094 zcmb`O=}sd{5{2vcmF6Ap3%G@CXka9s1z}?hG#DDQG(7y=IbR;4t1_#orDufL?8?Yk zPOO>!=RdpZqPnj()xWF1t7rYas$Qx){eG&>tF3L-mJ8QkVt1h*6R$ZkhwxZB{VpqLZH+tem&op1Z)N@yQ4Q`&bBk%Fr zqrRcY&$U)YXFKvXG<)+1^OMO`I>p z_gZ{hiQbu>oQoDTgWk0`S&E01-dX9@ag=6y_9jWe&#fc~+KUG5;B}#=SBVGqXz#F> ze$f7pq!ZJELnOD5~t5K+cdc_~_T!N*Y~@hqZp+>FX?U(3EG0i+4YHTK#195X9%$?}fbJ1?fk#mBd#{Q?Gut?Y(A6e>HVk8rrN<5d^ zE%`xKe03xJ%!D}N?!8_EGapHBaOntu=fK?%hKbX6i4xi1LaWd%J{K`$oVV5S9v*C^ z@VhBmsP)?V8(L}0LUD73VZ`=V@s?!e@?gfP&b=a#i1n)4ytttChEc`{PpiU(N- z`;Y-=pE^J*NPz_%=f>?0tw&*oxb>8A(a*K{$2+HRUay*o_l>gtwAByN{H<2LB(9rl ziKpQFYdw=gh$T~3{i=S+&S)r?b!bOCkw}i8x`K>v7Mjwq=tz7?po+vw{Eh<3jzrFOe7w@5~F&w|@ z`8#nt6;bT@DC<1PzWIK~`u{5=#j2N-=VRSi8S$9un(?YG3CHO5SqPrWCz^>hwn3jx zp5&1`Np>Sk)Hp#WeJI7&JFQ`zbJk3+5RLMk`|uhu??<8&>xOoYRpTQi{mEWZl3}Mr z+ILwyVrx_?b(R{+$RL_tY%8shmGI%NYT+_fQ)DIThB7BX1y3dxA^DRinMz%EjNycO zoQmU8c*5iGk#E|0JQZqVdJJ;zgEY#2*uZs0UL*I{avSl0RPGd913(3=E=6%ny#oy@ z3*tIe3YCtl?C+wyP@McOTI-fR>0#l#i6YigWsJA&mGrRD*K^VaUgJv1KV8ids}Jg7#98&0Y(W=nTezAoYtTF*ZiN8$ zH{aj2i*ax*EwJxgnzi=w+O@8;@cd2M6I|0L%~K8si*yGyp7W}$rhr#|d~$G%V4C;tt?S@4rX~XaNkyTml_lNVjY4 zyGg5a>(NiWXbt>0N<;5dE7*-XzuXU;F(>jog<5NTEpJa}YBWojBMwk^f*m?3v<3IJ zUg`Nu@pL3@G^MAjd{;+6)#`cFnVvxB_#il*!X3{(OXK+US<(o!;O0-MFgHU=$0yh1 zcqiT+blWjE$GK(1H!l-4W5oEVd$J)ax8j4!Dm)u|mpx6X@tluat=_5b22Zv=_L`>S zTj`bwiz9~D=bAW4d=fR<9(F4`Y@~y;wu zI~Ut%JeBnqIyy=^x>Uq=mO9p7=h~%DNBACHgJ(XDrp60W)@KE1p9+ZV$6ON1c^zUs zkp*iDdS>IFShL-Ev{#i~*|^o5CU~9z-i;b!I@8fEl)sO&u94xb@$wr}hsly&*?;ciU0=oblcXZAn{RYxxyP)3Q_Np| zV#yUZ-O5nS=)GfY*$v`Z$is zJ7G=GkjJsFsel6FcC5?moxXLRV7sm!OTm4qdr9+|qr16ZV21vPNy@e4WD3B!%vSg% z!?Q1ujPFCqywp#8nW@Z`zK;^J%RR_IKT_Ypl=MneV?G2Ta;uis$x%V?^DOQ-*^AoK z(b^ixX_cMeFs-JiIJOxz&0eU#^6uhrChhGKil_&pcXdV9q&&p0dvItmqcN`0Kc0|F zexz4cvJ0Nko|oOHX!Iai&o_DZHn_xFTWJhBI6MM%03Jlmx@DIqRCl%$WAhTR#tymD?Fijp*b#vv)qrbLV}s(~snhkwaWN+FIqFJ6YYG#a=t3Q6mn0 zk}aG1>_=Qss^;ihZY`_3aO5BzAz~ z9LLQ5I1S^B;YshpMOVbQ)H`5ft+>;k44Vo%wU!qt3zqhGg?}o$P${(0?ep??SuUS( zcT>Zwm;-aap&bc1Ga#zsDZaPh8fC8khpVjU7r4nmRHiD98+FD`d!Cu=6$*|C;o0`C z9C`gQCTI{(;%pxuYf_{_LGR=E;ih2t0hn{O%FLgWB&O2sTTXDno~&dI=va>4b*;6; zePhb?VSk?GWR=?hOuFz8c8}cZ&h{&boyGF=2j3c@s(2F@)QTWHv=%poI0+&r;xpup zJWfO7QLBARXu6-Wo|@OSB|2>O8tq)%JVA)lpY3noVtvVf87E5q@6u}G0G(ux!E$Yh zV{RT0A*dTSiWYn6C!a0D5uF1+|Ae+r_u`x~&dBSi719o4oU?DbK|CIR!``tvY+dTR zl=mg!?Od|H%3^VT+_d^IADpK@NK-zwML*<>G9C_b0+sMS&&SP0Xn71DR5`Azv7F;O z{6y|5&(DY@`OKU>J%5NbWA-iMuYIS9-7!=``iQ}SiLxpl!ti_bQ(7=4eD+4;V)n~CKwPBnPor#%|8HkgClED42j_R{T zM+&UvDa@(x`&~JRbMUVCSf5KWb@EiP_C)kV{GWoK;Dx*R)N#+6h9PBnT4o#IT|s@i zyyx11IZm7!qJQ!r-e+ogL~ee9z}b|_s2OR{J~!-{hocYs4_$;`*-4HQ{a41GCT02x 
zuVxQwSD&{t{W}&_X4CCkA7I+IXXrs@irmnm06-lCHE# ziR65q0Pm(iMxd6&y3J@6w&jMyg|xtJS?~=lX7H)2ltD!@pfcQpLCmbZ(*!rH|)E11EIU#D^_9rV?3^l~ihI_FA$t z#i1(qjgNdab<4Q)8*qO_oXiUnH~6`+Kc&AL<}Z5WS(hp za4(+&!$e(Yl)h>|k6i5L&k&r;(&W+jd_z;ZwsvwV?9AsZh+M>n=-iy8rb*MB7NZYt zFs#KF-tQ`*#*9x~sA0%V?N69dne%zKwY19Q%;-h3sXQ@-!4tt;#KJP(lsch{sbf|% zs|@SYa+4iv@15o;&dIesy1Zpbf}gS_(;_-8;?;D$5jQ|4anbj*oR4Zs!@A&Btn2Za zsQeNg&smtUQJv*C{F;%w^v2RcHD$W5|0m^-hBw7rjPt^4+4E5E&9z_G?Nxd~J|V~r znx&qn3ggCYpl-Z$eAa;VSk1YcEKUUDz9#o@^4Rp6JcRi%RYCjay6I5oFO;=BXGWeQ zNy7aMPGH-)=uKSsHWPkb=h&Et4q3mhXy|V_wWYSf=Je5|U_aOIuXLB!)o9(BaH?8=iY@2TpPe8I zQVqXxv_ok+S9)JQ{(Z;@w%(x dlfPh}yc!w7{a^F|?KS>nhGS&C3N2Vm{{=P>p%VZA literal 0 HcmV?d00001 diff --git a/fake_data_analysis.ipynb b/fake_data_analysis.ipynb new file mode 100644 index 0000000..99b162e --- /dev/null +++ b/fake_data_analysis.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fake Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy.stats import norm\n", + "from data_treatment import DataAtts\n", + "import ipywidgets as widgets\n", + "import matplotlib.pyplot as plt\n", + "import glob\n", + "from compare_data import *\n", + "from IPython.display import display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "folder_name = 'original_data/diabetes.csv'[14:-4]\n", + "fake_files_dropdown = widgets.Dropdown(\n", + " options=glob.glob(\"fake_data/\" + folder_name + \"/*.csv\"),\n", + " description='Fake file:',\n", + " disabled=False,\n", + ")\n", + "display(fake_files_dropdown)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_name='original_data/diabetes.csv'\n", + "dataAtts = DataAtts(file_name)\n", + " \n", + " \n", + "data = pd.read_csv(file_name)\n", + "fake_data = pd.read_csv(fake_files_dropdown.value)\n", + "fake_data.loc[getattr(fake_data, dataAtts.class_name) >= 0.5, dataAtts.class_name] = 1\n", + "fake_data.loc[getattr(fake_data, dataAtts.class_name) < 0.5, dataAtts.class_name] = 0\n", + "\n", + "print(dataAtts.message)\n", + "print(dataAtts.values_names[0], round(data[dataAtts.class_name].value_counts()[0]/len(data) * 100,2), '% of the dataset')\n", + "print(dataAtts.values_names[1], round(data[dataAtts.class_name].value_counts()[1]/len(data) * 100,2), '% of the dataset')\n", + "\n", + "print(\"\\nFake Data\")\n", + "try:\n", + " positive=str(round(fake_data[dataAtts.class_name].value_counts()[0]/len(fake_data) * 100,2))\n", + "except:\n", + " positive=\"0\"\n", + "try:\n", + " negative=str(round(fake_data[dataAtts.class_name].value_counts()[1]/len(fake_data) * 100,2))\n", + "except:\n", + " negative=\"0\"\n", + " \n", + "\n", + "print(\"Outcome = 0: \", positive, '% of the dataset')\n", + "print(\"Outcome = 1: \", negative, '% of the dataset')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "classes = list(data)\n", + "\n", + "for name in classes:\n", + " if name==\"Unnamed: 32\":\n", + " continue\n", + " \n", + " plt.xlabel('Values')\n", + " plt.ylabel('Probability')\n", + " plt.title(name + \" distribution\")\n", + " real_dist = data[name].values\n", + " fake_dist = fake_data[name].values\n", + " plt.hist(real_dist, 50, density=True, alpha=0.5)\n", + " plt.hist(fake_dist, 50, density=True, 
alpha=0.5, facecolor='r')\n", + " #plt.savefig('fake_data/'+ dataAtts.fname + \"/\"+name+'_distribution.png')\n", + " plt.show()\n", + " plt.clf()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tabular_gan", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8 | packaged by conda-forge | (main, Nov 24 2022, 14:07:00) [MSC v.1916 64 bit (AMD64)]" + }, + "vscode": { + "interpreter": { + "hash": "2f1136a7f15cd1225735fd9261403f7c342baa42a12d30e4630e4cfef11f2512" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/format_data.py b/format_data.py new file mode 100644 index 0000000..078ae9e --- /dev/null +++ b/format_data.py @@ -0,0 +1,18 @@ +import pandas as pd +import numpy as np + + +def format_output(x): + x[:, 0] = np.rint(np.maximum(0, x[:, 0])) + x[:, 1] = np.rint(np.maximum(0, x[:, 1])) + x[:, 2] = np.rint(np.maximum(0, x[:, 2])) + x[:, 3] = np.rint(np.maximum(0, x[:, 3])) + x[:, 4] = np.rint(np.maximum(0, x[:, 4])) + x[:, 5] = np.maximum(0, x[:, 5]) + x[:, 6] = np.maximum(0, x[:, 6]) + x[:, 7] = np.rint(np.maximum(0, x[:, 7])) + x[:, 8] = (x[:, 8]>=0.5) + return x + +def format_output_db(db): + return db.astype({"Pregnancies": int, "Glucose": int, "BloodPressure": int, "SkinThickness": int, "Insulin": int, "BMI": float, "DiabetesPedigreeFunction": float, "Age": int, "Outcome": int}) diff --git a/generator.py b/generator.py new file mode 100644 index 0000000..538bdef --- /dev/null +++ b/generator.py @@ -0,0 +1,68 @@ +import torch +from torch import nn, optim +from torch.autograd.variable import Variable +from torchvision import transforms, datasets +from utils import real_data_target + +def noise(quantity, size): + return Variable(torch.randn(quantity, size)) + +class GeneratorNet(torch.nn.Module): + """ + A three hidden-layer generative neural network + """ + def __init__(self, out_features, leakyRelu=0.2, hidden_layers=[256, 512, 1024], in_features=100, escalonate=False): + super(GeneratorNet, self).__init__() + + self.in_features = in_features + self.layers = hidden_layers.copy() + self.layers.insert(0, self.in_features) + + for count in range(0, len(self.layers)-1): + self.add_module("hidden_" + str(count), + nn.Sequential( + nn.Linear(self.layers[count], self.layers[count+1]), + nn.LeakyReLU(leakyRelu) + ) + ) + + if not escalonate: + self.add_module("out", + nn.Sequential( + nn.Linear(self.layers[-1], out_features) + ) + ) + else: + self.add_module("out", + nn.Sequential( + nn.Linear(self.layers[-1], out_features), + escalonate + ) + ) + + def forward(self, x): + for name, module in self.named_children(): + x = module(x) + return x + + def create_data(self, quantity): + points = noise(quantity, self.in_features) + try: + data=self.forward(points.cuda()) + except RuntimeError: + data=self.forward(points.cpu()) + return data.detach().numpy() + +def train_generator(optimizer, discriminator, loss, fake_data): + # 2. 
Train Generator + # Reset gradients + optimizer.zero_grad() + # Sample noise and generate fake data + prediction = discriminator(fake_data) + # Calculate error and backpropagate + error = loss(prediction, real_data_target(prediction.size(0))) + error.backward() + # Update weights with gradients + optimizer.step() + # Return error + return error \ No newline at end of file diff --git a/original_data/diabetes.csv b/original_data/diabetes.csv new file mode 100644 index 0000000..9e6a362 --- /dev/null +++ b/original_data/diabetes.csv @@ -0,0 +1,769 @@ +Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome +6,148,72,35,0,33.6,0.627,50,1 +1,85,66,29,0,26.6,0.351,31,0 +8,183,64,0,0,23.3,0.672,32,1 +1,89,66,23,94,28.1,0.167,21,0 +0,137,40,35,168,43.1,2.288,33,1 +5,116,74,0,0,25.6,0.201,30,0 +3,78,50,32,88,31,0.248,26,1 +10,115,0,0,0,35.3,0.134,29,0 +2,197,70,45,543,30.5,0.158,53,1 +8,125,96,0,0,0,0.232,54,1 +4,110,92,0,0,37.6,0.191,30,0 +10,168,74,0,0,38,0.537,34,1 +10,139,80,0,0,27.1,1.441,57,0 +1,189,60,23,846,30.1,0.398,59,1 +5,166,72,19,175,25.8,0.587,51,1 +7,100,0,0,0,30,0.484,32,1 +0,118,84,47,230,45.8,0.551,31,1 +7,107,74,0,0,29.6,0.254,31,1 +1,103,30,38,83,43.3,0.183,33,0 +1,115,70,30,96,34.6,0.529,32,1 +3,126,88,41,235,39.3,0.704,27,0 +8,99,84,0,0,35.4,0.388,50,0 +7,196,90,0,0,39.8,0.451,41,1 +9,119,80,35,0,29,0.263,29,1 +11,143,94,33,146,36.6,0.254,51,1 +10,125,70,26,115,31.1,0.205,41,1 +7,147,76,0,0,39.4,0.257,43,1 +1,97,66,15,140,23.2,0.487,22,0 +13,145,82,19,110,22.2,0.245,57,0 +5,117,92,0,0,34.1,0.337,38,0 +5,109,75,26,0,36,0.546,60,0 +3,158,76,36,245,31.6,0.851,28,1 +3,88,58,11,54,24.8,0.267,22,0 +6,92,92,0,0,19.9,0.188,28,0 +10,122,78,31,0,27.6,0.512,45,0 +4,103,60,33,192,24,0.966,33,0 +11,138,76,0,0,33.2,0.42,35,0 +9,102,76,37,0,32.9,0.665,46,1 +2,90,68,42,0,38.2,0.503,27,1 +4,111,72,47,207,37.1,1.39,56,1 +3,180,64,25,70,34,0.271,26,0 +7,133,84,0,0,40.2,0.696,37,0 +7,106,92,18,0,22.7,0.235,48,0 +9,171,110,24,240,45.4,0.721,54,1 +7,159,64,0,0,27.4,0.294,40,0 +0,180,66,39,0,42,1.893,25,1 +1,146,56,0,0,29.7,0.564,29,0 +2,71,70,27,0,28,0.586,22,0 +7,103,66,32,0,39.1,0.344,31,1 +7,105,0,0,0,0,0.305,24,0 +1,103,80,11,82,19.4,0.491,22,0 +1,101,50,15,36,24.2,0.526,26,0 +5,88,66,21,23,24.4,0.342,30,0 +8,176,90,34,300,33.7,0.467,58,1 +7,150,66,42,342,34.7,0.718,42,0 +1,73,50,10,0,23,0.248,21,0 +7,187,68,39,304,37.7,0.254,41,1 +0,100,88,60,110,46.8,0.962,31,0 +0,146,82,0,0,40.5,1.781,44,0 +0,105,64,41,142,41.5,0.173,22,0 +2,84,0,0,0,0,0.304,21,0 +8,133,72,0,0,32.9,0.27,39,1 +5,44,62,0,0,25,0.587,36,0 +2,141,58,34,128,25.4,0.699,24,0 +7,114,66,0,0,32.8,0.258,42,1 +5,99,74,27,0,29,0.203,32,0 +0,109,88,30,0,32.5,0.855,38,1 +2,109,92,0,0,42.7,0.845,54,0 +1,95,66,13,38,19.6,0.334,25,0 +4,146,85,27,100,28.9,0.189,27,0 +2,100,66,20,90,32.9,0.867,28,1 +5,139,64,35,140,28.6,0.411,26,0 +13,126,90,0,0,43.4,0.583,42,1 +4,129,86,20,270,35.1,0.231,23,0 +1,79,75,30,0,32,0.396,22,0 +1,0,48,20,0,24.7,0.14,22,0 +7,62,78,0,0,32.6,0.391,41,0 +5,95,72,33,0,37.7,0.37,27,0 +0,131,0,0,0,43.2,0.27,26,1 +2,112,66,22,0,25,0.307,24,0 +3,113,44,13,0,22.4,0.14,22,0 +2,74,0,0,0,0,0.102,22,0 +7,83,78,26,71,29.3,0.767,36,0 +0,101,65,28,0,24.6,0.237,22,0 +5,137,108,0,0,48.8,0.227,37,1 +2,110,74,29,125,32.4,0.698,27,0 +13,106,72,54,0,36.6,0.178,45,0 +2,100,68,25,71,38.5,0.324,26,0 +15,136,70,32,110,37.1,0.153,43,1 +1,107,68,19,0,26.5,0.165,24,0 +1,80,55,0,0,19.1,0.258,21,0 +4,123,80,15,176,32,0.443,34,0 +7,81,78,40,48,46.7,0.261,42,0 +4,134,72,0,0,23.8,0.277,60,1 
+2,142,82,18,64,24.7,0.761,21,0 +6,144,72,27,228,33.9,0.255,40,0 +2,92,62,28,0,31.6,0.13,24,0 +1,71,48,18,76,20.4,0.323,22,0 +6,93,50,30,64,28.7,0.356,23,0 +1,122,90,51,220,49.7,0.325,31,1 +1,163,72,0,0,39,1.222,33,1 +1,151,60,0,0,26.1,0.179,22,0 +0,125,96,0,0,22.5,0.262,21,0 +1,81,72,18,40,26.6,0.283,24,0 +2,85,65,0,0,39.6,0.93,27,0 +1,126,56,29,152,28.7,0.801,21,0 +1,96,122,0,0,22.4,0.207,27,0 +4,144,58,28,140,29.5,0.287,37,0 +3,83,58,31,18,34.3,0.336,25,0 +0,95,85,25,36,37.4,0.247,24,1 +3,171,72,33,135,33.3,0.199,24,1 +8,155,62,26,495,34,0.543,46,1 +1,89,76,34,37,31.2,0.192,23,0 +4,76,62,0,0,34,0.391,25,0 +7,160,54,32,175,30.5,0.588,39,1 +4,146,92,0,0,31.2,0.539,61,1 +5,124,74,0,0,34,0.22,38,1 +5,78,48,0,0,33.7,0.654,25,0 +4,97,60,23,0,28.2,0.443,22,0 +4,99,76,15,51,23.2,0.223,21,0 +0,162,76,56,100,53.2,0.759,25,1 +6,111,64,39,0,34.2,0.26,24,0 +2,107,74,30,100,33.6,0.404,23,0 +5,132,80,0,0,26.8,0.186,69,0 +0,113,76,0,0,33.3,0.278,23,1 +1,88,30,42,99,55,0.496,26,1 +3,120,70,30,135,42.9,0.452,30,0 +1,118,58,36,94,33.3,0.261,23,0 +1,117,88,24,145,34.5,0.403,40,1 +0,105,84,0,0,27.9,0.741,62,1 +4,173,70,14,168,29.7,0.361,33,1 +9,122,56,0,0,33.3,1.114,33,1 +3,170,64,37,225,34.5,0.356,30,1 +8,84,74,31,0,38.3,0.457,39,0 +2,96,68,13,49,21.1,0.647,26,0 +2,125,60,20,140,33.8,0.088,31,0 +0,100,70,26,50,30.8,0.597,21,0 +0,93,60,25,92,28.7,0.532,22,0 +0,129,80,0,0,31.2,0.703,29,0 +5,105,72,29,325,36.9,0.159,28,0 +3,128,78,0,0,21.1,0.268,55,0 +5,106,82,30,0,39.5,0.286,38,0 +2,108,52,26,63,32.5,0.318,22,0 +10,108,66,0,0,32.4,0.272,42,1 +4,154,62,31,284,32.8,0.237,23,0 +0,102,75,23,0,0,0.572,21,0 +9,57,80,37,0,32.8,0.096,41,0 +2,106,64,35,119,30.5,1.4,34,0 +5,147,78,0,0,33.7,0.218,65,0 +2,90,70,17,0,27.3,0.085,22,0 +1,136,74,50,204,37.4,0.399,24,0 +4,114,65,0,0,21.9,0.432,37,0 +9,156,86,28,155,34.3,1.189,42,1 +1,153,82,42,485,40.6,0.687,23,0 +8,188,78,0,0,47.9,0.137,43,1 +7,152,88,44,0,50,0.337,36,1 +2,99,52,15,94,24.6,0.637,21,0 +1,109,56,21,135,25.2,0.833,23,0 +2,88,74,19,53,29,0.229,22,0 +17,163,72,41,114,40.9,0.817,47,1 +4,151,90,38,0,29.7,0.294,36,0 +7,102,74,40,105,37.2,0.204,45,0 +0,114,80,34,285,44.2,0.167,27,0 +2,100,64,23,0,29.7,0.368,21,0 +0,131,88,0,0,31.6,0.743,32,1 +6,104,74,18,156,29.9,0.722,41,1 +3,148,66,25,0,32.5,0.256,22,0 +4,120,68,0,0,29.6,0.709,34,0 +4,110,66,0,0,31.9,0.471,29,0 +3,111,90,12,78,28.4,0.495,29,0 +6,102,82,0,0,30.8,0.18,36,1 +6,134,70,23,130,35.4,0.542,29,1 +2,87,0,23,0,28.9,0.773,25,0 +1,79,60,42,48,43.5,0.678,23,0 +2,75,64,24,55,29.7,0.37,33,0 +8,179,72,42,130,32.7,0.719,36,1 +6,85,78,0,0,31.2,0.382,42,0 +0,129,110,46,130,67.1,0.319,26,1 +5,143,78,0,0,45,0.19,47,0 +5,130,82,0,0,39.1,0.956,37,1 +6,87,80,0,0,23.2,0.084,32,0 +0,119,64,18,92,34.9,0.725,23,0 +1,0,74,20,23,27.7,0.299,21,0 +5,73,60,0,0,26.8,0.268,27,0 +4,141,74,0,0,27.6,0.244,40,0 +7,194,68,28,0,35.9,0.745,41,1 +8,181,68,36,495,30.1,0.615,60,1 +1,128,98,41,58,32,1.321,33,1 +8,109,76,39,114,27.9,0.64,31,1 +5,139,80,35,160,31.6,0.361,25,1 +3,111,62,0,0,22.6,0.142,21,0 +9,123,70,44,94,33.1,0.374,40,0 +7,159,66,0,0,30.4,0.383,36,1 +11,135,0,0,0,52.3,0.578,40,1 +8,85,55,20,0,24.4,0.136,42,0 +5,158,84,41,210,39.4,0.395,29,1 +1,105,58,0,0,24.3,0.187,21,0 +3,107,62,13,48,22.9,0.678,23,1 +4,109,64,44,99,34.8,0.905,26,1 +4,148,60,27,318,30.9,0.15,29,1 +0,113,80,16,0,31,0.874,21,0 +1,138,82,0,0,40.1,0.236,28,0 +0,108,68,20,0,27.3,0.787,32,0 +2,99,70,16,44,20.4,0.235,27,0 +6,103,72,32,190,37.7,0.324,55,0 +5,111,72,28,0,23.9,0.407,27,0 +8,196,76,29,280,37.5,0.605,57,1 +5,162,104,0,0,37.7,0.151,52,1 
+1,96,64,27,87,33.2,0.289,21,0 +7,184,84,33,0,35.5,0.355,41,1 +2,81,60,22,0,27.7,0.29,25,0 +0,147,85,54,0,42.8,0.375,24,0 +7,179,95,31,0,34.2,0.164,60,0 +0,140,65,26,130,42.6,0.431,24,1 +9,112,82,32,175,34.2,0.26,36,1 +12,151,70,40,271,41.8,0.742,38,1 +5,109,62,41,129,35.8,0.514,25,1 +6,125,68,30,120,30,0.464,32,0 +5,85,74,22,0,29,1.224,32,1 +5,112,66,0,0,37.8,0.261,41,1 +0,177,60,29,478,34.6,1.072,21,1 +2,158,90,0,0,31.6,0.805,66,1 +7,119,0,0,0,25.2,0.209,37,0 +7,142,60,33,190,28.8,0.687,61,0 +1,100,66,15,56,23.6,0.666,26,0 +1,87,78,27,32,34.6,0.101,22,0 +0,101,76,0,0,35.7,0.198,26,0 +3,162,52,38,0,37.2,0.652,24,1 +4,197,70,39,744,36.7,2.329,31,0 +0,117,80,31,53,45.2,0.089,24,0 +4,142,86,0,0,44,0.645,22,1 +6,134,80,37,370,46.2,0.238,46,1 +1,79,80,25,37,25.4,0.583,22,0 +4,122,68,0,0,35,0.394,29,0 +3,74,68,28,45,29.7,0.293,23,0 +4,171,72,0,0,43.6,0.479,26,1 +7,181,84,21,192,35.9,0.586,51,1 +0,179,90,27,0,44.1,0.686,23,1 +9,164,84,21,0,30.8,0.831,32,1 +0,104,76,0,0,18.4,0.582,27,0 +1,91,64,24,0,29.2,0.192,21,0 +4,91,70,32,88,33.1,0.446,22,0 +3,139,54,0,0,25.6,0.402,22,1 +6,119,50,22,176,27.1,1.318,33,1 +2,146,76,35,194,38.2,0.329,29,0 +9,184,85,15,0,30,1.213,49,1 +10,122,68,0,0,31.2,0.258,41,0 +0,165,90,33,680,52.3,0.427,23,0 +9,124,70,33,402,35.4,0.282,34,0 +1,111,86,19,0,30.1,0.143,23,0 +9,106,52,0,0,31.2,0.38,42,0 +2,129,84,0,0,28,0.284,27,0 +2,90,80,14,55,24.4,0.249,24,0 +0,86,68,32,0,35.8,0.238,25,0 +12,92,62,7,258,27.6,0.926,44,1 +1,113,64,35,0,33.6,0.543,21,1 +3,111,56,39,0,30.1,0.557,30,0 +2,114,68,22,0,28.7,0.092,25,0 +1,193,50,16,375,25.9,0.655,24,0 +11,155,76,28,150,33.3,1.353,51,1 +3,191,68,15,130,30.9,0.299,34,0 +3,141,0,0,0,30,0.761,27,1 +4,95,70,32,0,32.1,0.612,24,0 +3,142,80,15,0,32.4,0.2,63,0 +4,123,62,0,0,32,0.226,35,1 +5,96,74,18,67,33.6,0.997,43,0 +0,138,0,0,0,36.3,0.933,25,1 +2,128,64,42,0,40,1.101,24,0 +0,102,52,0,0,25.1,0.078,21,0 +2,146,0,0,0,27.5,0.24,28,1 +10,101,86,37,0,45.6,1.136,38,1 +2,108,62,32,56,25.2,0.128,21,0 +3,122,78,0,0,23,0.254,40,0 +1,71,78,50,45,33.2,0.422,21,0 +13,106,70,0,0,34.2,0.251,52,0 +2,100,70,52,57,40.5,0.677,25,0 +7,106,60,24,0,26.5,0.296,29,1 +0,104,64,23,116,27.8,0.454,23,0 +5,114,74,0,0,24.9,0.744,57,0 +2,108,62,10,278,25.3,0.881,22,0 +0,146,70,0,0,37.9,0.334,28,1 +10,129,76,28,122,35.9,0.28,39,0 +7,133,88,15,155,32.4,0.262,37,0 +7,161,86,0,0,30.4,0.165,47,1 +2,108,80,0,0,27,0.259,52,1 +7,136,74,26,135,26,0.647,51,0 +5,155,84,44,545,38.7,0.619,34,0 +1,119,86,39,220,45.6,0.808,29,1 +4,96,56,17,49,20.8,0.34,26,0 +5,108,72,43,75,36.1,0.263,33,0 +0,78,88,29,40,36.9,0.434,21,0 +0,107,62,30,74,36.6,0.757,25,1 +2,128,78,37,182,43.3,1.224,31,1 +1,128,48,45,194,40.5,0.613,24,1 +0,161,50,0,0,21.9,0.254,65,0 +6,151,62,31,120,35.5,0.692,28,0 +2,146,70,38,360,28,0.337,29,1 +0,126,84,29,215,30.7,0.52,24,0 +14,100,78,25,184,36.6,0.412,46,1 +8,112,72,0,0,23.6,0.84,58,0 +0,167,0,0,0,32.3,0.839,30,1 +2,144,58,33,135,31.6,0.422,25,1 +5,77,82,41,42,35.8,0.156,35,0 +5,115,98,0,0,52.9,0.209,28,1 +3,150,76,0,0,21,0.207,37,0 +2,120,76,37,105,39.7,0.215,29,0 +10,161,68,23,132,25.5,0.326,47,1 +0,137,68,14,148,24.8,0.143,21,0 +0,128,68,19,180,30.5,1.391,25,1 +2,124,68,28,205,32.9,0.875,30,1 +6,80,66,30,0,26.2,0.313,41,0 +0,106,70,37,148,39.4,0.605,22,0 +2,155,74,17,96,26.6,0.433,27,1 +3,113,50,10,85,29.5,0.626,25,0 +7,109,80,31,0,35.9,1.127,43,1 +2,112,68,22,94,34.1,0.315,26,0 +3,99,80,11,64,19.3,0.284,30,0 +3,182,74,0,0,30.5,0.345,29,1 +3,115,66,39,140,38.1,0.15,28,0 +6,194,78,0,0,23.5,0.129,59,1 +4,129,60,12,231,27.5,0.527,31,0 +3,112,74,30,0,31.6,0.197,25,1 
+0,124,70,20,0,27.4,0.254,36,1 +13,152,90,33,29,26.8,0.731,43,1 +2,112,75,32,0,35.7,0.148,21,0 +1,157,72,21,168,25.6,0.123,24,0 +1,122,64,32,156,35.1,0.692,30,1 +10,179,70,0,0,35.1,0.2,37,0 +2,102,86,36,120,45.5,0.127,23,1 +6,105,70,32,68,30.8,0.122,37,0 +8,118,72,19,0,23.1,1.476,46,0 +2,87,58,16,52,32.7,0.166,25,0 +1,180,0,0,0,43.3,0.282,41,1 +12,106,80,0,0,23.6,0.137,44,0 +1,95,60,18,58,23.9,0.26,22,0 +0,165,76,43,255,47.9,0.259,26,0 +0,117,0,0,0,33.8,0.932,44,0 +5,115,76,0,0,31.2,0.343,44,1 +9,152,78,34,171,34.2,0.893,33,1 +7,178,84,0,0,39.9,0.331,41,1 +1,130,70,13,105,25.9,0.472,22,0 +1,95,74,21,73,25.9,0.673,36,0 +1,0,68,35,0,32,0.389,22,0 +5,122,86,0,0,34.7,0.29,33,0 +8,95,72,0,0,36.8,0.485,57,0 +8,126,88,36,108,38.5,0.349,49,0 +1,139,46,19,83,28.7,0.654,22,0 +3,116,0,0,0,23.5,0.187,23,0 +3,99,62,19,74,21.8,0.279,26,0 +5,0,80,32,0,41,0.346,37,1 +4,92,80,0,0,42.2,0.237,29,0 +4,137,84,0,0,31.2,0.252,30,0 +3,61,82,28,0,34.4,0.243,46,0 +1,90,62,12,43,27.2,0.58,24,0 +3,90,78,0,0,42.7,0.559,21,0 +9,165,88,0,0,30.4,0.302,49,1 +1,125,50,40,167,33.3,0.962,28,1 +13,129,0,30,0,39.9,0.569,44,1 +12,88,74,40,54,35.3,0.378,48,0 +1,196,76,36,249,36.5,0.875,29,1 +5,189,64,33,325,31.2,0.583,29,1 +5,158,70,0,0,29.8,0.207,63,0 +5,103,108,37,0,39.2,0.305,65,0 +4,146,78,0,0,38.5,0.52,67,1 +4,147,74,25,293,34.9,0.385,30,0 +5,99,54,28,83,34,0.499,30,0 +6,124,72,0,0,27.6,0.368,29,1 +0,101,64,17,0,21,0.252,21,0 +3,81,86,16,66,27.5,0.306,22,0 +1,133,102,28,140,32.8,0.234,45,1 +3,173,82,48,465,38.4,2.137,25,1 +0,118,64,23,89,0,1.731,21,0 +0,84,64,22,66,35.8,0.545,21,0 +2,105,58,40,94,34.9,0.225,25,0 +2,122,52,43,158,36.2,0.816,28,0 +12,140,82,43,325,39.2,0.528,58,1 +0,98,82,15,84,25.2,0.299,22,0 +1,87,60,37,75,37.2,0.509,22,0 +4,156,75,0,0,48.3,0.238,32,1 +0,93,100,39,72,43.4,1.021,35,0 +1,107,72,30,82,30.8,0.821,24,0 +0,105,68,22,0,20,0.236,22,0 +1,109,60,8,182,25.4,0.947,21,0 +1,90,62,18,59,25.1,1.268,25,0 +1,125,70,24,110,24.3,0.221,25,0 +1,119,54,13,50,22.3,0.205,24,0 +5,116,74,29,0,32.3,0.66,35,1 +8,105,100,36,0,43.3,0.239,45,1 +5,144,82,26,285,32,0.452,58,1 +3,100,68,23,81,31.6,0.949,28,0 +1,100,66,29,196,32,0.444,42,0 +5,166,76,0,0,45.7,0.34,27,1 +1,131,64,14,415,23.7,0.389,21,0 +4,116,72,12,87,22.1,0.463,37,0 +4,158,78,0,0,32.9,0.803,31,1 +2,127,58,24,275,27.7,1.6,25,0 +3,96,56,34,115,24.7,0.944,39,0 +0,131,66,40,0,34.3,0.196,22,1 +3,82,70,0,0,21.1,0.389,25,0 +3,193,70,31,0,34.9,0.241,25,1 +4,95,64,0,0,32,0.161,31,1 +6,137,61,0,0,24.2,0.151,55,0 +5,136,84,41,88,35,0.286,35,1 +9,72,78,25,0,31.6,0.28,38,0 +5,168,64,0,0,32.9,0.135,41,1 +2,123,48,32,165,42.1,0.52,26,0 +4,115,72,0,0,28.9,0.376,46,1 +0,101,62,0,0,21.9,0.336,25,0 +8,197,74,0,0,25.9,1.191,39,1 +1,172,68,49,579,42.4,0.702,28,1 +6,102,90,39,0,35.7,0.674,28,0 +1,112,72,30,176,34.4,0.528,25,0 +1,143,84,23,310,42.4,1.076,22,0 +1,143,74,22,61,26.2,0.256,21,0 +0,138,60,35,167,34.6,0.534,21,1 +3,173,84,33,474,35.7,0.258,22,1 +1,97,68,21,0,27.2,1.095,22,0 +4,144,82,32,0,38.5,0.554,37,1 +1,83,68,0,0,18.2,0.624,27,0 +3,129,64,29,115,26.4,0.219,28,1 +1,119,88,41,170,45.3,0.507,26,0 +2,94,68,18,76,26,0.561,21,0 +0,102,64,46,78,40.6,0.496,21,0 +2,115,64,22,0,30.8,0.421,21,0 +8,151,78,32,210,42.9,0.516,36,1 +4,184,78,39,277,37,0.264,31,1 +0,94,0,0,0,0,0.256,25,0 +1,181,64,30,180,34.1,0.328,38,1 +0,135,94,46,145,40.6,0.284,26,0 +1,95,82,25,180,35,0.233,43,1 +2,99,0,0,0,22.2,0.108,23,0 +3,89,74,16,85,30.4,0.551,38,0 +1,80,74,11,60,30,0.527,22,0 +2,139,75,0,0,25.6,0.167,29,0 +1,90,68,8,0,24.5,1.138,36,0 +0,141,0,0,0,42.4,0.205,29,1 
+12,140,85,33,0,37.4,0.244,41,0 +5,147,75,0,0,29.9,0.434,28,0 +1,97,70,15,0,18.2,0.147,21,0 +6,107,88,0,0,36.8,0.727,31,0 +0,189,104,25,0,34.3,0.435,41,1 +2,83,66,23,50,32.2,0.497,22,0 +4,117,64,27,120,33.2,0.23,24,0 +8,108,70,0,0,30.5,0.955,33,1 +4,117,62,12,0,29.7,0.38,30,1 +0,180,78,63,14,59.4,2.42,25,1 +1,100,72,12,70,25.3,0.658,28,0 +0,95,80,45,92,36.5,0.33,26,0 +0,104,64,37,64,33.6,0.51,22,1 +0,120,74,18,63,30.5,0.285,26,0 +1,82,64,13,95,21.2,0.415,23,0 +2,134,70,0,0,28.9,0.542,23,1 +0,91,68,32,210,39.9,0.381,25,0 +2,119,0,0,0,19.6,0.832,72,0 +2,100,54,28,105,37.8,0.498,24,0 +14,175,62,30,0,33.6,0.212,38,1 +1,135,54,0,0,26.7,0.687,62,0 +5,86,68,28,71,30.2,0.364,24,0 +10,148,84,48,237,37.6,1.001,51,1 +9,134,74,33,60,25.9,0.46,81,0 +9,120,72,22,56,20.8,0.733,48,0 +1,71,62,0,0,21.8,0.416,26,0 +8,74,70,40,49,35.3,0.705,39,0 +5,88,78,30,0,27.6,0.258,37,0 +10,115,98,0,0,24,1.022,34,0 +0,124,56,13,105,21.8,0.452,21,0 +0,74,52,10,36,27.8,0.269,22,0 +0,97,64,36,100,36.8,0.6,25,0 +8,120,0,0,0,30,0.183,38,1 +6,154,78,41,140,46.1,0.571,27,0 +1,144,82,40,0,41.3,0.607,28,0 +0,137,70,38,0,33.2,0.17,22,0 +0,119,66,27,0,38.8,0.259,22,0 +7,136,90,0,0,29.9,0.21,50,0 +4,114,64,0,0,28.9,0.126,24,0 +0,137,84,27,0,27.3,0.231,59,0 +2,105,80,45,191,33.7,0.711,29,1 +7,114,76,17,110,23.8,0.466,31,0 +8,126,74,38,75,25.9,0.162,39,0 +4,132,86,31,0,28,0.419,63,0 +3,158,70,30,328,35.5,0.344,35,1 +0,123,88,37,0,35.2,0.197,29,0 +4,85,58,22,49,27.8,0.306,28,0 +0,84,82,31,125,38.2,0.233,23,0 +0,145,0,0,0,44.2,0.63,31,1 +0,135,68,42,250,42.3,0.365,24,1 +1,139,62,41,480,40.7,0.536,21,0 +0,173,78,32,265,46.5,1.159,58,0 +4,99,72,17,0,25.6,0.294,28,0 +8,194,80,0,0,26.1,0.551,67,0 +2,83,65,28,66,36.8,0.629,24,0 +2,89,90,30,0,33.5,0.292,42,0 +4,99,68,38,0,32.8,0.145,33,0 +4,125,70,18,122,28.9,1.144,45,1 +3,80,0,0,0,0,0.174,22,0 +6,166,74,0,0,26.6,0.304,66,0 +5,110,68,0,0,26,0.292,30,0 +2,81,72,15,76,30.1,0.547,25,0 +7,195,70,33,145,25.1,0.163,55,1 +6,154,74,32,193,29.3,0.839,39,0 +2,117,90,19,71,25.2,0.313,21,0 +3,84,72,32,0,37.2,0.267,28,0 +6,0,68,41,0,39,0.727,41,1 +7,94,64,25,79,33.3,0.738,41,0 +3,96,78,39,0,37.3,0.238,40,0 +10,75,82,0,0,33.3,0.263,38,0 +0,180,90,26,90,36.5,0.314,35,1 +1,130,60,23,170,28.6,0.692,21,0 +2,84,50,23,76,30.4,0.968,21,0 +8,120,78,0,0,25,0.409,64,0 +12,84,72,31,0,29.7,0.297,46,1 +0,139,62,17,210,22.1,0.207,21,0 +9,91,68,0,0,24.2,0.2,58,0 +2,91,62,0,0,27.3,0.525,22,0 +3,99,54,19,86,25.6,0.154,24,0 +3,163,70,18,105,31.6,0.268,28,1 +9,145,88,34,165,30.3,0.771,53,1 +7,125,86,0,0,37.6,0.304,51,0 +13,76,60,0,0,32.8,0.18,41,0 +6,129,90,7,326,19.6,0.582,60,0 +2,68,70,32,66,25,0.187,25,0 +3,124,80,33,130,33.2,0.305,26,0 +6,114,0,0,0,0,0.189,26,0 +9,130,70,0,0,34.2,0.652,45,1 +3,125,58,0,0,31.6,0.151,24,0 +3,87,60,18,0,21.8,0.444,21,0 +1,97,64,19,82,18.2,0.299,21,0 +3,116,74,15,105,26.3,0.107,24,0 +0,117,66,31,188,30.8,0.493,22,0 +0,111,65,0,0,24.6,0.66,31,0 +2,122,60,18,106,29.8,0.717,22,0 +0,107,76,0,0,45.3,0.686,24,0 +1,86,66,52,65,41.3,0.917,29,0 +6,91,0,0,0,29.8,0.501,31,0 +1,77,56,30,56,33.3,1.251,24,0 +4,132,0,0,0,32.9,0.302,23,1 +0,105,90,0,0,29.6,0.197,46,0 +0,57,60,0,0,21.7,0.735,67,0 +0,127,80,37,210,36.3,0.804,23,0 +3,129,92,49,155,36.4,0.968,32,1 +8,100,74,40,215,39.4,0.661,43,1 +3,128,72,25,190,32.4,0.549,27,1 +10,90,85,32,0,34.9,0.825,56,1 +4,84,90,23,56,39.5,0.159,25,0 +1,88,78,29,76,32,0.365,29,0 +8,186,90,35,225,34.5,0.423,37,1 +5,187,76,27,207,43.6,1.034,53,1 +4,131,68,21,166,33.1,0.16,28,0 +1,164,82,43,67,32.8,0.341,50,0 +4,189,110,31,0,28.5,0.68,37,0 +1,116,70,28,0,27.4,0.204,21,0 
+3,84,68,30,106,31.9,0.591,25,0 +6,114,88,0,0,27.8,0.247,66,0 +1,88,62,24,44,29.9,0.422,23,0 +1,84,64,23,115,36.9,0.471,28,0 +7,124,70,33,215,25.5,0.161,37,0 +1,97,70,40,0,38.1,0.218,30,0 +8,110,76,0,0,27.8,0.237,58,0 +11,103,68,40,0,46.2,0.126,42,0 +11,85,74,0,0,30.1,0.3,35,0 +6,125,76,0,0,33.8,0.121,54,1 +0,198,66,32,274,41.3,0.502,28,1 +1,87,68,34,77,37.6,0.401,24,0 +6,99,60,19,54,26.9,0.497,32,0 +0,91,80,0,0,32.4,0.601,27,0 +2,95,54,14,88,26.1,0.748,22,0 +1,99,72,30,18,38.6,0.412,21,0 +6,92,62,32,126,32,0.085,46,0 +4,154,72,29,126,31.3,0.338,37,0 +0,121,66,30,165,34.3,0.203,33,1 +3,78,70,0,0,32.5,0.27,39,0 +2,130,96,0,0,22.6,0.268,21,0 +3,111,58,31,44,29.5,0.43,22,0 +2,98,60,17,120,34.7,0.198,22,0 +1,143,86,30,330,30.1,0.892,23,0 +1,119,44,47,63,35.5,0.28,25,0 +6,108,44,20,130,24,0.813,35,0 +2,118,80,0,0,42.9,0.693,21,1 +10,133,68,0,0,27,0.245,36,0 +2,197,70,99,0,34.7,0.575,62,1 +0,151,90,46,0,42.1,0.371,21,1 +6,109,60,27,0,25,0.206,27,0 +12,121,78,17,0,26.5,0.259,62,0 +8,100,76,0,0,38.7,0.19,42,0 +8,124,76,24,600,28.7,0.687,52,1 +1,93,56,11,0,22.5,0.417,22,0 +8,143,66,0,0,34.9,0.129,41,1 +6,103,66,0,0,24.3,0.249,29,0 +3,176,86,27,156,33.3,1.154,52,1 +0,73,0,0,0,21.1,0.342,25,0 +11,111,84,40,0,46.8,0.925,45,1 +2,112,78,50,140,39.4,0.175,24,0 +3,132,80,0,0,34.4,0.402,44,1 +2,82,52,22,115,28.5,1.699,25,0 +6,123,72,45,230,33.6,0.733,34,0 +0,188,82,14,185,32,0.682,22,1 +0,67,76,0,0,45.3,0.194,46,0 +1,89,24,19,25,27.8,0.559,21,0 +1,173,74,0,0,36.8,0.088,38,1 +1,109,38,18,120,23.1,0.407,26,0 +1,108,88,19,0,27.1,0.4,24,0 +6,96,0,0,0,23.7,0.19,28,0 +1,124,74,36,0,27.8,0.1,30,0 +7,150,78,29,126,35.2,0.692,54,1 +4,183,0,0,0,28.4,0.212,36,1 +1,124,60,32,0,35.8,0.514,21,0 +1,181,78,42,293,40,1.258,22,1 +1,92,62,25,41,19.5,0.482,25,0 +0,152,82,39,272,41.5,0.27,27,0 +1,111,62,13,182,24,0.138,23,0 +3,106,54,21,158,30.9,0.292,24,0 +3,174,58,22,194,32.9,0.593,36,1 +7,168,88,42,321,38.2,0.787,40,1 +6,105,80,28,0,32.5,0.878,26,0 +11,138,74,26,144,36.1,0.557,50,1 +3,106,72,0,0,25.8,0.207,27,0 +6,117,96,0,0,28.7,0.157,30,0 +2,68,62,13,15,20.1,0.257,23,0 +9,112,82,24,0,28.2,1.282,50,1 +0,119,0,0,0,32.4,0.141,24,1 +2,112,86,42,160,38.4,0.246,28,0 +2,92,76,20,0,24.2,1.698,28,0 +6,183,94,0,0,40.8,1.461,45,0 +0,94,70,27,115,43.5,0.347,21,0 +2,108,64,0,0,30.8,0.158,21,0 +4,90,88,47,54,37.7,0.362,29,0 +0,125,68,0,0,24.7,0.206,21,0 +0,132,78,0,0,32.4,0.393,21,0 +5,128,80,0,0,34.6,0.144,45,0 +4,94,65,22,0,24.7,0.148,21,0 +7,114,64,0,0,27.4,0.732,34,1 +0,102,78,40,90,34.5,0.238,24,0 +2,111,60,0,0,26.2,0.343,23,0 +1,128,82,17,183,27.5,0.115,22,0 +10,92,62,0,0,25.9,0.167,31,0 +13,104,72,0,0,31.2,0.465,38,1 +5,104,74,0,0,28.8,0.153,48,0 +2,94,76,18,66,31.6,0.649,23,0 +7,97,76,32,91,40.9,0.871,32,1 +1,100,74,12,46,19.5,0.149,28,0 +0,102,86,17,105,29.3,0.695,27,0 +4,128,70,0,0,34.3,0.303,24,0 +6,147,80,0,0,29.5,0.178,50,1 +4,90,0,0,0,28,0.61,31,0 +3,103,72,30,152,27.6,0.73,27,0 +2,157,74,35,440,39.4,0.134,30,0 +1,167,74,17,144,23.4,0.447,33,1 +0,179,50,36,159,37.8,0.455,22,1 +11,136,84,35,130,28.3,0.26,42,1 +0,107,60,25,0,26.4,0.133,23,0 +1,91,54,25,100,25.2,0.234,23,0 +1,117,60,23,106,33.8,0.466,27,0 +5,123,74,40,77,34.1,0.269,28,0 +2,120,54,0,0,26.8,0.455,27,0 +1,106,70,28,135,34.2,0.142,22,0 +2,155,52,27,540,38.7,0.24,25,1 +2,101,58,35,90,21.8,0.155,22,0 +1,120,80,48,200,38.9,1.162,41,0 +11,127,106,0,0,39,0.19,51,0 +3,80,82,31,70,34.2,1.292,27,1 +10,162,84,0,0,27.7,0.182,54,0 +1,199,76,43,0,42.9,1.394,22,1 +8,167,106,46,231,37.6,0.165,43,1 +9,145,80,46,130,37.9,0.637,40,1 +6,115,60,39,0,33.7,0.245,40,1 
+1,112,80,45,132,34.8,0.217,24,0 +4,145,82,18,0,32.5,0.235,70,1 +10,111,70,27,0,27.5,0.141,40,1 +6,98,58,33,190,34,0.43,43,0 +9,154,78,30,100,30.9,0.164,45,0 +6,165,68,26,168,33.6,0.631,49,0 +1,99,58,10,0,25.4,0.551,21,0 +10,68,106,23,49,35.5,0.285,47,0 +3,123,100,35,240,57.3,0.88,22,0 +8,91,82,0,0,35.6,0.587,68,0 +6,195,70,0,0,30.9,0.328,31,1 +9,156,86,0,0,24.8,0.23,53,1 +0,93,60,0,0,35.3,0.263,25,0 +3,121,52,0,0,36,0.127,25,1 +2,101,58,17,265,24.2,0.614,23,0 +2,56,56,28,45,24.2,0.332,22,0 +0,162,76,36,0,49.6,0.364,26,1 +0,95,64,39,105,44.6,0.366,22,0 +4,125,80,0,0,32.3,0.536,27,1 +5,136,82,0,0,0,0.64,69,0 +2,129,74,26,205,33.2,0.591,25,0 +3,130,64,0,0,23.1,0.314,22,0 +1,107,50,19,0,28.3,0.181,29,0 +1,140,74,26,180,24.1,0.828,23,0 +1,144,82,46,180,46.1,0.335,46,1 +8,107,80,0,0,24.6,0.856,34,0 +13,158,114,0,0,42.3,0.257,44,1 +2,121,70,32,95,39.1,0.886,23,0 +7,129,68,49,125,38.5,0.439,43,1 +2,90,60,0,0,23.5,0.191,25,0 +7,142,90,24,480,30.4,0.128,43,1 +3,169,74,19,125,29.9,0.268,31,1 +0,99,0,0,0,25,0.253,22,0 +4,127,88,11,155,34.5,0.598,28,0 +4,118,70,0,0,44.5,0.904,26,0 +2,122,76,27,200,35.9,0.483,26,0 +6,125,78,31,0,27.6,0.565,49,1 +1,168,88,29,0,35,0.905,52,1 +2,129,0,0,0,38.5,0.304,41,0 +4,110,76,20,100,28.4,0.118,27,0 +6,80,80,36,0,39.8,0.177,28,0 +10,115,0,0,0,0,0.261,30,1 +2,127,46,21,335,34.4,0.176,22,0 +9,164,78,0,0,32.8,0.148,45,1 +2,93,64,32,160,38,0.674,23,1 +3,158,64,13,387,31.2,0.295,24,0 +5,126,78,27,22,29.6,0.439,40,0 +10,129,62,36,0,41.2,0.441,38,1 +0,134,58,20,291,26.4,0.352,21,0 +3,102,74,0,0,29.5,0.121,32,0 +7,187,50,33,392,33.9,0.826,34,1 +3,173,78,39,185,33.8,0.97,31,1 +10,94,72,18,0,23.1,0.595,56,0 +1,108,60,46,178,35.5,0.415,24,0 +5,97,76,27,0,35.6,0.378,52,1 +4,83,86,19,0,29.3,0.317,34,0 +1,114,66,36,200,38.1,0.289,21,0 +1,149,68,29,127,29.3,0.349,42,1 +5,117,86,30,105,39.1,0.251,42,0 +1,111,94,0,0,32.8,0.265,45,0 +4,112,78,40,0,39.4,0.236,38,0 +1,116,78,29,180,36.1,0.496,25,0 +0,141,84,26,0,32.4,0.433,22,0 +2,175,88,0,0,22.9,0.326,22,0 +2,92,52,0,0,30.1,0.141,22,0 +3,130,78,23,79,28.4,0.323,34,1 +8,120,86,0,0,28.4,0.259,22,1 +2,174,88,37,120,44.5,0.646,24,1 +2,106,56,27,165,29,0.426,22,0 +2,105,75,0,0,23.3,0.56,53,0 +4,95,60,32,0,35.4,0.284,28,0 +0,126,86,27,120,27.4,0.515,21,0 +8,65,72,23,0,32,0.6,42,0 +2,99,60,17,160,36.6,0.453,21,0 +1,102,74,0,0,39.5,0.293,42,1 +11,120,80,37,150,42.3,0.785,48,1 +3,102,44,20,94,30.8,0.4,26,0 +1,109,58,18,116,28.5,0.219,22,0 +9,140,94,0,0,32.7,0.734,45,1 +13,153,88,37,140,40.6,1.174,39,0 +12,100,84,33,105,30,0.488,46,0 +1,147,94,41,0,49.3,0.358,27,1 +1,81,74,41,57,46.3,1.096,32,0 +3,187,70,22,200,36.4,0.408,36,1 +6,162,62,0,0,24.3,0.178,50,1 +4,136,70,0,0,31.2,1.182,22,1 +1,121,78,39,74,39,0.261,28,0 +3,108,62,24,0,26,0.223,25,0 +0,181,88,44,510,43.3,0.222,26,1 +8,154,78,32,0,32.4,0.443,45,1 +1,128,88,39,110,36.5,1.057,37,1 +7,137,90,41,0,32,0.391,39,0 +0,123,72,0,0,36.3,0.258,52,1 +1,106,76,0,0,37.5,0.197,26,0 +6,190,92,0,0,35.5,0.278,66,1 +2,88,58,26,16,28.4,0.766,22,0 +9,170,74,31,0,44,0.403,43,1 +9,89,62,0,0,22.5,0.142,33,0 +10,101,76,48,180,32.9,0.171,63,0 +2,122,70,27,0,36.8,0.34,27,0 +5,121,72,23,112,26.2,0.245,30,0 +1,126,60,0,0,30.1,0.349,47,1 +1,93,70,31,0,30.4,0.315,23,0 \ No newline at end of file diff --git a/train_generator_discriminator.py b/train_generator_discriminator.py new file mode 100644 index 0000000..e0a2d15 --- /dev/null +++ b/train_generator_discriminator.py @@ -0,0 +1,156 @@ +import torch +import pandas as pd +from torch import nn, optim +from torch.autograd.variable import Variable +from torchvision 
import transforms, datasets +from data_treatment import DataSet, DataAtts +from discriminator import * +from generator import * +import os +# import ipywidgets as widgets +# from IPython.display import display +# import matplotlib.pyplot as plt +import glob +from utils import * + + +class Architecture(): + def __init__(self, learning_rate, batch_size, loss, hidden_layers, name): + self.learning_rate=learning_rate + self.batch_size=batch_size + self.loss=loss + self.hidden_layers=hidden_layers + self.name=name + +def save_model(name, epoch, attributes, dictionary, optimizer_dictionary, loss_function, db_name, arch_name): + torch.save({ + 'epoch': epoch, + 'model_attributes': attributes, + 'model_state_dict': dictionary, + 'optimizer_state_dict': optimizer_dictionary, + 'loss': loss_function + }, "models/" + db_name + "/" + name + "_" + arch_name + ".pt") + + +# Check if creditcard.csv exists and if so, create a scalonated version of it +# escalonate_creditcard_db() +if not os.path.isfile('./original_data/diabetes.csv'): + print("Database creditcard.csv not found, exiting...") + exit() + +file_names=["original_data/diabetes.csv"] +num_epochs=[500] +learning_rate=[0.0002] +batch_size=[5] +number_of_experiments = 5 +#hidden_layers=[[256, 512]] +hidden_layers=[[256, 512], [256], [128, 256], [128]] +# hidden_layers=[[256]] + +#create the different architetures +architectures=[] +count=0 +for lr in learning_rate: + for b_size in batch_size: + for hidden in hidden_layers: + for i in range(number_of_experiments): + name = "id-" + str(count) + name += "_epochs-" + str(num_epochs[0]) + name += "_layer-" + str(len(hidden)) + name += "_lr-" + str(lr) + name += "_batch-" + str(b_size) + name += "_arc-" + ','.join(map(str, hidden)) + architectures.append( Architecture( + learning_rate=lr, + batch_size=b_size, + loss=nn.BCELoss(), + hidden_layers=hidden, + name=name + ) + ) + count+=1 + + +#training process +for file_name, epochs in zip(file_names, num_epochs): + dataAtts = DataAtts(file_name) + database = DataSet (csv_file=file_name, root_dir=".", shuffle_db=False) + + for arc in architectures: + if ("escalonated" in file_name): + esc = torch.nn.Sigmoid() + else: + esc = False + + generatorAtts = { + 'out_features':dataAtts.class_len, + 'leakyRelu':0.2, + 'hidden_layers':arc.hidden_layers, + 'in_features':100, + 'escalonate':esc + } + generator = GeneratorNet(**generatorAtts) + + discriminatorAtts = { + 'in_features':dataAtts.class_len, + 'leakyRelu':0.2, + 'dropout':0.3, + 'hidden_layers':arc.hidden_layers[::-1] + + } + discriminator = DiscriminatorNet(**discriminatorAtts) + + if torch.cuda.is_available(): + discriminator.cuda() + generator.cuda() + d_optimizer = optim.Adam(discriminator.parameters(), lr=arc.learning_rate) + g_optimizer = optim.Adam(generator.parameters(), lr=arc.learning_rate) + loss = arc.loss + data_loader = torch.utils.data.DataLoader(database, batch_size=arc.batch_size, shuffle=True) + num_batches = len(data_loader) + + print(dataAtts.fname) + print(arc.name) + for epoch in range(epochs): + if (epoch % 100 == 0): + print("Epoch ", epoch) + + for n_batch, real_batch in enumerate(data_loader): + # 1. Train DdataAtts.fnameiscriminator + real_data = Variable(real_batch).float() + if torch.cuda.is_available(): + real_data = real_data.cuda() + # Generate fake data + fake_data = generator(random_noise(real_data.size(0))).detach() + # Train D + d_error, d_pred_real, d_pred_fake = train_discriminator(d_optimizer, discriminator, loss, real_data, fake_data) + + # 2. 
+                # 2. Train Generator
+                # Generate fake data
+                fake_data = generator(random_noise(real_batch.size(0)))
+                # Train G
+                g_error = train_generator(g_optimizer, discriminator, loss, fake_data)
+
+                # Display Progress
+
+                #if (n_batch) % print_interval == 0:
+
+        # From this line on it's just the saving
+        # save_model("generator", epoch, generatorAtts, generator.state_dict(), g_optimizer.state_dict(), loss, dataAtts.fname, arc.name)
+        # save_model("discriminator", epoch, discriminatorAtts, discriminator.state_dict(), d_optimizer.state_dict(), loss, dataAtts.fname, arc.name)
+
+        torch.save({
+            'epoch': epoch,
+            'model_attributes': generatorAtts,
+            'model_state_dict': generator.state_dict(),
+            'optimizer_state_dict': g_optimizer.state_dict(),
+            'loss': loss
+        }, "models/" + dataAtts.fname + "/generator_" + arc.name + ".pt")
+
+        torch.save({
+            'epoch': epoch,
+            'model_attributes': discriminatorAtts,
+            'model_state_dict': discriminator.state_dict(),
+            'optimizer_state_dict': d_optimizer.state_dict(),
+            'loss': loss
+        }, "models/" + dataAtts.fname + "/discriminator_" + arc.name + ".pt")
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..d40b5e1
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,24 @@
+from torch.autograd.variable import Variable
+import torch
+
+def random_noise(size):
+    n = Variable(torch.randn(size, 100))
+    if torch.cuda.is_available():
+        return n.cuda()
+    return n
+
+def real_data_target(size):
+    '''
+    Tensor of ones (the "real" label), with shape (size, 1)
+    '''
+    data = Variable(torch.ones(size, 1))
+    if torch.cuda.is_available(): return data.cuda()
+    return data
+
+def fake_data_target(size):
+    '''
+    Tensor of zeros (the "fake" label), with shape (size, 1)
+    '''
+    data = Variable(torch.zeros(size, 1))
+    if torch.cuda.is_available(): return data.cuda()
+    return data
\ No newline at end of file
diff --git a/visualisation/Prog_R_IJB_2022.Rmd b/visualisation/Prog_R_IJB_2022.Rmd
new file mode 100644
index 0000000..a06c14a
--- /dev/null
+++ b/visualisation/Prog_R_IJB_2022.Rmd
@@ -0,0 +1,1643 @@
+---
+title: "Simulation of baseline data and of the outcome"
+author: "Saint Pierre - Savy"
+date: "1/07/2022"
+output:
+  html_document: default
+  pdf_document: default
+  word_document: default
+---
+
+```{r,include=FALSE}
+
+#knitr::opts_chunk$set(fig.width=10, fig.height=7)
+```
+
+```{r,include=FALSE}
+library(rvinecopulib)
+# library(VineCopula)
+# library(ggraph)
+library(e1071)
+library(GGally)
+library(caret)
+library(MASS)
+library(tidyverse)
+library(corrr)
+library(lsr)
+library(cowplot)
+library(EnvStats)
+library(ggraph)
+
+
+#diabetes <- read.csv("C:/Users/nicolas.savy/Dropbox/PROJETS#RECHERCHE/7#BookChapter/Simulations/diabetes.csv")
+data <- read.csv("diabetes.csv")
+dim(data)
+
+# data cleaning: zeros are missing values (NA) for Glucose, BloodPressure, SkinThickness, Insulin, BMI
+diabetes<-data[data$Insulin!=0 & data$Glucose!=0 & data$BMI!=0,]
+summary(diabetes)
+dim(diabetes)
+
+# split Virtual Patient (VP) and virtual Outcome (VO)
+# separate discrete and continuous variables
+VP <- diabetes[,c(1:8)]
+VPDisc <- diabetes[,c(1,6)]
+VPDisc$Pregnancies = cut(VPDisc$Pregnancies,c(-0.1,0.9,100))
+levels(VPDisc$Pregnancies)<-c('No','Yes')
+VPDisc$BMI = cut(VPDisc$BMI,c(0,25,35,200))
+levels(VPDisc$BMI)<-c('<=25','25-35','>35')
+VPCont <- diabetes[,-c(1,6,9)]
+VPMod =as.data.frame(c(VPDisc,VPCont))
+
+VO <- as.data.frame(as.factor(diabetes[,9]))
+names(VO) = c('Pronostic')
+
+Data_Originale<-cbind(VPMod,VO)
+
+```
+
+## Description of the explanatory variables and the outcome
+
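+A quick sanity check on the cleaning step in the setup chunk above (a minimal
+sketch; `data` and `diabetes` are the objects created there):
+
+```{r,eval=FALSE}
+# how many records code each measurement as 0, i.e. as missing
+colSums(data[, c("Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI")] == 0)
+# records dropped by keeping only non-zero Insulin, Glucose and BMI
+nrow(data) - nrow(diabetes)
+```
+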
+```{r,echo=F,message = FALSE,fig.width=10, fig.height=10}
+
+
+# ###################### Estimation correlation network mixed data #####################
+# #### Function found on the internet
+# ###############################################################################
+# #
+# # cor2
+# # -- Compute correlations of columns of a dataframe of mixed types
+# #
+# ###############################################################################
+# #
+# # author : Srikanth KS (talegari)
+# # license : GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/)
+# #
+# ###############################################################################
+# #' @title cor2
+# #'
+# #' @description Compute correlations of columns of a dataframe of mixed types.
+# #' The dataframe is allowed to have columns of these four classes: integer,
+# #' numeric, factor and character. The character column is considered as
+# #' categorical variable.
+# #'
+# #' @details The correlation is computed as follows: \itemize{
+# #'
+# #' \item integer/numeric pair: pearson correlation using `cor` function. The
+# #' value lies between -1 and 1.
+# #'
+# #' \item integer/numeric - factor/categorical pair: correlation coefficient or
+# #' squared root of R^2 coefficient of linear regression of integer/numeric
+# #' variable over factor/categorical variable using `lm` function. The value
+# #' lies between 0 and 1. \item factor/categorical pair: cramersV value is
+# #' computed based on chisq test using `lsr::cramersV` function. The value lies
+# #' between 0 and 1.
+# #' }
+# #'
+# #' For a comprehensive implementation, use `polycor::hetcor`
+# #'
+# #' @param df input data frame
+# #'
+# #' @author Srikanth KS(gmail to sri dot teach)
+# #'
+# #' @keywords GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/)
+# #'
+# #' @examples
+# #' iris_cor <- cor2(iris)
+# #' corrplot::corrplot(iris_cor)
+# #' corrgram::corrgram(iris_cor)
+# #'
+# cor2 = function(df){
+#
+#   stopifnot(inherits(df, "data.frame"))
+#   stopifnot(sapply(df, class) %in% c("integer"
+#                                      , "numeric"
+#                                      , "factor"
+#                                      , "character"))
+#
+#   cor_fun <- function(pos_1, pos_2){
+#
+#     # both are numeric
+#     if(class(df[[pos_1]]) %in% c("integer", "numeric") &&
+#        class(df[[pos_2]]) %in% c("integer", "numeric")){
+#       r <- stats::cor(df[[pos_1]]
+#                       , df[[pos_2]]
+#                       , use = "pairwise.complete.obs"
+#       )
+#     }
+#
+#     # one is numeric and other is a factor/character
+#     if(class(df[[pos_1]]) %in% c("integer", "numeric") &&
+#        class(df[[pos_2]]) %in% c("factor", "character")){
+#       r <- sqrt(
+#         summary(
+#           stats::lm(df[[pos_1]] ~ as.factor(df[[pos_2]])))[["r.squared"]])
+#     }
+#
+#     if(class(df[[pos_2]]) %in% c("integer", "numeric") &&
+#        class(df[[pos_1]]) %in% c("factor", "character")){
+#       r <- sqrt(
+#         summary(
+#           stats::lm(df[[pos_2]] ~ as.factor(df[[pos_1]])))[["r.squared"]])
+#     }
+#
+#     # both are factor/character
+#     if(class(df[[pos_1]]) %in% c("factor", "character") &&
+#        class(df[[pos_2]]) %in% c("factor", "character")){
+#       r <- lsr::cramersV(df[[pos_1]], df[[pos_2]], simulate.p.value = TRUE)
+#     }
+#
+#     return(r)
+#   }
+#
+#   cor_fun <- Vectorize(cor_fun)
+#
+#   # now compute corr matrix
+#   corrmat <- outer(1:ncol(df)
+#                    , 1:ncol(df)
+#                    , function(x, y) cor_fun(x, y)
+#   )
+#
+#   rownames(corrmat) <- colnames(df)
+#   colnames(corrmat) <- colnames(df)
+#
+#   return(corrmat)
+# }
+#
+# ### compute the correlations with the function above
+# res.cor <- cor2(VPMod)
+# res.cor
+#
+# ### plot the correlation network
+# network_plot(res.cor,min_cor = 0.2)
+
+
+# box and whisker plots for all attributes
+#ggpairs(VPMod)
+
+p<-ggpairs(Data_Originale, aes(colour=Pronostic), lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1)))
+for(i in 1:p$nrow) {
+  for(j in 1:p$ncol){
+    p[i,j] <- p[i,j] +
+        scale_fill_manual(values=c("#00AFBB", "#E7B800", "#FC4E07")) +
+        scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
+  }
+}
+#p
+
+```
+
+## Size of the sample to simulate and number of simulations
+
+```{r,echo=F}
+## Size of the simulated sample
+####################################################################
+N<-500
+cat("Size of the simulated sample:", N,"\n")
+
+
+## Number of simulations (number of p-values computed)
+####################################################################
+Nsimu<-100
+cat("Number of simulations:", Nsimu,"\n")
+
+```
+
+## Simulating baseline data with the continuous method
+
+```{r,echo=F}
+
+#### Function to simulate with the continuous method #########
+Simulation_Continue<- function(DataBaseline) {
+
+###################### LEARNING ###############################
+MC = NULL
+VC = NULL
+VOriC = DataBaseline
+VOriC[,1] = as.numeric(VOriC[,1])
+VOriC[,2] = as.numeric(VOriC[,2])
+MC = colMeans(VOriC)
+VC = var(VOriC)
+
+##################### GENERATION of a SAMPLE of size N #################################
+VGenC = mvrnorm(N,MC,VC)
+VGenC = as.data.frame(VGenC)
+# Discretise Pregnancies
+TAB1 = table(VPMod[,1])
+P1 = prop.table(TAB1)
+CrV1 = MC[1]+sqrt(VC[1,1])*qnorm(P1[1],0,1)
+VGenC[,1]=as.factor(cut(VGenC[,1],c(min(VGenC[,1]),CrV1,max(VGenC[,1])),include.lowest=T))
+# Discretise BMI
+TAB2 = table(VPMod[,2])
+P2 = prop.table(TAB2)
+CrV21 = MC[2]+sqrt(VC[2,2])*qnorm(P2[1],0,1)
+CrV22 = MC[2]+sqrt(VC[2,2])*qnorm(P2[1]+P2[2],0,1)
+VGenC[,2]=as.factor(cut(VGenC[,2],c(min(VGenC[,2]),CrV21,CrV22,max(VGenC[,2])),include.lowest=T))
+
+levels(VGenC$Pregnancies)<-c('No','Yes')
+levels(VGenC$BMI)<-c('<=25','25-35','>35')
+return(VGenC)
+}
+
+```
+
+## Simulating baseline data with the discrete method
+
+```{r,echo=F,message=F}
+
+#### Function to simulate with the discrete method #########
+Simulation_Discrete<- function(DataBaseline) {
+
+###################### LEARNING ###############################
+VPDisc<-DataBaseline[,1:2]
+VPCont<-DataBaseline[,3:8]
+
+P = length(VPDisc$Pregnancies)
+L = NULL
+L[[1]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[1]) & (VPDisc$BMI == levels(VPDisc$BMI)[1]))
+L[[2]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[2]) & (VPDisc$BMI == levels(VPDisc$BMI)[1]))
+L[[3]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[1]) & (VPDisc$BMI == levels(VPDisc$BMI)[2]))
+L[[4]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[2]) & (VPDisc$BMI == levels(VPDisc$BMI)[2]))
+L[[5]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[1]) & (VPDisc$BMI == levels(VPDisc$BMI)[3]))
+L[[6]] = which((VPDisc$Pregnancies == levels(VPDisc$Pregnancies)[2]) & (VPDisc$BMI == levels(VPDisc$BMI)[3]))
+proba = NULL
+for (i in 1:6){
+  proba = c(proba,length(L[[i]])/P)
+}
+M = NULL
+V = NULL
+for (i in 1:6){
+  M[[i]] = colMeans(VPCont[L[[i]],])
+  V[[i]] = var(VPCont[L[[i]],])
+}
+
+##################### GENERATION of a SAMPLE of size N #################################
+TEMP = sample(c("1","2","3","4","5","6"), N, replace=TRUE, prob = proba)
+VGen = matrix(0,ncol = 8, nrow = N)
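+# each simulated subject first draws a stratum (a Pregnancies x BMI combination)
+# from the empirical stratum probabilities estimated above, then draws the six
+# continuous covariates from the multivariate normal fitted within that stratum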
+for (i in 1:N){
+  # strata were enumerated as Pregnancies x BMI: 1=(No,<=25), 2=(Yes,<=25),
+  # 3=(No,25-35), 4=(Yes,25-35), 5=(No,>35), 6=(Yes,>35)
+  g = as.integer(TEMP[i])
+  VGen[i,1] = levels(VPDisc$Pregnancies)[2 - g %% 2]
+  VGen[i,2] = levels(VPDisc$BMI)[ceiling(g/2)]
+  # one joint draw from the stratum-specific multivariate normal, so the
+  # within-stratum correlations between the six covariates are preserved
+  VGen[i,3:8] = mvrnorm(1,M[[g]],V[[g]])
+}
+VGen = as.data.frame(VGen)
+for (i in 3:8){VGen[,i] = as.numeric(as.character(VGen[,i]))}
+
+colnames(VGen)<-colnames(DataBaseline)
+return(VGen)
+}
+
+```
+
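+A one-off draw can be used to check that the discrete method reproduces the
+stratum frequencies (a usage sketch; `VPMod` is defined in the setup chunk):
+
+```{r,eval=FALSE}
+VGenD <- Simulation_Discrete(VPMod)
+# simulated vs. observed Pregnancies x BMI proportions
+prop.table(table(VGenD$Pregnancies, VGenD$BMI))
+prop.table(table(VPMod$Pregnancies, VPMod$BMI))
+```
+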
+## Simulating baseline data with the vine copula method (data-driven structure)
+
+```{r,echo=F,message=F}
+
+#### Function to simulate with the R-vine copula method #########
+Simulation_Copule<- function(DataBaseline) {
+
+
+###################### ESTIMATION ######################################
+# pseudo-observations of the continuous variables
+U_cont<-pseudo_obs(DataBaseline[,3:8])
+
+# distributions of the 2 discrete variables
+disc_1<-as.integer(DataBaseline[,1])-1
+disc_2<-as.integer(DataBaseline[,2])-1
+
+freq_disc1<-prop.table(table(DataBaseline[,1]))
+freq_disc2<-prop.table(table(DataBaseline[,2]))
+
+Freq_disc_t1 <- cbind(pdiscrete(disc_1+1,freq_disc1),pdiscrete(disc_2+1,freq_disc2))
+Freq_disc_t0 <- cbind(pdiscrete(disc_1,freq_disc1),pdiscrete(disc_2,freq_disc2))
+# density: ddiscrete(x+1,freq)
+# distribution function: pdiscrete(x+1,freq)
+# quantile function: qdiscrete(u[, 1],freq) - 1
+
+# information needed for a copula on mixed data
+U_mixte<-cbind(Freq_disc_t1,U_cont,Freq_disc_t0)
+
+## Model for mixed data
+fit_DataDriven <- vinecop(U_mixte, var_types = c("d","d", rep("c", 6)))
+#summary(fit_DataDriven)
+#plot(fit_DataDriven)
+#contour(fit_DataDriven)
+
+# define the distribution of the R-vine
+Fit_dist<-vinecop_dist(fit_DataDriven$pair_copulas,fit_DataDriven$structure,fit_DataDriven$var_types)
+
+###################### SIMULATION ######################################
+
+# function mapping uniforms back to the original scale of the data
+### inverse of the pseudo-observations
+PseudoObsInverse<-function(Data,SimUniformData) {
+PsInverse<-list()
+for (j in 1:ncol(Data)){
+  ecdfj<-ecdf(Data[,j])
+  ECDFvar<-get("x",environment(ecdfj))
+  ECDFjump<-get("y",environment(ecdfj))
+  PsInverse[[j]]<-stepfun(ECDFjump[-length(ECDFjump)],ECDFvar)
+  }
+SimData<-matrix(0,nrow(SimUniformData),ncol(SimUniformData))
+for (j in 1:ncol(SimUniformData)){ SimData[,j]<-PsInverse[[j]](SimUniformData[,j]) }
+SimData<-as.data.frame(SimData)
+return(SimData)
+}
+
+## simulate a sample of uniforms from the vine distribution
+# sample size N defined above
+U_Simu<-rvinecop(N,Fit_dist)
+
+# simulate the virtual patients
+Data_Simu<-PseudoObsInverse(DataBaseline,U_Simu)
+for (i in 1:2){Data_Simu[,i] = as.factor(Data_Simu[,i])}
+for (i in 3:8){Data_Simu[,i] = as.numeric(as.character(Data_Simu[,i]))}
+
+colnames(Data_Simu)<-colnames(DataBaseline)
+levels(Data_Simu[,1])<-c('No','Yes')
+levels(Data_Simu[,2])<-c('<=25','25-35','>35')
+
+return(Data_Simu)
+}
+
+
+# ## select by log-likelihood criterion from one-parameter families
+# fit <- vinecop(u, family_set = "onepar", selcrit = "bic")
+# summary(fit)
+#
+# ## Gaussian D-vine
+# fit <- vinecop(u, structure = dvine_structure(1:5), family = "gauss")
+# plot(fit)
+# contour(fit)
+#
+# ## Partial structure selection with only first tree specified
+# structure <- rvine_structure(order = 1:5, list(rep(5, 4)))
+# structure
+# fit <- vinecop(u, structure = structure, family = "gauss")
+# plot(fit)
+#
+# ## 1-truncated model with random structure
+# fit <- vinecop(u, structure = rvine_structure_sim(5), trunc_lvl = 1)
+# contour(fit)
+```
+
+## Learning a Random Forest to predict the outcome
+
+```{r,echo=F,message=F}
+
+####################################################################
+## estimate the prediction "model" on the original data
+####################################################################
+
+# Run algorithms using 10-fold cross validation
+control <- trainControl(method="cv", number=10)
+metric <- "Accuracy"
+# Random Forest
+set.seed(7)
+fit.rf <- train(Pronostic~., data=Data_Originale, method="rf", metric=metric, trControl=control)
+
+#fit.rf$finalModel$confusion
+
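+# (sketch) the fitted model can be inspected here before it is reused below, e.g.
+#   print(fit.rf)                 # cross-validated accuracy across mtry values
+#   fit.rf$finalModel$confusion   # OOB confusion matrix used by the error model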
+####################################################################
+#### Function to simulate the outcome from the "model" and the simulated data
+#### taking the prediction error into account, or not
+####################################################################
+Simulation_DataOutcome<- function(Modele,Covariables) {
+
+  # prediction without error
+  predictionsSE = NULL
+  predictionsSE <- predict(Modele,Covariables)
+
+  # prediction with error
+  n<-nrow(Covariables)
+  predictionsAE = NULL
+  for (i in 1:n) {
+    RAN = runif(1)
+    if (predictionsSE[i] == "1") { if (RAN < fit.rf$finalModel$confusion[1,3]) {predictionsAE[i] = "0" }
+                                   else {predictionsAE[i] = "1" }}
+    if (predictionsSE[i] == "0") { if (RAN < fit.rf$finalModel$confusion[2,3]) {predictionsAE[i] = "1" }
+                                   else {predictionsAE[i] = "0" }} }
+
+  predictionsAE = as.factor(predictionsAE)
+
+  DataOutcome_Simu <- cbind(Covariables,predictionsSE,predictionsAE)
+  return(DataOutcome_Simu)
+  }
+
+####################################################################
+#### Function to compute the p-values
+####################################################################
+CalculPvalue<- function(DataOutcome) {
+  # p-values for the outcome without error
+  Pvalue_SE<-NULL
+  Pvalue_SE[1]<-chisq.test(DataOutcome[,1], DataOutcome[,9])$p.value
+  Pvalue_SE[2]<-chisq.test(DataOutcome[,2], DataOutcome[,9])$p.value
+  for (i in 3:8) { Pvalue_SE[i]<-t.test(DataOutcome[,i] ~ DataOutcome[,9])$p.value }
+  # p-values for the outcome with error
+  Pvalue_AE<-NULL
+  Pvalue_AE[1]<-chisq.test(DataOutcome[,1], DataOutcome[,10])$p.value
+  Pvalue_AE[2]<-chisq.test(DataOutcome[,2], DataOutcome[,10])$p.value
+  for (i in 3:8) { Pvalue_AE[i]<-t.test(DataOutcome[,i] ~ DataOutcome[,10])$p.value }
+
+  return(list(p_SE=Pvalue_SE,p_AE=Pvalue_AE))
+  }
+
+```
+
+## Displaying one simulation with the discrete method
+
+```{r,echo=F,message=F,fig.width=12, fig.height=12,eval=F}
+
+DataSimu_Discrete<-Simulation_Discrete(VPMod)
+Simu_Discrete<-Simulation_DataOutcome(fit.rf, DataSimu_Discrete)
+
+ggpairs(DataSimu_Discrete)
+
+p<-ggpairs(Simu_Discrete[-9], aes(colour=predictionsAE), lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1)))
+for(i in 1:p$nrow) {
+  for(j in 1:p$ncol){
+    p[i,j] <- p[i,j] +
+        scale_fill_manual(values=c("#00AFBB", "#E7B800", "#FC4E07")) +
+        scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
+  }
+}
+p
+
+
+```
+
+## Displaying one simulation with the continuous method
+
+```{r,echo=F,message=F,fig.width=12, fig.height=12,eval=F}
+
+DataSimu_Continue<-Simulation_Continue(VPMod)
+Simu_Continue<-Simulation_DataOutcome(fit.rf, DataSimu_Continue)
+
+ggpairs(DataSimu_Continue)
+p<-ggpairs(Simu_Continue[-9], aes(colour=predictionsAE), lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1)))
+for(i in 1:p$nrow) {
+  for(j in 1:p$ncol){
+    p[i,j] <- p[i,j] +
+        scale_fill_manual(values=c("#00AFBB", "#E7B800", "#FC4E07")) +
+        scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
+  }
+}
+p
+
+```
+
+## Displaying one simulation with the copula method
+
+```{r,echo=F,message=F,fig.width=12, fig.height=12,eval=F}
+
+DataSimu_Copule<-Simulation_Copule(VPMod)
+Simu_Copule<-Simulation_DataOutcome(fit.rf, DataSimu_Copule)
+
+ggpairs(DataSimu_Copule)
+p<-ggpairs(Simu_Copule[-9], aes(colour=predictionsAE), lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1)))
+for(i in 1:p$nrow) {
+  for(j in 1:p$ncol){
+    p[i,j] <- p[i,j] +
+        scale_fill_manual(values=c("#00AFBB", "#E7B800", "#FC4E07")) +
+        scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
+  }
+}
+p
+```
+
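+The three helpers above chain together directly; a minimal end-to-end sketch
+for a single copula replicate (all objects as defined in the previous chunks):
+
+```{r,eval=FALSE}
+DataSimu <- Simulation_Copule(VPMod)
+Simu_rep <- Simulation_DataOutcome(fit.rf, DataSimu)
+# p-values per covariate, without and with the simulated prediction error
+round(CalculPvalue(Simu_rep)$p_SE, 4)
+round(CalculPvalue(Simu_rep)$p_AE, 4)
+```
+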
+## Displaying selected correlations for the 3 methods and the original data
+
+```{r,echo=F,message=F,fig.width=7, fig.height=6,dpi=600,eval=F}
+
+
+plot_grid( ggmatrix_gtable(ggpairs(VPMod[,c(3,5)],title ="Original data")),
+           ggmatrix_gtable(ggpairs(DataSimu_Discrete[,c(3,5)],title ="Simulation : discrete approach")),
+           ggmatrix_gtable(ggpairs(DataSimu_Continue[,c(3,5)],title ="Simulation : continuous approach")),
+           ggmatrix_gtable(ggpairs(DataSimu_Copule[,c(3,5)],title ="Simulation : Copula approach")),nrow=2)
+
+plot_grid( ggmatrix_gtable(ggpairs(VPMod[,c(4,8)],title ="Original data")),
+           ggmatrix_gtable(ggpairs(DataSimu_Discrete[,c(4,8)],title ="Simulation : discrete approach")),
+           ggmatrix_gtable(ggpairs(DataSimu_Continue[,c(4,8)],title ="Simulation : continuous approach")),
+           ggmatrix_gtable(ggpairs(DataSimu_Copule[,c(4,8)],title ="Simulation : Copula approach")),nrow=2)
+
+```
+
+## Original population
+
+```{r,echo=F,message=F,fig.width=12, fig.height=12}
+
+ggpairs(Data_Originale[,c(1:9)])
+
+```
+
+## Virtual individuals drawn from the original population:
+
+### simulation of 1000 individuals from the estimated copula and the (empirical) marginals
+
+```{r,echo=F,message=F,fig.width=12, fig.height=12}
+
+# simulate data with the copula method (sample size set just below)
+N<-1000
+
+# estimate the copula on the real data
+###################### ESTIMATION ######################################
+# pseudo-observations of the continuous variables
+U_cont<-pseudo_obs(Data_Originale[,3:8])
+
+# distributions of the 2 discrete variables
+disc_1<-as.integer(Data_Originale[,1])-1
+disc_2<-as.integer(Data_Originale[,2])-1
+
+freq_disc1<-prop.table(table(Data_Originale[,1]))
+freq_disc2<-prop.table(table(Data_Originale[,2]))
+
+Freq_disc_t1 <- cbind(pdiscrete(disc_1+1,freq_disc1),pdiscrete(disc_2+1,freq_disc2))
+Freq_disc_t0 <- cbind(pdiscrete(disc_1,freq_disc1),pdiscrete(disc_2,freq_disc2))
+# density: ddiscrete(x+1,freq)
+# distribution function: pdiscrete(x+1,freq)
+# quantile function: qdiscrete(u[, 1],freq) - 1
+
+# information needed for a copula on mixed data
+U_mixte<-cbind(Freq_disc_t1,U_cont,Freq_disc_t0)
+
+## Model for mixed data
+fit_DataDriven <- vinecop(U_mixte, var_types = c("d","d", rep("c", 6)))
+# define the distribution of the R-vine
+Fit_dist<-vinecop_dist(fit_DataDriven$pair_copulas,fit_DataDriven$structure,fit_DataDriven$var_types)
+
+
+plot(fit_DataDriven,edge_labels="pair", tree=c(1,2))
+plot(fit_DataDriven,edge_labels="family_tau", tree=1)
+
+# simulate from the copula model and the marginals
+###################### SIMULATION ######################################
+
+# function mapping uniforms back to the original scale of the data
+### inverse of the pseudo-observations
+PseudoObsInverse<-function(Data,SimUniformData) {
+PsInverse<-list()
+for (j in 1:ncol(Data)){
+  ecdfj<-ecdf(Data[,j])
+  ECDFvar<-get("x",environment(ecdfj))
+  ECDFjump<-get("y",environment(ecdfj))
+  PsInverse[[j]]<-stepfun(ECDFjump[-length(ECDFjump)],ECDFvar)
+  }
+SimData<-matrix(0,nrow(SimUniformData),ncol(SimUniformData))
+for (j in 1:ncol(SimUniformData)){ SimData[,j]<-PsInverse[[j]](SimUniformData[,j]) }
+SimData<-as.data.frame(SimData)
+return(SimData)
+}
+
+## simulate a sample of uniforms from the vine distribution
+# sample size N defined above
+U_Simu<-rvinecop(N,Fit_dist)
+
+# simulate virtual patients with the copula fitted on the original data
+SimuVar<-PseudoObsInverse(Data_Originale,U_Simu)
+for (i in 1:2){SimuVar[,i] = as.factor(SimuVar[,i])}
+for (i in 
3:8){SimuVar[,i] = as.numeric(as.character(SimuVar[,i]))} + +colnames(SimuVar)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar[,1])<-c('No','Yes') +levels(SimuVar[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu<-Simulation_DataOutcome(fit.rf, SimuVar) + +############### representations graphiques ####################@ +#ggpairs(SimuVar) + +#ggpairs(Simu) + + +``` + +## Individus virtuels issus de la population d'origine : meme support (troncature) !! + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique, sauf une marginale modelisee par une loi log normale + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +library(fitdistrplus) +library(truncdist) +library(truncnorm) + +###################################### fit de la meilleure distribution pour Glucose ###################################### + +fit_Glucose_norm <- fitdist(Data_Originale$Glucose, "norm") +fit_Glucose_lnorm <- fitdist(Data_Originale$Glucose, "lnorm") +fit_Glucose_gamma <- fitdist(Data_Originale$Glucose, "gamma") +fit_Glucose_weibull <- fitdist(Data_Originale$Glucose, "weibull") + +# par(mfrow=c(2,2)) +# cdfcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# denscomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# qqcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# ppcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# gofstat(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), fitnames=c("normal", "lognormal", "gamma","Weibull")) + +####################################### Simulation suivant le meilleur modele parametrique pour la distribution marginale +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... nouvelle variable glucose (simuler via modele parametrique) +GlucoseNew<-rlnorm(nrow(Data_Originale),meanlog=4.778,sdlog=0.249) + +# Simulation d'une distribution marginale via des loi parametrique tronquee (ici loi normale, valeur >140 ) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a=140,b=300 ,mean=122.63,sd=30.82) + +# attention ... si observations en dehors du support original (<56, >198), il faut en générer une autre !! 
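+# (comment sketch) the two identical for-loop passes below redraw out-of-support
+# values at most twice; a variant that is safe by construction would loop until
+# none remain, with the same lognormal parameters:
+#   while (any(GlucoseNew > 198 | GlucoseNew < 56)) {
+#     idx <- which(GlucoseNew > 198 | GlucoseNew < 56)
+#     GlucoseNew[idx] <- rlnorm(length(idx), meanlog = 4.778, sdlog = 0.249)
+#   }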
+#summary(Data_Originale$Glucose) +#summary(GlucoseNew) +for (i in 1:length(GlucoseNew)) { + if (GlucoseNew[i]>198 | GlucoseNew[i]<56) { + GlucoseNew[i]<-rlnorm(1,meanlog=4.778,sdlog=0.249) + } +} +for (i in 1:length(GlucoseNew)) { + if (GlucoseNew[i]>198 | GlucoseNew[i]<56) { + GlucoseNew[i]<-rlnorm(1,meanlog=4.778,sdlog=0.249) + } +} +#summary(GlucoseNew) + +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar1<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar1[,i] = as.factor(SimuVar1[,i])} +for (i in 3:8){SimuVar1[,i] = as.numeric(as.character(SimuVar1[,i]))} +colnames(SimuVar1)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar1[,1])<-c('No','Yes') +levels(SimuVar1[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu1<-Simulation_DataOutcome(fit.rf, SimuVar1) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar1) + + + +``` + +## Individus virtuels issus de la population d'origine : meme support !! + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique, sauf une marginale modelisee par une loi normale tronquee + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### + +fit_Glucose_norm <- fitdist(Data_Originale$Glucose, "norm") +fit_Glucose_lnorm <- fitdist(Data_Originale$Glucose, "lnorm") +fit_Glucose_gamma <- fitdist(Data_Originale$Glucose, "gamma") +fit_Glucose_weibull <- fitdist(Data_Originale$Glucose, "weibull") + +# par(mfrow=c(2,2)) +# cdfcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# denscomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# qqcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# ppcomp(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), legendtext=c("normal", "lognormal", "gamma","Weibull")) +# gofstat(list(fit_Glucose_norm, fit_Glucose_lnorm, fit_Glucose_gamma, fit_Glucose_weibull), fitnames=c("normal", "lognormal", "gamma","Weibull")) + +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a=56,b=198 ,mean=mean(Data_Originale[,3]),sd=sd(Data_Originale[,3])) + +hist(GlucoseNew) + +# Simulation d'une distribution marginale via des loi parametrique tronquee (ici loi normale, valeur >140 ) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a=140,b=300 ,mean=122.63,sd=30.82) + +# attention ... si observations en dehors du support original (<56, >198), il faut en générer une autre !! +#summary(Data_Originale$Glucose) +#summary(GlucoseNew) +# for (i in 1:length(GlucoseNew)) { +# if (GlucoseNew[i]>198 | GlucoseNew[i]<56) { +# GlucoseNew[i]<-rlnorm(1,meanlog=4.778,sdlog=0.249) +# } +# } +# for (i in 1:length(GlucoseNew)) { +# if (GlucoseNew[i]>198 | GlucoseNew[i]<56) { +# GlucoseNew[i]<-rlnorm(1,meanlog=4.778,sdlog=0.249) +# } +# } +#summary(GlucoseNew) + +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar2<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar2[,i] = as.factor(SimuVar2[,i])} +for (i in 3:8){SimuVar2[,i] = as.numeric(as.character(SimuVar2[,i]))} +colnames(SimuVar2)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar2[,1])<-c('No','Yes') +levels(SimuVar2[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu2<-Simulation_DataOutcome(fit.rf, SimuVar2) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + + + +``` + +## Representation de certaines correlations pour donnees originales, simulees non param, simulee param 1 et simulee param 2 (dans les 2 dernier cas, la distrib est tronquee au support de la variable) + +```{r,echo=F,message=F,fig.width=7, fig.height=6,dpi=600} + +colnames(Simu)[9]<-"Prediction" +colnames(Simu1)[9]<-"Prediction" +colnames(Simu2)[9]<-"Prediction" +plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,6,9)],title ="Original data")), + ggmatrix_gtable(ggpairs(Simu[,c(3,6,9)],title ="Simulation using empirical distribution for Glucose")), + ggmatrix_gtable(ggpairs(Simu1[,c(3,6,9)],title ="Simulation using Lognormal distribution for Glucose")), + ggmatrix_gtable(ggpairs(Simu2[,c(3,6,9)],title ="Simulation using Normal distribution for Glucose")), + nrow=2) + + +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,6)],title ="Original data")), +# ggmatrix_gtable(ggpairs(SimuVar[,c(3,6)],title ="Simulation using empirical distribution for Glucose")), +# ggmatrix_gtable(ggpairs(SimuVar1[,c(3,6)],title ="Simulation using Lognormal distribution for Glucose")), +# ggmatrix_gtable(ggpairs(SimuVar2[,c(3,6)],title ="Simulation using Normal distribution for Glucose")), +# nrow=2) +# +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,9)],title ="Original data")), +# ggmatrix_gtable(ggpairs(Simu[,c(3,9)],title ="Standard simulation")), +# 
ggmatrix_gtable(ggpairs(Simu1[,c(3,9)],title ="Simulation with lognormal distribution for Glucose")), +# ggmatrix_gtable(ggpairs(Simu2[,c(3,9)],title ="Simulation with normal distribution for Glucose")),nrow=2) +# +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,9)],title ="Original data")), +# ggmatrix_gtable(ggpairs(Simu[,c(3,10)],title ="Standard simulation")), +# ggmatrix_gtable(ggpairs(Simu1[,c(3,10)],title ="Simulation with lognormal distribution for Glucose")), +# ggmatrix_gtable(ggpairs(Simu2[,c(3,10)],title ="Simulation with normal distribution for Glucose")),nrow=2) + +# c(4,8) +# c(3,6) +# c(3,8) + +``` + +# Individus virtuels dans un intervalle de la population d'origine + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) + +### filtering pour conserver les individus satisfaisant un critere d'inclusion + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +# simulation de donnees avec la methode copule N=3000 +NN<-6000 + +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine +# taille de l'echantillon definit avant +U_Simu<-rvinecop(NN,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +Simu_afiltrer<-PseudoObsInverse(Data_Originale,U_Simu) +for (i in 1:2){Simu_afiltrer[,i] = as.factor(Simu_afiltrer[,i])} +for (i in 3:8){Simu_afiltrer[,i] = as.numeric(as.character(Simu_afiltrer[,i]))} +colnames(Simu_afiltrer)<-colnames(Data_Originale[,c(1:8)]) +levels(Simu_afiltrer[,1])<-c('No','Yes') +levels(Simu_afiltrer[,2])<-c('<=25','25-35','>35') + +# Restriction de la population simulee par filtration : uniquement patient tel que glucose >= 140 +# attention ... avec estimation empirique, toutes les simulation sont inférieures au max du support d'origine +SimuVar3<-Simu_afiltrer[Simu_afiltrer$Glucose > 100 & Simu_afiltrer$Glucose <=150,] +# on garde uniquement 500 individus au hasard +SimuVar3<-SimuVar3[sample(1:nrow(SimuVar3), N), ] + + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu3<-Simulation_DataOutcome(fit.rf, SimuVar3) + +###################### Representations graphiques ###################################### + +#ggpairs(SimuVar3) + + +``` + +# Individus virtuels dans un intervalle de la population d'origine + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### ici une gaussienne ajuste sur les données et tronquee a 140 + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 101, b=150, mean=mean(Data_Originale[,3]),sd=sd(Data_Originale[,3])) +#hist(GlucoseNew) +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar4<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar4[,i] = as.factor(SimuVar4[,i])} +for (i in 3:8){SimuVar4[,i] = as.numeric(as.character(SimuVar4[,i]))} +colnames(SimuVar4)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar4[,1])<-c('No','Yes') +levels(SimuVar4[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu4<-Simulation_DataOutcome(fit.rf, SimuVar4) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +# Individus virtuels dans un intervalle de la population d'origine + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### Une marginale avec une loi parametrique differente (par ex biblio) de la loi des données ... ici lognormale tronquee + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 140, mean=160,sd=50) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 101, b=150, mean=120,sd=30) +GlucoseNew <- rlnormTrunc( nrow(Data_Originale), min= 101, max=150, meanlog=120,sdlog=30) + + +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar5<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar5[,i] = as.factor(SimuVar5[,i])} +for (i in 3:8){SimuVar5[,i] = as.numeric(as.character(SimuVar5[,i]))} +colnames(SimuVar5)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar5[,1])<-c('No','Yes') +levels(SimuVar5[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu5<-Simulation_DataOutcome(fit.rf, SimuVar5) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +## Representation de certaines correlations pour donnees originales, simulees standard et filtering, simulee normal fit et filtering et simulee avec gaussienne tronquee + +```{r,echo=F,message=F,fig.width=7, fig.height=6,dpi=600} + +colnames(Simu3)[9]<-"Prediction" +colnames(Simu4)[9]<-"Prediction" +colnames(Simu5)[9]<-"Prediction" +plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[Data_Originale[,3]>100 & Data_Originale[,3]<=150,c(3,6,9)],title ="Filtering the original data")), + ggmatrix_gtable(ggpairs(Simu3[,c(3,6,9)],title ="Filtering the simulated data (based on empirical distribution)")), + ggmatrix_gtable(ggpairs(Simu4[,c(3,6,9)],title ="Filtering the simulated data (based on Normal distribution)")), + ggmatrix_gtable(ggpairs(Simu5[,c(3,6,9)],title ="Filtering the simulated data (based on Lognormal distribution)")),nrow=2) + + + +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,6)],title ="Original data")), +# ggmatrix_gtable(ggpairs(SimuVar3[,c(3,6)],title ="Standard simulation and filtering")), +# ggmatrix_gtable(ggpairs(SimuVar4[,c(3,6)],title ="Simulation with fitted normal distribution and filtering")), +# ggmatrix_gtable(ggpairs(SimuVar5[,c(3,6)],title ="Simulation with a different normal distribution")),nrow=2) +# +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,9)],title ="Original data")), +# ggmatrix_gtable(ggpairs(Simu3[,c(3,9)],title ="Standard simulation and filtering")), +# ggmatrix_gtable(ggpairs(Simu4[,c(3,9)],title ="Simulation with fitted normal distribution and filtering")), +# ggmatrix_gtable(ggpairs(Simu5[,c(3,9)],title ="Simulation with a different normal distribution")),nrow=2) +# +# plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[,c(3,9)],title ="Original data")), +# ggmatrix_gtable(ggpairs(Simu3[,c(3,10)],title ="Standard simulation and filtering")), +# ggmatrix_gtable(ggpairs(Simu4[,c(3,10)],title ="Simulation with fitted normal distribution and filtering")), +# 
ggmatrix_gtable(ggpairs(Simu5[,c(3,10)],title ="Simulation with a different normal distribution")),nrow=2) + + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>140 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) + +### filtering pour conserver les individus satisfaisant un critere d'inclusion + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +# simulation de donnees avec la methode copule N=3000 +NN<-60000 + +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine +# taille de l'echantillon definit avant +U_Simu<-rvinecop(NN,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +Simu_afiltrer<-PseudoObsInverse(Data_Originale,U_Simu) +for (i in 1:2){Simu_afiltrer[,i] = as.factor(Simu_afiltrer[,i])} +for (i in 3:8){Simu_afiltrer[,i] = as.numeric(as.character(Simu_afiltrer[,i]))} +colnames(Simu_afiltrer)<-colnames(Data_Originale[,c(1:8)]) +levels(Simu_afiltrer[,1])<-c('No','Yes') +levels(Simu_afiltrer[,2])<-c('<=25','25-35','>35') + +# Restriction de la population simulee par filtration : uniquement patient tel que glucose >= 140 +# attention ... avec estimation empirique, toutes les simulation sont inférieures au max du support d'origine +SimuVar6<-Simu_afiltrer[Simu_afiltrer$Glucose > 140,] +# on garde uniquement N individus au hasard +SimuVar6<-SimuVar6[sample(1:nrow(SimuVar6), N), ] + + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu6<-Simulation_DataOutcome(fit.rf, SimuVar6) + +###################### Representations graphiques ###################################### + +#ggpairs(SimuVar3) + + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>140 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### ici une gaussienne ajuste sur les données et tronquee a 140 + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 140, mean=mean(Data_Originale[,3]),sd=sd(Data_Originale[,3])) +#hist(GlucoseNew) +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar7<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar7[,i] = as.factor(SimuVar7[,i])} +for (i in 3:8){SimuVar7[,i] = as.numeric(as.character(SimuVar7[,i]))} +colnames(SimuVar7)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar7[,1])<-c('No','Yes') +levels(SimuVar7[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu7<-Simulation_DataOutcome(fit.rf, SimuVar7) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>140 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### Une marginale avec une loi parametrique differente (par ex biblio) de la loi des données ... ici lognormale tronquee + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 140, mean=160,sd=50) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 101, b=150, mean=120,sd=30) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 170, mean=175,sd=180) + + +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar8<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar8[,i] = as.factor(SimuVar8[,i])} +for (i in 3:8){SimuVar8[,i] = as.numeric(as.character(SimuVar8[,i]))} +colnames(SimuVar8)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar8[,1])<-c('No','Yes') +levels(SimuVar8[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu8<-Simulation_DataOutcome(fit.rf, SimuVar8) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +# simulation en dehors du support d'origine \>140 + +## Representation de certaines correlations pour donnees originales, simulees standard et filtering, simulee normal fit et filtering et simulee avec lognormal tronquee + +```{r,echo=F,message=F,fig.width=7, fig.height=6,dpi=600} + +colnames(Simu6)[9]<-"Prediction" +colnames(Simu7)[9]<-"Prediction" +colnames(Simu8)[9]<-"Prediction" +plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[Data_Originale[,3]>140,c(3,6,9)],title ="Filtering the original data")), + ggmatrix_gtable(ggpairs(Simu6[,c(3,6,9)],title ="Filtering the simulated data (based on empirical distribution)")), + ggmatrix_gtable(ggpairs(Simu7[,c(3,6,9)],title ="Filtering the simulated data (based on best Normal distribution)")), + ggmatrix_gtable(ggpairs(Simu8[,c(3,6,9)],title ="Filtering the simulated data (based on a given Normal distribution)")),nrow=2) + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>170 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) + +### filtering pour conserver les individus satisfaisant un critere d'inclusion + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +# simulation de donnees avec la methode copule N=3000 +NN<-60000 + +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine +# taille de l'echantillon definit avant +U_Simu<-rvinecop(NN,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +Simu_afiltrer<-PseudoObsInverse(Data_Originale,U_Simu) +for (i in 1:2){Simu_afiltrer[,i] = as.factor(Simu_afiltrer[,i])} +for (i in 3:8){Simu_afiltrer[,i] = as.numeric(as.character(Simu_afiltrer[,i]))} +colnames(Simu_afiltrer)<-colnames(Data_Originale[,c(1:8)]) +levels(Simu_afiltrer[,1])<-c('No','Yes') +levels(Simu_afiltrer[,2])<-c('<=25','25-35','>35') + +# Restriction de la population simulee par filtration : uniquement patient tel que glucose >= 
140 +# attention ... avec estimation empirique, toutes les simulation sont inférieures au max du support d'origine +SimuVar6<-Simu_afiltrer[Simu_afiltrer$Glucose > 170,] +# on garde uniquement N individus au hasard +SimuVar6<-SimuVar6[sample(1:nrow(SimuVar6), N), ] + + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu6<-Simulation_DataOutcome(fit.rf, SimuVar6) + +###################### Representations graphiques ###################################### + +#ggpairs(SimuVar3) + + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>170 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### ici une gaussienne ajuste sur les données et tronquee a 140 + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 170, mean=mean(Data_Originale[,3]),sd=sd(Data_Originale[,3])) +#hist(GlucoseNew) +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar7<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar7[,i] = as.factor(SimuVar7[,i])} +for (i in 3:8){SimuVar7[,i] = as.numeric(as.character(SimuVar7[,i]))} +colnames(SimuVar7)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar7[,1])<-c('No','Yes') +levels(SimuVar7[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu7<-Simulation_DataOutcome(fit.rf, SimuVar7) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +# Individus virtuels dans un intervalle en dehors du support de la population d'origine \>170 + +### simulations d'individus suivant les estimations de la copule et des marginales (empirique) et une param + +### Une marginale avec une loi parametrique differente (par ex biblio) de la loi des données ... ici lognormale tronquee + +```{r,echo=F,message=F,fig.width=12, fig.height=12} + +###################################### fit de la meilleure distribution pour Glucose ###################################### +###################### Preparation des donnees ###################################### +# Modification des donnees univariee ... 
nouvelle variable glucose (simuler via modele parametrique normal tronquee) +#GlucoseNew<-rweibull(nrow(Data_Originale),shape=4.225,scale=134.705) + +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 140, mean=160,sd=50) +#GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 101, b=150, mean=120,sd=30) +GlucoseNew <- rtruncnorm( nrow(Data_Originale), a= 170, mean=175,sd=180) + + +# definition de la base de donnees +Data_modif<-cbind(Data_Originale[,1:2],GlucoseNew,Data_Originale[,4:8]) + +# Simulation en fonction du modele de copule et des marginales (dont une modifiee) +###################### SIMULATION ###################################### +## Simulation d'un echantillon d'uniformes a partir de la distribution de la vine (estimee sur donnees originales) +# taille de l'echantillon definit avant +U_Simu<-rvinecop(N,Fit_dist) + +# Simulation de patients virtuels avec la copule sur les donnees d'origine et une marginale modifiee +SimuVar8<-PseudoObsInverse(Data_modif,U_Simu) +for (i in 1:2){SimuVar8[,i] = as.factor(SimuVar8[,i])} +for (i in 3:8){SimuVar8[,i] = as.numeric(as.character(SimuVar8[,i]))} +colnames(SimuVar8)<-colnames(Data_Originale[,c(1:8)]) +levels(SimuVar8[,1])<-c('No','Yes') +levels(SimuVar8[,2])<-c('<=25','25-35','>35') + +# prevision de l'outcome avec le modele random forest estimer (plus haut) +############### Prevision ####################@ +Simu8<-Simulation_DataOutcome(fit.rf, SimuVar8) + + +###################### Representations graphiques ###################################### +#ggpairs(SimuVar2) + +``` + +# simulation en dehors du support d'origine \>170 + +## Representation de certaines correlations pour donnees originales, simulees standard et filtering, simulee normal fit et filtering et simulee avec lognormal tronquee + +```{r,echo=F,message=F,fig.width=7, fig.height=6,dpi=600} + +colnames(Simu6)[9]<-"Prediction" +colnames(Simu7)[9]<-"Prediction" +colnames(Simu8)[9]<-"Prediction" +plot_grid( ggmatrix_gtable(ggpairs(Data_Originale[Data_Originale[,3]>170,c(3,6,9)],title ="Filtering the original data")), + ggmatrix_gtable(ggpairs(Simu6[,c(3,6,9)],title ="Filtering the simulated data (based on empirical distribution)")), + ggmatrix_gtable(ggpairs(Simu7[,c(3,6,9)],title ="Filtering the simulated data (based on best Normal distribution)")), + ggmatrix_gtable(ggpairs(Simu8[,c(3,6,9)],title ="Filtering the simulated data (based on a given Normal distribution)")),nrow=2) + +``` + +## Calcul des p-values pour plusieurs simulations + +Le code R de cette section n'est pas evalue. 
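+Once the three simulation loops below have been run, the simulated p-values can
+be summarised against those observed on the original data (a sketch, assuming
+the matrices built in the following chunk):
+
+```{r,eval=FALSE}
+# median simulated p-value per covariate vs. the p-value in the original data
+rbind(original = Pvalue_vraie,
+      copula_median = apply(Mat_Pvalue_Cop_SE, 2, median))
+```
+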
+
+```{r,echo=F,message=F,eval = FALSE}
+
+####################################################################
+## P-values on the original data (between diabetes and the other variables)
+####################################################################
+ Pvalue_vraie<-NULL
+ Pvalue_vraie[1]<-chisq.test(Data_Originale[,1], Data_Originale[,9])$p.value
+ Pvalue_vraie[2]<-chisq.test(Data_Originale[,2], Data_Originale[,9])$p.value
+ for (i in 3:8) { Pvalue_vraie[i]<-t.test(Data_Originale[,i] ~ Data_Originale[,9])$p.value }
+ Pvalue_vraie
+
+
+####################################################################
+#### Simulation loop: discrete method
+####################################################################
+Mat_Pvalue_Disc_SE<-NULL
+Mat_Pvalue_Disc_AE<-NULL
+for (j in 1:Nsimu) {
+  Simu_Cov<-NULL
+  Simu_Data<-NULL
+  Pvalue_SE<-NULL
+  Pvalue_AE<-NULL
+  Simu_Cov<-Simulation_Discrete(VPMod)
+  Simu_Data<-Simulation_DataOutcome(fit.rf, Simu_Cov)
+  Pvalue_SE<-CalculPvalue(Simu_Data)$p_SE
+  Pvalue_AE<-CalculPvalue(Simu_Data)$p_AE
+  Mat_Pvalue_Disc_SE<-rbind(Mat_Pvalue_Disc_SE,Pvalue_SE)
+  Mat_Pvalue_Disc_AE<-rbind(Mat_Pvalue_Disc_AE,Pvalue_AE)
+}
+colnames(Mat_Pvalue_Disc_SE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Disc_SE)<-c(1:nrow(Mat_Pvalue_Disc_SE))
+Mat_Pvalue_Disc_SE<-as.data.frame(Mat_Pvalue_Disc_SE)
+colnames(Mat_Pvalue_Disc_AE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Disc_AE)<-c(1:nrow(Mat_Pvalue_Disc_AE))
+Mat_Pvalue_Disc_AE<-as.data.frame(Mat_Pvalue_Disc_AE)
+
+####################################################################
+#### Simulation loop: continuous method
+####################################################################
+Mat_Pvalue_Cont_SE<-NULL
+Mat_Pvalue_Cont_AE<-NULL
+for (j in 1:Nsimu) {
+  Simu_Cov<-NULL
+  Simu_Data<-NULL
+  Pvalue_SE<-NULL
+  Pvalue_AE<-NULL
+  Simu_Cov<-Simulation_Continue(VPMod)
+  Simu_Data<-Simulation_DataOutcome(fit.rf, Simu_Cov)
+  Pvalue_SE<-CalculPvalue(Simu_Data)$p_SE
+  Pvalue_AE<-CalculPvalue(Simu_Data)$p_AE
+  Mat_Pvalue_Cont_SE<-rbind(Mat_Pvalue_Cont_SE,Pvalue_SE)
+  Mat_Pvalue_Cont_AE<-rbind(Mat_Pvalue_Cont_AE,Pvalue_AE)
+}
+colnames(Mat_Pvalue_Cont_SE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Cont_SE)<-c(1:nrow(Mat_Pvalue_Cont_SE))
+Mat_Pvalue_Cont_SE<-as.data.frame(Mat_Pvalue_Cont_SE)
+colnames(Mat_Pvalue_Cont_AE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Cont_AE)<-c(1:nrow(Mat_Pvalue_Cont_AE))
+Mat_Pvalue_Cont_AE<-as.data.frame(Mat_Pvalue_Cont_AE)
+
+####################################################################
+#### Simulation loop: copula method
+####################################################################
+Mat_Pvalue_Cop_SE<-NULL
+Mat_Pvalue_Cop_AE<-NULL
+for (j in 1:Nsimu) {
+  Simu_Cov<-NULL
+  Simu_Data<-NULL
+  Pvalue_SE<-NULL
+  Pvalue_AE<-NULL
+  Simu_Cov<-Simulation_Copule(VPMod)
+  Simu_Data<-Simulation_DataOutcome(fit.rf, Simu_Cov)
+  Pvalue_SE<-CalculPvalue(Simu_Data)$p_SE
+  Pvalue_AE<-CalculPvalue(Simu_Data)$p_AE
+  Mat_Pvalue_Cop_SE<-rbind(Mat_Pvalue_Cop_SE,Pvalue_SE)
+  Mat_Pvalue_Cop_AE<-rbind(Mat_Pvalue_Cop_AE,Pvalue_AE)
+  #print(j)
+}
+colnames(Mat_Pvalue_Cop_SE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Cop_SE)<-c(1:nrow(Mat_Pvalue_Cop_SE))
+Mat_Pvalue_Cop_SE<-as.data.frame(Mat_Pvalue_Cop_SE)
+colnames(Mat_Pvalue_Cop_AE)<-colnames(VPMod)
+row.names(Mat_Pvalue_Cop_AE)<-c(1:nrow(Mat_Pvalue_Cop_AE))
+Mat_Pvalue_Cop_AE<-as.data.frame(Mat_Pvalue_Cop_AE)
+
+```
+
+## Various boxplots
+
+```{r,warning=F,echo=F,message=F,fig.width=8, fig.height=5,dpi=600,eval=F}
+# ## Boxplot of p-values: discrete method
+# ############################################################
+# Plot_Discrete<-function(Mat_SE,Mat_AE,IndexVariable) {
+#   vectorize_Pvalue<-NULL
+#   vectorize_Label<-NULL
+#   vectorize_Pvalue<-c(Mat_SE[,IndexVariable],Mat_AE[,IndexVariable])
+#   vectorize_Label<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+#   vectorize<-cbind(vectorize_Pvalue,vectorize_Label)
+#   vectorize<-as.data.frame(vectorize)
+#   vectorize[,2]<-as.factor(vectorize[,2])
+#   levels(vectorize[,2])<-c("Without error","With error")
+#
+#   Plot_pvalue<-ggplot(vectorize) +
+#     geom_jitter(aes(x=vectorize_Label, y=vectorize_Pvalue,color = vectorize_Label), width=0.2, alpha=0.5) +
+#     geom_boxplot(aes(x=vectorize_Label, y=vectorize_Pvalue), width=0.1) +
+#     scale_color_brewer(palette="Dark2") + theme(legend.position='none') +
+#     stat_summary(geom = "point", mapping = aes(x=vectorize_Label, y=vectorize_Pvalue), fun = "mean", shape=17, size=2) +
+#     labs(x ="Simulation Error", y = "P-value Cov. x Diabetes") +
+#     ggtitle("P-value for discrete method \n True p-value =",round(Pvalue_vraie[IndexVariable],8)) +
+#     theme(plot.title = element_text(hjust = 0.5))
+# }
+#
+# ## Boxplot of p-values: continuous method
+# ############################################################
+# Plot_Continue<-function(Mat_SE,Mat_AE,IndexVariable) {
+#   vectorize_Pvalue<-NULL
+#   vectorize_Label<-NULL
+#   vectorize_Pvalue<-c(Mat_SE[,IndexVariable],Mat_AE[,IndexVariable])
+#   vectorize_Label<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+#   vectorize<-cbind(vectorize_Pvalue,vectorize_Label)
+#   vectorize<-as.data.frame(vectorize)
+#   vectorize[,2]<-as.factor(vectorize[,2])
+#   levels(vectorize[,2])<-c("Without error","With error")
+#
+#   Plot_pvalue<-ggplot(vectorize) +
+#     geom_jitter(aes(x=vectorize_Label, y=vectorize_Pvalue,color = vectorize_Label), width=0.2, alpha=0.5) +
+#     geom_boxplot(aes(x=vectorize_Label, y=vectorize_Pvalue), width=0.1) +
+#     scale_color_brewer(palette="Dark2") + theme(legend.position='none') +
+#     stat_summary(geom = "point", mapping = aes(x=vectorize_Label, y=vectorize_Pvalue), fun = "mean", shape=17, size=2) +
+#     labs(x ="Simulation Error", y = "P-value Cov. x Diabetes") +
+#     ggtitle("P-value for continuous method \n True p-value =",round(Pvalue_vraie[IndexVariable],8)) +
+#     theme(plot.title = element_text(hjust = 0.5))
+# }
+#
+# ## Boxplot of p-values: copula method
+# ############################################################
+# Plot_Copule<-function(Mat_SE,Mat_AE,IndexVariable) {
+#   vectorize_Pvalue<-NULL
+#   vectorize_Label<-NULL
+#   vectorize_Pvalue<-c(Mat_SE[,IndexVariable],Mat_AE[,IndexVariable])
+#   vectorize_Label<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+#   vectorize<-cbind(vectorize_Pvalue,vectorize_Label)
+#   vectorize<-as.data.frame(vectorize)
+#   vectorize[,2]<-as.factor(vectorize[,2])
+#   levels(vectorize[,2])<-c("Without error","With error")
+#
+#   Plot_pvalue<-ggplot(vectorize) +
+#     geom_jitter(aes(x=vectorize_Label, y=vectorize_Pvalue,color = vectorize_Label), width=0.2, alpha=0.5) +
+#     geom_boxplot(aes(x=vectorize_Label, y=vectorize_Pvalue), width=0.1) +
+#     scale_color_brewer(palette="Dark2") + theme(legend.position='none') +
+#     stat_summary(geom = "point", mapping = aes(x=vectorize_Label, y=vectorize_Pvalue), fun = "mean", shape=17, size=2) +
+#     labs(x ="Simulation Error", y = "P-value Cov. x Diabetes") +
+#     ggtitle("P-value for copula method \n True p-value = ",round(Pvalue_vraie[IndexVariable],8)) +
+#     theme(plot.title = element_text(hjust = 0.5))
+# }
+#
+# ## Boxplot of p-values with the 3 methods
+# ############################################################
+# Plot_3methode<-function(Mat_D,Mat_C,Mat_Cop,IndexVariable) {
+#   vectorize_Pvalue<-NULL
+#   vectorize_Label<-NULL
+#   vectorize_Pvalue<-c(Mat_D[,IndexVariable],Mat_C[,IndexVariable],Mat_Cop[,IndexVariable])
+#   vectorize_Label<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu),rep(2,Nsimu)))
+#   vectorize<-cbind(vectorize_Pvalue,vectorize_Label)
+#   vectorize<-as.data.frame(vectorize)
+#   vectorize[,2]<-as.factor(vectorize[,2])
+#   levels(vectorize[,2])<-c("Discrete","Continuous", "Copula")
+#
+#   Plot_pvalue<-ggplot(vectorize) +
+#     geom_jitter(aes(x=vectorize_Label, y=vectorize_Pvalue,color = vectorize_Label), width=0.2, alpha=0.5) +
+#     geom_boxplot(aes(x=vectorize_Label, y=vectorize_Pvalue), width=0.1) +
+#     scale_color_brewer(palette="Dark2") + theme(legend.position='none') +
+#     stat_summary(geom = "point", mapping = aes(x=vectorize_Label, y=vectorize_Pvalue), fun = "mean", shape=17, size=2) +
+#     labs(x ="Method", y = "P-value Cov. x Diabetes") +
+#     ggtitle("P-value for the three methods \n True p-value = ",round(Pvalue_vraie[IndexVariable],8)) +
+#     theme(plot.title = element_text(hjust = 0.5))
+# }
+
+
+############# Plots ############################
+# P_Disc_1<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,1)
+# P_Disc_2<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,2)
+# P_Disc_3<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,3)
+# P_Disc_4<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,4)
+# P_Disc_5<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,5)
+# P_Disc_6<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,6)
+# P_Disc_7<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,7)
+# P_Disc_8<-Plot_Discrete(Mat_Pvalue_Disc_SE,Mat_Pvalue_Disc_AE,8)
+#
+# P_Cont_1<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,1)
+# P_Cont_2<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,2)
+# P_Cont_3<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,3)
+# P_Cont_4<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,4)
+# P_Cont_5<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,5)
+# P_Cont_6<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,6)
+# P_Cont_7<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,7)
+# P_Cont_8<-Plot_Continue(Mat_Pvalue_Cont_SE,Mat_Pvalue_Cont_AE,8)
+#
+# P_Cop_1<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,1)
+# P_Cop_2<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,2)
+# P_Cop_3<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,3)
+# P_Cop_4<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,4)
+# P_Cop_5<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,5)
+# P_Cop_6<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,6)
+# P_Cop_7<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,7)
+# P_Cop_8<-Plot_Copule(Mat_Pvalue_Cop_SE,Mat_Pvalue_Cop_AE,8)
+#
+# plot_grid(P_Disc_1,P_Disc_2,P_Disc_3,P_Disc_4,P_Disc_5,P_Disc_6,P_Disc_7,P_Disc_8,nrow=2)
+# plot_grid(P_Cont_1,P_Cont_2,P_Cont_3,P_Cont_4,P_Cont_5,P_Cont_6,P_Cont_7,P_Cont_8,nrow=2)
+# plot_grid(P_Cop_1,P_Cop_2,P_Cop_3,P_Cop_4,P_Cop_5,P_Cop_6,P_Cop_7,P_Cop_8,nrow=2)
+
+# TODO: 3 graphs, with error vs. without error
+# TODO: one graph with boxplots for each method (same graph), for each covariate
+# TODO: table of the true p-value and the median (+ mean?) -- see the sketch below
+
+
+
+##### Graphs: simulation with error and without error
+###########
+
+#### Function to 'vectorize' the p-values (adds one variable for the method and one for with/without error)
+Vectorize<-function(IndexVariable) {
+  vectorize_Pvalue_Disc<-NULL
+  vectorize_Label_Disc<-NULL
+  vectorize_Pvalue_Disc<-c(Mat_Pvalue_Disc_SE[,IndexVariable],Mat_Pvalue_Disc_AE[,IndexVariable])
+  vectorize_Label_Disc<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+  vectorize_Disc<-cbind(vectorize_Pvalue_Disc,vectorize_Label_Disc,rep(1,length(vectorize_Label_Disc)))
+  vectorize_Disc<-as.data.frame(vectorize_Disc)
+  vectorize_Disc[,2]<-as.factor(vectorize_Disc[,2])
+  vectorize_Disc[,3]<-as.factor(vectorize_Disc[,3])
+  levels(vectorize_Disc[,2])<-c("Without error","With error")
+  levels(vectorize_Disc[,3])<-c("Discrete method")
+  colnames(vectorize_Disc)<-c("P_value","Error","Method")
+
+  vectorize_Pvalue_Cont<-NULL
+  vectorize_Label_Cont<-NULL
+  vectorize_Pvalue_Cont<-c(Mat_Pvalue_Cont_SE[,IndexVariable],Mat_Pvalue_Cont_AE[,IndexVariable])
+  vectorize_Label_Cont<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+  vectorize_Cont<-cbind(vectorize_Pvalue_Cont,vectorize_Label_Cont,rep(1,length(vectorize_Label_Cont)))
+  vectorize_Cont<-as.data.frame(vectorize_Cont)
+  vectorize_Cont[,2]<-as.factor(vectorize_Cont[,2])
+  vectorize_Cont[,3]<-as.factor(vectorize_Cont[,3])
+  levels(vectorize_Cont[,2])<-c("Without error","With error")
+  levels(vectorize_Cont[,3])<-c("Continuous method")
+  colnames(vectorize_Cont)<-c("P_value","Error","Method")
+
+  vectorize_Pvalue_Cop<-NULL
+  vectorize_Label_Cop<-NULL
+  vectorize_Pvalue_Cop<-c(Mat_Pvalue_Cop_SE[,IndexVariable],Mat_Pvalue_Cop_AE[,IndexVariable])
+  vectorize_Label_Cop<-as.factor(c(rep(0,Nsimu),rep(1,Nsimu)))
+  vectorize_Cop<-cbind(vectorize_Pvalue_Cop,vectorize_Label_Cop,rep(1,length(vectorize_Label_Cop)))
+  vectorize_Cop<-as.data.frame(vectorize_Cop)
+  vectorize_Cop[,2]<-as.factor(vectorize_Cop[,2])
+  vectorize_Cop[,3]<-as.factor(vectorize_Cop[,3])
+  levels(vectorize_Cop[,2])<-c("Without error","With error")
+  levels(vectorize_Cop[,3])<-c("Copula method")
+  colnames(vectorize_Cop)<-c("P_value","Error","Method")
+
+  # stack the three methods into one long data frame and return it
+  vectorize<-rbind(vectorize_Disc,vectorize_Cont,vectorize_Cop)
+  vectorize
+}
+
+
+### Plot for variable 1: Pregnancy
+ggplot(Vectorize(1) ,aes(x=Error, y=P_value,fill=Method)) +
+  geom_jitter(position=position_jitter(width=0.3), aes(colour=Method), alpha=0.9) +
+  geom_boxplot(alpha = 0.5, show.legend = FALSE, aes(fill=Method)) +
+  facet_grid(.~Method) +
+  theme(strip.text.x = element_text(size=9, color="black", face="bold")) +
+  labs(y="P-value : Pronostic vs. Pregnancy", x="Error in simulation") +
+  ggtitle(paste("Chi-squared test \n","Pronostic vs. Pregnancy \n","True p-value =",round(Pvalue_vraie[1],5))) +
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_manual(values=c("gainsboro", "gray47", "gray0")) +
+  scale_color_manual(values=c("gainsboro", "gray47", "gray0"))
+  # scale_fill_manual(values=c("#00AFBB", "#E7B800", "#FC4E07")) +
+  # scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
+
+
+### Plot for variable 4: BloodPressure
+ggplot(Vectorize(4) ,aes(x=Error, y=P_value,fill=Method)) +
+  geom_jitter(position=position_jitter(width=0.3), aes(colour=Method), alpha=0.9) +
+  geom_boxplot(alpha = 0.5, show.legend = FALSE, aes(fill=Method)) +
+  facet_grid(.~Method) +
+  theme(strip.text.x = element_text(size=9, color="black", face="bold")) +
+  labs(y="P-value : Pronostic vs. BloodPressure", x="Error in simulation") +
+  ggtitle(paste("Student's t-test \n","Pronostic vs. BloodPressure \n","True p-value =",round(Pvalue_vraie[4],5))) +
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_manual(values=c("gainsboro", "gray47", "gray0")) +
+  scale_color_manual(values=c("gainsboro", "gray47", "gray0"))
+
+### Plot for variable 6: Insulin
+ggplot(Vectorize(6) ,aes(x=Error, y=P_value,fill=Method)) +
+  geom_jitter(position=position_jitter(width=0.3), aes(colour=Method), alpha=0.9) +
+  geom_boxplot(alpha = 0.5, show.legend = FALSE, aes(fill=Method)) +
+  facet_grid(.~Method) +
+  theme(strip.text.x = element_text(size=9, color="black", face="bold")) +
+  labs(y="P-value : Pronostic vs. Insulin", x="Error in simulation") +
+  ggtitle(paste("Student's t-test \n","Pronostic vs. Insulin \n","True p-value =",round(Pvalue_vraie[6],5))) +
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_manual(values=c("gainsboro", "gray47", "gray0")) +
+  scale_color_manual(values=c("gainsboro", "gray47", "gray0"))
+
+### Plot for variable 7: DiabetesPedigreeFunction
+ggplot(Vectorize(7) ,aes(x=Error, y=P_value,fill=Method)) +
+  geom_jitter(position=position_jitter(width=0.3), aes(colour=Method), alpha=0.9) +
+  geom_boxplot(alpha = 0.5, show.legend = FALSE, aes(fill=Method)) +
+  facet_grid(.~Method) +
+  theme(strip.text.x = element_text(size=9, color="black", face="bold")) +
+  labs(y="P-value : Pronostic vs. DiabetesPedigreeFunction", x="Error in simulation") +
+  ggtitle(paste("Student's t-test \n","Pronostic vs. DiabetesPedigreeFunction \n","True p-value =",round(Pvalue_vraie[7],5))) +
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_manual(values=c("gainsboro", "gray47", "gray0")) +
+  scale_color_manual(values=c("gainsboro", "gray47", "gray0"))
+
+
+
+
+
+
+#### Comparison of the 3 methods (with error)
+###########
+# variable 4 : p-value = 0.0002
+# Plot_3methode(Mat_Pvalue_Disc_AE,Mat_Pvalue_Cont_AE,Mat_Pvalue_Cop_AE,4)
+# cat("True p-value =",Pvalue_vraie[4],"\n")
+# cat("Median p-value (Discrete method) =",median(Mat_Pvalue_Disc_AE[,4]),"\n")
+# cat("Median p-value (Continuous method) =",median(Mat_Pvalue_Cont_AE[,4]),"\n")
+# cat("Median p-value (Copula method) =",median(Mat_Pvalue_Cop_AE[,4]),"\n")
+#
+# # variable 6 : p-value < 10^-7
+# Plot_3methode(Mat_Pvalue_Disc_AE,Mat_Pvalue_Cont_AE,Mat_Pvalue_Cop_AE,6)
+# cat("True p-value =",Pvalue_vraie[6],"\n")
+# cat("Median p-value (Discrete method) =",median(Mat_Pvalue_Disc_AE[,6]),"\n")
+# cat("Median p-value (Continuous method) =",median(Mat_Pvalue_Cont_AE[,6]),"\n")
+# cat("Median p-value (Copula method) =",median(Mat_Pvalue_Cop_AE[,6]),"\n")
+
+
+
+
+
+```
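+
+The four plotting blocks above repeat the same ggplot2 recipe, with only the variable index, the test name, and the axis label changing. A small helper along the lines of the sketch below would remove that duplication; the function name `Plot_variable` is illustrative (not part of the original code), and the sketch assumes the `Vectorize` function and the `Pvalue_vraie` vector defined in the chunks above.
+
+```{r,echo=F,message=F,eval=FALSE}
+# Hypothetical helper: one faceted boxplot of simulated p-values for a given covariate
+Plot_variable <- function(index, test_name, var_name) {
+  ggplot(Vectorize(index), aes(x = Error, y = P_value, fill = Method)) +
+    geom_jitter(position = position_jitter(width = 0.3), aes(colour = Method), alpha = 0.9) +
+    geom_boxplot(alpha = 0.5, show.legend = FALSE, aes(fill = Method)) +
+    facet_grid(. ~ Method) +
+    theme(strip.text.x = element_text(size = 9, color = "black", face = "bold")) +
+    labs(y = paste("P-value : Pronostic vs.", var_name), x = "Error in simulation") +
+    ggtitle(paste(test_name, "\n", "Pronostic vs.", var_name, "\n",
+                  "True p-value =", round(Pvalue_vraie[index], 5))) +
+    theme(plot.title = element_text(hjust = 0.5)) +
+    scale_fill_manual(values = c("gainsboro", "gray47", "gray0")) +
+    scale_color_manual(values = c("gainsboro", "gray47", "gray0"))
+}
+
+# e.g. Plot_variable(4, "Student's t-test", "BloodPressure")
+```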