Coverage for /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/memilio/surrogatemodel/ode_secir_simple/data_generation.py: 94%
95 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-01-17 11:58 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2025-01-17 11:58 +0000
1#############################################################################
2# Copyright (C) 2020-2025 MEmilio
3#
4# Authors: Agatha Schmidt, Henrik Zunker, Khoa Nguyen
5#
6# Contact: Martin J. Kuehn <Martin.Kuehn@DLR.de>
7#
8# Licensed under the Apache License, Version 2.0 (the "License");
9# you may not use this file except in compliance with the License.
10# You may obtain a copy of the License at
11#
12# http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing, software
15# distributed under the License is distributed on an "AS IS" BASIS,
16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17# See the License for the specific language governing permissions and
18# limitations under the License.
19#############################################################################
20import copy
21import os
22import pickle
23import random
24from datetime import date
26import numpy as np
27import pandas as pd
28import tensorflow as tf
29from progress.bar import Bar
30from sklearn.preprocessing import FunctionTransformer
32from memilio.simulation import (AgeGroup, ContactMatrix, Damping, LogLevel,
33 UncertainContactMatrix, set_log_level)
34from memilio.simulation.osecir import (Index_InfectionState,
35 InfectionState, Model, Simulation,
36 interpolate_simulation_result, simulate)
39def remove_confirmed_compartments(result_array):
40 sum_inf_no_symp = np.sum(result_array[:, [2, 3]], axis=1)
41 sum_inf_symp = np.sum(result_array[:, [2, 3]], axis=1)
42 result_array[:, 2] = sum_inf_no_symp
43 result_array[:, 4] = sum_inf_symp
44 return np.delete(result_array, [3, 5], axis=1)
47def run_secir_simple_simulation(days):
48 """! Uses an ODE SECIR model allowing for asymptomatic infection. The model is not stratified by region or demographic properties such as age.
49 Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges.
51 @param Days Describes how many days we simulate within a single run.
52 @return List containing the populations in each compartment for each day of the simulation.
53 """
54 set_log_level(LogLevel.Off)
56 populations = [50_000]
57 start_day = 1
58 start_month = 1
59 start_year = 2019
60 dt = 0.1
61 num_groups = 1
63 # Initialize Parameters
64 model = Model(1)
66 A0 = AgeGroup(0)
68 # Set parameters
69 # Compartment transition duration
70 model.parameters.TimeExposed[A0] = 3.2
71 model.parameters.TimeInfectedNoSymptoms[A0] = 2.
72 model.parameters.TimeInfectedSymptoms[A0] = 6.
73 model.parameters.TimeInfectedSevere[A0] = 12.
74 model.parameters.TimeInfectedCritical[A0] = 8.
76 # Initial number of people in each compartment with random numbers
77 model.populations[A0, Index_InfectionState(
78 InfectionState.Exposed)] = 60 * random.uniform(0.2, 1)
79 model.populations[A0, Index_InfectionState(
80 InfectionState.InfectedNoSymptoms)] = 55 * random.uniform(0.2, 1)
81 model.populations[A0, Index_InfectionState(
82 InfectionState.InfectedSymptoms)] = 50 * random.uniform(0.2, 1)
83 model.populations[A0, Index_InfectionState(
84 InfectionState.InfectedSevere)] = 12 * random.uniform(0.2, 1)
85 model.populations[A0, Index_InfectionState(
86 InfectionState.InfectedCritical)] = 3 * random.uniform(0.2, 1)
87 model.populations[A0, Index_InfectionState(
88 InfectionState.Recovered)] = 50 * random.random()
89 model.populations[A0, Index_InfectionState(InfectionState.Dead)] = 0
90 model.populations.set_difference_from_total(
91 (A0, Index_InfectionState(InfectionState.Susceptible)), populations[0])
93 # Compartment transition propabilities
94 model.parameters.RelativeTransmissionNoSymptoms[A0] = 0.5
95 model.parameters.TransmissionProbabilityOnContact[A0] = 0.1
96 model.parameters.RecoveredPerInfectedNoSymptoms[A0] = 0.09
97 model.parameters.RiskOfInfectionFromSymptomatic[A0] = 0.25
98 model.parameters.SeverePerInfectedSymptoms[A0] = 0.2
99 model.parameters.CriticalPerSevere[A0] = 0.25
100 model.parameters.DeathsPerCritical[A0] = 0.3
101 # twice the value of RiskOfInfectionFromSymptomatic
102 model.parameters.MaxRiskOfInfectionFromSymptomatic[A0] = 0.5
104 model.parameters.StartDay = (
105 date(start_year, start_month, start_day) - date(start_year, 1, 1)).days
107 model.parameters.ContactPatterns.cont_freq_mat[0].baseline = np.ones(
108 (num_groups, num_groups)) * 10
109 model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones(
110 (num_groups, num_groups)) * 0
112 # Apply mathematical constraints to parameters
113 model.apply_constraints()
115 # Run Simulation
116 result = simulate(0, days, dt, model)
117 # Interpolate simulation result on days time scale
118 result = interpolate_simulation_result(result)
120 # Using an array instead of a list to avoid problems with references
121 result_array = result.as_ndarray()
123 result_array = remove_confirmed_compartments(
124 result_array[1:, :].transpose())
126 dataset = []
128 dataset_entries = copy.deepcopy(result_array)
130 return dataset_entries.tolist()
133def generate_data(
134 num_runs, path, input_width, label_width, normalize=True,
135 save_data=True):
136 """! Generate data sets of num_runs many equation-based model simulations and transforms the computed results by a log(1+x) transformation.
137 Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks.
139 In general, we have 10 different compartments. However, we aggregate the InfectedNoSymptoms and InfectedSymptomsNoConfirmed compartments. The same
140 holds for the InfectedSymptoms and InfectedSymptomsConfirmed compartments. So, we end up with only 8 different compartments. If we choose,
141 input_width = 5 and label_width = 20, the dataset has
142 - input with dimension 5 x 8
143 - labels with dimension 20 x 8
145 @param num_runs Number of times, the function run_secir_simple_simulation is called.
146 @param path Path, where the dataset is saved to.
147 @param input_width Int value that defines the number of time series used for the input.
148 @param label_width Int value that defines the size of the labels.
149 @param normalize [Default: true] Option to transform dataset by logarithmic normalization.
150 @param save_data [Default: true] Option to save the dataset.
151 @return Data dictionary of input and label data sets.
152 """
153 data = {
154 "inputs": [],
155 "labels": []
156 }
158 # The number of days is the same as the sum of input and label width.
159 # Since the first day of the input is day 0, we still need to subtract 1.
160 days = input_width + label_width - 1
162 # show progess in terminal for longer runs
163 # Due to the random structure, theres currently no need to shuffle the data
164 bar = Bar('Number of Runs done', max=num_runs)
165 for _ in range(0, num_runs):
166 data_run = run_secir_simple_simulation(days)
167 data['inputs'].append(data_run[:input_width])
168 data['labels'].append(data_run[input_width:])
169 bar.next()
170 bar.finish()
172 if normalize:
173 # logarithmic normalization
174 transformer = FunctionTransformer(np.log1p, validate=True)
175 inputs = np.asarray(data['inputs']).transpose(2, 0, 1).reshape(8, -1)
176 scaled_inputs = transformer.transform(inputs)
177 scaled_inputs = scaled_inputs.transpose().reshape(num_runs, input_width, 8)
178 scaled_inputs_list = scaled_inputs.tolist()
180 labels = np.asarray(data['labels']).transpose(2, 0, 1).reshape(8, -1)
181 scaled_labels = transformer.transform(labels)
182 scaled_labels = scaled_labels.transpose().reshape(num_runs, label_width, 8)
183 scaled_labels_list = scaled_labels.tolist()
185 # cast dfs to tensors
186 data['inputs'] = tf.stack(scaled_inputs_list)
187 data['labels'] = tf.stack(scaled_labels_list)
189 if save_data:
190 # check if data directory exists. If necessary, create it.
191 if not os.path.isdir(path):
192 os.mkdir(path)
194 # save dict to json file
195 with open(os.path.join(path, 'data_secir_simple.pickle'), 'wb') as f:
196 pickle.dump(data, f)
197 return data
200if __name__ == "__main__":
201 # Store data relative to current file two levels higher.
202 path = os.path.dirname(os.path.realpath(__file__))
203 path_data = os.path.join(os.path.dirname(os.path.realpath(
204 os.path.dirname(os.path.realpath(path)))), 'data')
206 input_width = 5
207 label_width = 30
208 num_runs = 1000
209 data = generate_data(num_runs, path_data, input_width,
210 label_width)