Coverage for /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/memilio/surrogatemodel/ode_secir_simple/data_generation.py: 94%

95 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-18 12:29 +0000

1############################################################################# 

2# Copyright (C) 2020-2024 MEmilio 

3# 

4# Authors: Agatha Schmidt, Henrik Zunker, Khoa Nguyen 

5# 

6# Contact: Martin J. Kuehn <Martin.Kuehn@DLR.de> 

7# 

8# Licensed under the Apache License, Version 2.0 (the "License"); 

9# you may not use this file except in compliance with the License. 

10# You may obtain a copy of the License at 

11# 

12# http://www.apache.org/licenses/LICENSE-2.0 

13# 

14# Unless required by applicable law or agreed to in writing, software 

15# distributed under the License is distributed on an "AS IS" BASIS, 

16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

17# See the License for the specific language governing permissions and 

18# limitations under the License. 

19############################################################################# 

20import copy 

21import os 

22import pickle 

23import random 

24from datetime import date 

25 

26import numpy as np 

27import pandas as pd 

28import tensorflow as tf 

29from progress.bar import Bar 

30from sklearn.preprocessing import FunctionTransformer 

31 

32from memilio.simulation import (AgeGroup, ContactMatrix, Damping, LogLevel, 

33 UncertainContactMatrix, set_log_level) 

34from memilio.simulation.osecir import (Index_InfectionState, 

35 InfectionState, Model, Simulation, 

36 interpolate_simulation_result, simulate) 

37 

38 

def remove_confirmed_compartments(result_array):
    """! Aggregates the confirmed and unconfirmed infection compartments.

    Columns 2/3 hold InfectedNoSymptoms / InfectedNoSymptomsConfirmed and
    columns 4/5 hold InfectedSymptoms / InfectedSymptomsConfirmed. Each pair
    is summed into its first column and the confirmed columns are dropped,
    reducing the 10 compartments to 8.

    @param result_array 2D array of shape (time steps, compartments).
    @return Array with the confirmed compartments merged in and removed.
    """
    sum_inf_no_symp = np.sum(result_array[:, [2, 3]], axis=1)
    # Bug fix: the symptomatic pair lives in columns [4, 5]; the original
    # code summed [2, 3] twice (copy-paste error), writing the no-symptoms
    # total into the symptomatic column as well.
    sum_inf_symp = np.sum(result_array[:, [4, 5]], axis=1)
    result_array[:, 2] = sum_inf_no_symp
    result_array[:, 4] = sum_inf_symp
    # Drop the now-redundant confirmed columns.
    return np.delete(result_array, [3, 5], axis=1)

45 

46 

def run_secir_simple_simulation(days):
    """! Uses an ODE SECIR model allowing for asymptomatic infection. The model is not stratified by region or demographic properties such as age.
    Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges.

    @param days Describes how many days we simulate within a single run.
    @return List containing the populations in each compartment for each day of the simulation.
    """
    set_log_level(LogLevel.Off)

    # Total population and simulation setup.
    populations = [50_000]
    start_day = 1
    start_month = 1
    start_year = 2019
    dt = 0.1  # integration step size
    num_groups = 1  # single age group: no demographic stratification

    # Initialize the model with the (single) age group; use the constant
    # instead of a hard-coded 1 so both stay in sync.
    model = Model(num_groups)

    A0 = AgeGroup(0)

    # Set parameters
    # Compartment transition durations (in days).
    model.parameters.TimeExposed[A0] = 3.2
    model.parameters.TimeInfectedNoSymptoms[A0] = 2.
    model.parameters.TimeInfectedSymptoms[A0] = 6.
    model.parameters.TimeInfectedSevere[A0] = 12.
    model.parameters.TimeInfectedCritical[A0] = 8.

    # Initial number of people in each compartment, drawn randomly from
    # fixed ranges so every run produces a different trajectory.
    model.populations[A0, Index_InfectionState(
        InfectionState.Exposed)] = 60 * random.uniform(0.2, 1)
    model.populations[A0, Index_InfectionState(
        InfectionState.InfectedNoSymptoms)] = 55 * random.uniform(0.2, 1)
    model.populations[A0, Index_InfectionState(
        InfectionState.InfectedSymptoms)] = 50 * random.uniform(0.2, 1)
    model.populations[A0, Index_InfectionState(
        InfectionState.InfectedSevere)] = 12 * random.uniform(0.2, 1)
    model.populations[A0, Index_InfectionState(
        InfectionState.InfectedCritical)] = 3 * random.uniform(0.2, 1)
    model.populations[A0, Index_InfectionState(
        InfectionState.Recovered)] = 50 * random.random()
    model.populations[A0, Index_InfectionState(InfectionState.Dead)] = 0
    # Susceptible receives whatever is left of the total population.
    model.populations.set_difference_from_total(
        (A0, Index_InfectionState(InfectionState.Susceptible)), populations[0])

    # Compartment transition probabilities.
    model.parameters.RelativeTransmissionNoSymptoms[A0] = 0.5
    model.parameters.TransmissionProbabilityOnContact[A0] = 0.1
    model.parameters.RecoveredPerInfectedNoSymptoms[A0] = 0.09
    model.parameters.RiskOfInfectionFromSymptomatic[A0] = 0.25
    model.parameters.SeverePerInfectedSymptoms[A0] = 0.2
    model.parameters.CriticalPerSevere[A0] = 0.25
    model.parameters.DeathsPerCritical[A0] = 0.3
    # twice the value of RiskOfInfectionFromSymptomatic
    model.parameters.MaxRiskOfInfectionFromSymptomatic[A0] = 0.5

    # Start day expressed as day-of-year offset.
    model.parameters.StartDay = (
        date(start_year, start_month, start_day) - date(start_year, 1, 1)).days

    # Constant contact pattern (baseline 10 contacts, minimum 0).
    model.parameters.ContactPatterns.cont_freq_mat[0].baseline = np.ones(
        (num_groups, num_groups)) * 10
    model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.ones(
        (num_groups, num_groups)) * 0

    # Apply mathematical constraints to parameters
    model.apply_constraints()

    # Run Simulation
    result = simulate(0, days, dt, model)
    # Interpolate simulation result on days time scale
    result = interpolate_simulation_result(result)

    result_array = result.as_ndarray()

    # Drop the time row, transpose to (days, compartments) and merge the
    # confirmed compartments into their unconfirmed counterparts.
    result_array = remove_confirmed_compartments(
        result_array[1:, :].transpose())

    # np.delete (inside remove_confirmed_compartments) already returned a
    # fresh array and tolist() copies once more, so no defensive deepcopy
    # or intermediate list is needed here.
    return result_array.tolist()

131 

132 

def generate_data(
        num_runs, path, input_width, label_width, normalize=True,
        save_data=True):
    """! Generate data sets of num_runs many equation-based model simulations and transforms the computed results by a log(1+x) transformation.
    Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks.

    In general, we have 10 different compartments. However, we aggregate the InfectedNoSymptoms and InfectedNoSymptomsConfirmed compartments. The same
    holds for the InfectedSymptoms and InfectedSymptomsConfirmed compartments. So, we end up with only 8 different compartments. If we choose,
    input_width = 5 and label_width = 20, the dataset has
    - input with dimension 5 x 8
    - labels with dimension 20 x 8

    @param num_runs Number of times, the function run_secir_simple_simulation is called.
    @param path Path, where the dataset is saved to.
    @param input_width Int value that defines the number of time series used for the input.
    @param label_width Int value that defines the size of the labels.
    @param normalize [Default: true] Option to transform dataset by logarithmic normalization. Without it, inputs/labels stay nested Python lists.
    @param save_data [Default: true] Option to save the dataset.
    @return Data dictionary of input and label data sets.
    """
    data = {
        "inputs": [],
        "labels": []
    }

    # The number of days is the same as the sum of input and label width.
    # Since the first day of the input is day 0, we still need to subtract 1.
    days = input_width + label_width - 1

    # show progress in terminal for longer runs
    # Due to the random structure, there's currently no need to shuffle the data
    bar = Bar('Number of Runs done', max=num_runs)
    for _ in range(num_runs):
        data_run = run_secir_simple_simulation(days)
        # First input_width days feed the model, the rest are the labels.
        data['inputs'].append(data_run[:input_width])
        data['labels'].append(data_run[input_width:])
        bar.next()
    bar.finish()

    if normalize:
        # Logarithmic normalization: log(1 + x) is applied elementwise, so
        # the previous transpose/reshape round-trip through sklearn's
        # FunctionTransformer was a no-op detour -- np.log1p on the full
        # (num_runs, width, 8) array yields identical values.
        # tolist() keeps tf.stack's dtype inference identical to before.
        data['inputs'] = tf.stack(
            np.log1p(np.asarray(data['inputs'])).tolist())
        data['labels'] = tf.stack(
            np.log1p(np.asarray(data['labels'])).tolist())

    if save_data:
        # Create the data directory if necessary; makedirs also creates
        # missing parent directories, which bare os.mkdir would not.
        os.makedirs(path, exist_ok=True)

        # Save the dict to a pickle file (the earlier comment incorrectly
        # said JSON).
        with open(os.path.join(path, 'data_secir_simple.pickle'), 'wb') as f:
            pickle.dump(data, f)
    return data

198 

199 

if __name__ == "__main__":
    # The dataset directory lives two levels above this file, in 'data'.
    # The path is resolved step by step instead of one nested expression.
    path = os.path.dirname(os.path.realpath(__file__))
    package_dir = os.path.realpath(os.path.dirname(os.path.realpath(path)))
    path_data = os.path.join(os.path.dirname(package_dir), 'data')

    # Dataset dimensions: 5 input days, 30 label days, 1000 simulation runs.
    input_width = 5
    label_width = 30
    num_runs = 1000
    data = generate_data(num_runs, path_data, input_width, label_width)