Revision a80cf00c
--- a/papers/2014/reservation/experiments/src/python/lib/kadeploy.py
+++ b/papers/2014/reservation/experiments/src/python/lib/kadeploy.py
@@ -19,7 +19,8 @@
 
 class DeploymentStat:
 
-    def __init__(self, host, start, step1_dur, step2_dur, step3_dur, success):
+    def __init__(self, id, host, start, step1_dur, step2_dur, step3_dur, success):
+        self._id = id
         self._host = host
         self._start = start
         self._step1_dur = int(step1_dur)
@@ -29,6 +30,16 @@
         self._retry_1 = 0
         self._retry_2 = 0
         self._retry_3 = 0
+        self._cluster = None
+        self._env = None
+
+    @property
+    def id(self):
+        return self._id
+
+    @id.setter
+    def id(self, id):
+        self._id = id
 
     @property
     def hostname(self):
@@ -56,7 +67,8 @@
 
     @property
    def success(self):
-        return self._success
+        success = (self._step1_dur > 5 and self.step_2_duration > 10 and self.step_3_duration > 10)
+        return self._success and success
 
     @property
     def num_retries_step_1(self):
@@ -88,6 +100,72 @@
                 self.num_retries_step_3 > 0 or
                 self.num_retries_step_3 > 0)
 
+    @property
+    def cluster(self):
+        return self._cluster
+
+    @cluster.setter
+    def cluster(self, c):
+        self._cluster = c
+
+    @property
+    def environment(self):
+        return self._env
+
+    @environment.setter
+    def environment(self, env):
+        self._env = env
+
+
+class Deployment(list):
+    """
+    Stores information about an entire deployment,
+    with stats about deployment on all nodes
+    """
+
+    def __init__(self, id):
+        super(Deployment, self).__init__()
+        self._id = id
+
+    def append_stat(self, stat):
+        """
+        Adds a deployment stat to this list
+        :param stat: the stat to be added
+        :return: nil
+        """
+        self.append(stat)
+
+    @property
+    def id(self):
+        """
+        Returns the deployment ID
+        :return: the deployment ID
+        """
+        return self._id
+
+    @property
+    def duration(self):
+        """
+        Returns the time length of the longest deployment stat
+        :return: the deployment duration
+        """
+        last_stat = 0
+        for stat in self:
+            last_stat = max(last_stat, stat.deployment_duration)
+        return last_stat
+
+    @property
+    def success(self):
+        """
+        Checks whether the deployment has been successful
+        :return: False if any deployment on a machine failed
+        """
+        for stat in self:
+            if not stat.success:
+                return False
+        return True
+
+
 def parse_date(str_date):
     d = datetime.strptime(str_date, "%Y-%m-%d %H:%M:%S")
     d = TZINFO.localize(d)
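For context, a minimal sketch of how the new id field and the Deployment container introduced above are meant to be used. This is not part of the revision: the module name, host names and numeric values are made up, and it assumes DeploymentStat.deployment_duration (defined in the elided context) behaves as referenced elsewhere in the file.

# Hypothetical usage sketch; values are illustrative only.
from kadeploy import DeploymentStat, Deployment  # assumed module name

s1 = DeploymentStat('D42', 'node-1.example.org', None, 60, 120, 90, True)  # start not needed here
s2 = DeploymentStat('D42', 'node-2.example.org', None, 65, 130, 95, True)

dep = Deployment('D42')
dep.append_stat(s1)
dep.append_stat(s2)

print dep.duration  # longest per-node deployment_duration in the group
print dep.success   # False as soon as any per-node stat reports failure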
@@ -99,10 +177,10 @@
                         help='the input request trace file')
     parser.add_argument('--output', dest='output', type=str, required=True,
                         help='the output directory')
-    # parser.add_argument('--start-date', dest='start_date', type=parse_date, required=True,
-    #                     help='the start date of the log (i.e. YYYY-MM-DD HH:mm:ss)')
     parser.add_argument('--cluster', dest='cluster', type=str, required=True,
                         help='the name of the cluster')
+    parser.add_argument('--plot', dest='plot', type=str, required=True, choices=['deployment'],
+                        help='type of graphs that should be plotted')
 
     args = parser.parse_args()
     return args
@@ -110,9 +188,10 @@
 def parse_log(log, cluster):
     f = open(log, 'r')
 
-    deployments = []
+    stats = []
     for line in f:
         fields = line.split(',')
+        id = fields[0]
         hostname = fields[2]
 
         if cluster in hostname:
@@ -122,25 +201,26 @@
             step2_dur = int(fields[5])
             step3_dur = int(fields[6])
             success = "true" in fields[10]
-            dep = DeploymentStat(hostname, start, step1_dur, step2_dur, step3_dur, success)
+            env = fields[11].strip()
+            dep = DeploymentStat(id, hostname, start, step1_dur, step2_dur, step3_dur, success)
             retry_1 = int(fields[7])
             retry_2 = int(fields[8])
             retry_3 = int(fields[9])
             dep.num_retries_step_1 = retry_1
             dep.num_retries_step_2 = retry_2
             dep.num_retries_step_3 = retry_3
-            deployments.append(dep)
+            dep.cluster = cluster
+            dep.environment = env
+            stats.append(dep)
 
     f.close()
-    return deployments
+    return stats
 
-def plot_deployment_time(deployments, cluster, out_dir):
+def plot_deployment_time(stats, cluster, out_dir):
     values = []
-    for d in deployments:
+    for d in stats:
         if d.success and not d.has_retries:
-            value = d.deployment_duration
-            if d.step_1_duration > 5 and d.step_2_duration > 10 and d.step_3_duration > 10:
-                values.append(value)
+            values.append(d.deployment_duration)
 
     # title = 'Deployment on Cluster %s' % cluster
     title = 'Deployment on %s' % cluster
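As an aside, the column layout that parse_log now relies on can be read off the indices used in this hunk. The summary below is only an inference (the start-date and step-1 columns are parsed in context lines elided above), and the sample line is invented.

# Inferred from the indices used in parse_log; illustrative only.
#   fields[0]  deployment id         fields[7]   retries in step 1
#   fields[2]  hostname              fields[8]   retries in step 2
#   fields[5]  step 2 duration (s)   fields[9]   retries in step 3
#   fields[6]  step 3 duration (s)   fields[10]  "true"/"false" success flag
#                                    fields[11]  environment name
line = "D42,?,node-1.example.org,?,63,118,87,0,0,0,true,some-env"  # invented sample
fields = line.split(',')
print fields[0], fields[2], "true" in fields[10], fields[11].strip()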
@@ -148,17 +228,54 @@
     hist_graph(values, title, 'Deployment Time (seconds)', '', out_file)
 
 
-def plot_boot_time(deployments, cluster, out_dir):
-    values = []
-    for d in deployments:
-        if d.success and not d.has_retries:
-            if d.step_3_duration > 30:
-                values.append(d.step_3_duration)
+def group_by_deployment(stats, cluster):
+    """
+    Group deployment stats by deployment.
+
+    :param stats: a list of deployment stats
+    :param cluster: the name of the cluster to consider
+    :return: a dictionary of <id, deployment> values
+    """
+    deployments = {}
+    for s in stats:
+        if s.cluster != cluster:
+            continue
+
+        if s.id in deployments:
+            dep = deployments[s.id]
+        else:
+            dep = Deployment(s.id)
+            deployments[s.id] = dep
+        dep.append_stat(s)
+    return deployments
+
 
-    title = 'Machine Boot Time on Cluster %s' % (cluster)
-    out_file = out_dir + ("/boot_time_%s.pdf" % (cluster))
-    hist_graph(values, title, 'Boot Time (seconds)', '', out_file)
+def plot_deployment_size(deployments, cluster):
+    sizes = {}
+    for d in deployments.itervalues():
+        size = len(d)
+        if size not in sizes:
+            sizes[size] = []
 
+        if size > 0 and d.success:
+            sizes[size].append(d.duration)
+
+    fig, ax = plt.subplots()
+    x = []
+    means = []
+    stds = []
+    for size, value in sizes.iteritems():
+        if len(value) > 0:
+            x.append(size)
+            means.append(np.mean(value))
+            stds.append(np.std(value))
+
+    ax.set_xlim(0, np.max(x) + 1)
+    ax.set_ylabel('Deployment Duration (s)')
+    ax.set_xlabel('Deployment Size (\# Machines)')
+
+    plt.errorbar(x, means, yerr=stds)
+    plt.show()
 
 def hist_graph(values, title, x_label, y_label, out_file):
     ar1 = np.array(values)
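A minimal sketch of how the two new helpers are expected to chain together; it mirrors the updated plot_graphs below, and the log path and cluster name are placeholders. Note that itervalues() and iteritems() tie this code to Python 2.

# Hypothetical wiring of the new helpers; path and cluster are placeholders.
stats = parse_log('/tmp/kadeploy.log', 'mycluster')
deployments = group_by_deployment(stats, 'mycluster')  # {deployment id: Deployment}
print '%d deployments on this cluster' % len(deployments)
plot_deployment_size(deployments, 'mycluster')          # mean duration vs. deployment size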
@@ -211,8 +328,12 @@
     #plt.show()
     plt.savefig(out_file)
 
+
 def plot_graphs():
     opts = parse_kdeploy_opt()
-    deployments = parse_log(opts.input, opts.cluster)
-    plot_deployment_time(deployments, opts.cluster, opts.output)
-    # plot_boot_time(deployments, opts.cluster, opts.output)
+    if opts.plot == 'deployment':
+        print 'Printing deployment graphs...'
+        stats = parse_log(opts.input, opts.cluster)
+        # plot_deployment_time(stats, opts.cluster, opts.output)
+        deployments = group_by_deployment(stats, opts.cluster)
+        plot_deployment_size(deployments, opts.cluster)
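Finally, a hedged sketch of how the updated entry point could be exercised with the new --plot option. The module name, the argv-based driver and the paths are assumptions; only the option names come from parse_kdeploy_opt above.

# Hypothetical driver; equivalent to passing the flags on the command line.
import sys
from kadeploy import plot_graphs  # assumed module name

sys.argv[1:] = ['--input', '/tmp/kadeploy.log', '--output', '/tmp/graphs',
                '--cluster', 'mycluster', '--plot', 'deployment']
plot_graphs()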