Révision a80cf00c

b/papers/2014/reservation/experiments/src/python/lib/kadeploy.py
19 19

  
20 20
class DeploymentStat:
21 21

  
22
    def __init__(self, host, start, step1_dur, step2_dur, step3_dur, success):
22
    def __init__(self, id, host, start, step1_dur, step2_dur, step3_dur, success):
23
        self._id = id
23 24
        self._host = host
24 25
        self._start = start
25 26
        self._step1_dur = int(step1_dur)
......
29 30
        self._retry_1 = 0
30 31
        self._retry_2 = 0
31 32
        self._retry_3 = 0
33
        self._cluster = None
34
        self._env = None
35

  
36
    @property
37
    def id(self):
38
        return self._id
39

  
40
    @id.setter
41
    def id(self, id):
42
        self._id = id
32 43

  
33 44
    @property
34 45
    def hostname(self):
......
56 67

  
57 68
    @property
58 69
    def success(self):
59
        return self._success
70
        success = (self._step1_dur > 5 and self.step_2_duration > 10 and self.step_3_duration > 10)
71
        return self._success and success
60 72

  
61 73
    @property
62 74
    def num_retries_step_1(self):
......
88 100
                self.num_retries_step_3 > 0 or
89 101
                self.num_retries_step_3 > 0)
90 102

  
103
    @property
104
    def cluster(self):
105
        return self._cluster
106

  
107
    @cluster.setter
108
    def cluster(self, c):
109
        self._cluster = c
110

  
111
    @property
112
    def environment(self):
113
        return self._env
114

  
115
    @environment.setter
116
    def environment(self, env):
117
        self._env = env
118

  
119

  
120
class Deployment(list):
    """
    A whole deployment, modelled as the list of the deployment stats
    collected on each of its nodes.
    """

    def __init__(self, id):
        """
        Creates an empty deployment
        :param id: the deployment ID
        """
        super(Deployment, self).__init__()
        self._id = id

    def append_stat(self, stat):
        """
        Appends one deployment stat to this deployment
        :param stat: the stat to be added
        :return: None
        """
        self.append(stat)

    @property
    def id(self):
        """
        The identifier given when the deployment was created
        :return: the deployment ID
        """
        return self._id

    @property
    def duration(self):
        """
        The largest deployment_duration among the stored stats
        :return: the deployment duration (0 when there are no stats)
        """
        # the leading 0 is the floor used when the deployment is empty
        return max([0] + [stat.deployment_duration for stat in self])

    @property
    def success(self):
        """
        Tells whether every stored stat reports success
        :return: False if any deployment on a machine failed, True otherwise
                 (an empty deployment counts as successful)
        """
        return all(stat.success for stat in self)
167

  
168

  
91 169
def parse_date(str_date):
92 170
    d = datetime.strptime(str_date, "%Y-%m-%d %H:%M:%S")
93 171
    d = TZINFO.localize(d)
......
99 177
                        help='the input request trace file')
100 178
    parser.add_argument('--output', dest='output', type=str, required=True,
101 179
                        help='the output directory')
102
    # parser.add_argument('--start-date', dest='start_date', type=parse_date, required=True,
103
    #                     help='the start date of the log (i.e. YYYY-MM-DD HH:mm:ss)')
104 180
    parser.add_argument('--cluster', dest='cluster', type=str, required=True,
105 181
                        help='the name of the cluster')
182
    parser.add_argument('--plot', dest='plot', type=str, required=True, choices=['deployment'],
183
                        help='type of graphs that should be plotted')
106 184

  
107 185
    args = parser.parse_args()
108 186
    return args
......
110 188
def parse_log(log, cluster):
111 189
    f = open(log, 'r')
112 190

  
113
    deployments = []
191
    stats = []
114 192
    for line in f:
115 193
        fields = line.split(',')
194
        id = fields[0]
116 195
        hostname = fields[2]
117 196

  
118 197
        if cluster in hostname:
......
122 201
            step2_dur = int(fields[5])
123 202
            step3_dur = int(fields[6])
124 203
            success = "true" in fields[10]
125
            dep = DeploymentStat(hostname, start, step1_dur, step2_dur, step3_dur, success)
204
            env = fields[11].strip()
205
            dep = DeploymentStat(id, hostname, start, step1_dur, step2_dur, step3_dur, success)
126 206
            retry_1 = int(fields[7])
127 207
            retry_2 = int(fields[8])
128 208
            retry_3 = int(fields[9])
129 209
            dep.num_retries_step_1 = retry_1
130 210
            dep.num_retries_step_2 = retry_2
131 211
            dep.num_retries_step_3 = retry_3
132
            deployments.append(dep)
212
            dep.cluster = cluster
213
            dep.environment = env
214
            stats.append(dep)
133 215

  
134 216
    f.close()
135
    return deployments
217
    return stats
136 218

  
137
def plot_deployment_time(deployments, cluster, out_dir):
219
def plot_deployment_time(stats, cluster, out_dir):
138 220
    values = []
139
    for d in deployments:
221
    for d in stats:
140 222
        if d.success and not d.has_retries:
141
            value = d.deployment_duration
142
            if d.step_1_duration > 5 and d.step_2_duration > 10 and d.step_3_duration > 10:
143
                values.append(value)
223
            values.append(d.deployment_duration)
144 224

  
145 225
    # title = 'Deployment on Cluster %s' % cluster
146 226
    title = 'Deployment on %s' % cluster
......
148 228
    hist_graph(values, title, 'Deployment Time (seconds)', '', out_file)
149 229

  
150 230

  
151
def plot_boot_time(deployments, cluster, out_dir):
152
    values = []
153
    for d in deployments:
154
        if d.success and not d.has_retries:
155
            if d.step_3_duration > 30:
156
                values.append(d.step_3_duration)
231
def group_by_deployment(stats, cluster):
    """
    Bucket deployment stats by the ID of the deployment they belong to.

    Stats whose cluster attribute differs from the given cluster are ignored.

    :param stats: a list of deployment stats
    :param cluster: the name of the cluster to consider
    :return: a dictionary of <id, deployment> values
    """
    grouped = {}
    for stat in stats:
        if stat.cluster == cluster:
            try:
                deployment = grouped[stat.id]
            except KeyError:
                # first stat seen for this ID: open a new deployment
                deployment = Deployment(stat.id)
                grouped[stat.id] = deployment
            deployment.append_stat(stat)
    return grouped
251

  
157 252

  
158
    title = 'Machine Boot Time on Cluster %s' % (cluster)
159
    out_file = out_dir + ("/boot_time_%s.pdf" % (cluster))
160
    hist_graph(values, title, 'Boot Time (seconds)', '', out_file)
253
def plot_deployment_size(deployments, cluster):
    """
    Plot the mean deployment duration, with the standard deviation as
    error bars, against the number of machines in the deployment.

    Only non-empty deployments that succeeded contribute to the graph.
    When no deployment qualifies there is nothing to draw and the function
    returns without opening a figure.

    :param deployments: a dictionary of <id, deployment> values
    :param cluster: the name of the cluster (not used by this function)
    :return: None
    """
    durations_by_size = {}
    # .values() rather than .itervalues() so the code runs on Python 2 and 3
    for dep in deployments.values():
        size = len(dep)
        if size > 0 and dep.success:
            durations_by_size.setdefault(size, []).append(dep.duration)

    if not durations_by_size:
        # np.max() of an empty sequence raises, so bail out early
        return

    # sort the sizes: dictionary order is arbitrary and the error-bar line
    # joins the points in the order they are given
    x = sorted(durations_by_size)
    means = [np.mean(durations_by_size[size]) for size in x]
    stds = [np.std(durations_by_size[size]) for size in x]

    fig, ax = plt.subplots()
    ax.set_xlim(0, x[-1] + 1)
    ax.set_ylabel('Deployment Duration (s)')
    # raw string: keeps the backslash before '#' without an invalid escape
    ax.set_xlabel(r'Deployment Size (\# Machines)')

    ax.errorbar(x, means, yerr=stds)
    plt.show()
162 279

  
163 280
def hist_graph(values, title, x_label, y_label, out_file):
164 281
    ar1 = np.array(values)
......
211 328
    #plt.show()
212 329
    plt.savefig(out_file)
213 330

  
331

  
214 332
def plot_graphs():
    """
    Parse the command-line options and draw the requested graphs.

    The only plot type handled is 'deployment': the log is parsed, the
    stats are grouped by deployment and the deployment-size graph is drawn.
    """
    opts = parse_kdeploy_opt()
    if opts.plot != 'deployment':
        return

    print('Printing deployment graphs...')
    cluster = opts.cluster
    stats = parse_log(opts.input, cluster)
    # plot_deployment_time(stats, cluster, opts.output) is left disabled here
    plot_deployment_size(group_by_deployment(stats, cluster), cluster)

Formats disponibles : Unified diff