Révision a04cd5c3

b/papers/2014/reservation/experiments/src/python/lib/graphs.py
110 110
    plt.savefig(out_dir + '/lyon_use_may_2013.pdf')
111 111
    # plt.show()
112 112

  
113
def plot_potential(conf):
113
def autolabel(ax, rects):
114
    # attach some text labels
115
    for rect in rects:
116
        height = rect.get_height()
117
        ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height),
118
                ha='center', va='bottom')
119

  
120
def plot_potential(conf, output):
114 121
    value = conf.get('simulation', 'scenarios').strip().split(',')
115 122
    scenarios = []
116 123
    for v in value:
......
132 139
                off.append(o)
133 140
        f.close()
134 141

  
135
    print scenarios
136
    print potential
137
    print off
142
    N = 4
143

  
144
    ind = np.arange(N)  # the x locations for the groups
145
    width = 0.35       # the width of the bars
146

  
147
    fig, ax = plt.subplots()
148
    rects1 = ax.bar(ind, potential, width, color='r')
149
    rects2 = ax.bar(ind+width, off, width, color='y')
150

  
151
    # add some text for labels, title and axes ticks
152
    ax.set_ylabel('Aggregate Resource Time')
153
    ax.set_title('Aggregate Idle and Off Periods')
154
    ax.set_xticks(ind+width)
155
    ax.set_xticklabels( ('lyon\_cloud', 'lyon\_reservation', 'reims\_cloud', 'reims\_reservation') )
156
    ax.legend( (rects1[0], rects2[0]), ('Aggregate Idle Period', 'Aggregate Off Period') )
157

  
158
    plt.savefig(output + '/potential_saving.pdf')
159
    # plt.show()
160

  
161
def plot_delay(conf, output):
162
    value = conf.get('simulation', 'scenarios').strip().split(',')
163
    scenarios = []
164
    for v in value:
165
        scenarios.append(v + '_cloud')
166
        scenarios.append(v)
167

  
168
    delay = []
169
    for scenario in scenarios:
170
        output_dir = '%s/%s' % (conf.get('simulation', 'output-directory'), scenario)
171
        sched_log = output_dir + '/scheduling.txt'
172
        f = open(sched_log, 'r')
173
        for line in f:
174
            if 'aggregate_request_delay' in line:
175
                p = float(line.strip().split(',')[1])
176
                delay.append(p)
177
        f.close()
178

  
179
    figure(figsize=(7, 4))
180
    ax = plt.subplot(111)
181
    width = 0.85  # the width of the bars
182
    opacity = 0.5
183
    ind = np.arange(len(delay))
184
    colors = ['r','b','g','y']
185
    # bars = ax.bar(ind + width / 2, values, width=width, color=colors, alpha=opacity)
186
    bars = ax.barh(ind + width / 2, delay, color=colors)
187
    plt.yticks(ind + width, ('lyon\_cloud', 'lyon\_reservation', 'reims\_cloud', 'reims\_reservation'))
188

  
189
    # colors = np.linspace(0, 1, len(bars))
190

  
191
    # for bar in bars:
192

  
193
    # plt.tick_params(
194
    #     axis='y',          # changes apply to the y-axis
195
    #     which='both',      # both major and minor ticks are affected
196
    #     bottom='off',      # ticks along the bottom edge are off
197
    #     top='off',         # ticks along the top edge are off
198
    #     labelbottom='off') # labels along the bottom edge are off
199

  
200
    # plt.ylabel('Scenarios')
201
    plt.xlabel('Aggregate Request Delay')
202
    # plt.title('CPU Usage of Drivers Under Different Scenarios')
203
    # plt.legend((bars), xticks)
204

  
205
    plt.tight_layout()
206
    # plt.show()
207
    plt.savefig(output + '/request_delay.pdf')
138 208

  
139 209
def format_date(x, pos=None):
140 210
    timeoff = timedelta(seconds=int(x))
......
148 218
    print 'Creating all graphs...'
149 219
    opts = parse_graph_opt()
150 220
    conf = PropertiesConfigParser(opts.config)
151
    plot_potential(conf)
221
    plot_potential(conf, opts.output)
222
    plot_delay(conf, opts.output)
152 223

  
153 224
    # plot_lyon_usage(opts.input, opts.output)
b/papers/2014/reservation/experiments/src/python/lib/kadeploy.py
11 11
import scipy.stats as stats
12 12
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
13 13
rc('text', usetex=True)
14
plt.rcParams.update({'font.size': 14})
14
plt.rcParams.update({'font.size': 16})
15 15
from datetime import datetime
16 16
from statsmodels.stats.gof import powerdiscrepancy
17 17

  
......
99 99
                        help='the input request trace file')
100 100
    parser.add_argument('--output', dest='output', type=str, required=True,
101 101
                        help='the output directory')
102
    parser.add_argument('--start-date', dest='start_date', type=parse_date, required=True,
103
                        help='the start date of the log (i.e. YYYY-MM-DD HH:mm:ss)')
102
    # parser.add_argument('--start-date', dest='start_date', type=parse_date, required=True,
103
    #                     help='the start date of the log (i.e. YYYY-MM-DD HH:mm:ss)')
104 104
    parser.add_argument('--cluster', dest='cluster', type=str, required=True,
105 105
                        help='the name of the cluster')
106 106

  
......
134 134
    f.close()
135 135
    return deployments
136 136

  
137

  
138
def plot_deployment_time(deployments, cluster, year, out_dir):
137
def plot_deployment_time(deployments, cluster, out_dir):
139 138
    values = []
140 139
    for d in deployments:
141 140
        if d.success and not d.has_retries:
......
143 142
            if d.step_1_duration > 5 and d.step_2_duration > 10 and d.step_3_duration > 10:
144 143
                values.append(value)
145 144

  
146
    # title = 'Deployment on Cluster %s (%d)' % (cluster, year)
147
    title = 'Deployment on Cluster %s' % (cluster)
148
    # out_file = out_dir + ("/deployment_%s_%d.pdf" % (cluster, year))
149
    out_file = out_dir + ("/deployment_%s.pdf" % (cluster))
145
    # title = 'Deployment on Cluster %s' % cluster
146
    title = 'Deployment on %s' % cluster
147
    out_file = out_dir + ("/deployment_%s.pdf" % cluster)
150 148
    hist_graph(values, title, 'Deployment Time (seconds)', '', out_file)
151 149

  
152 150

  
151
def plot_boot_time(deployments, cluster, out_dir):
152
    values = []
153
    for d in deployments:
154
        if d.success and not d.has_retries:
155
            if d.step_3_duration > 30:
156
                values.append(d.step_3_duration)
157

  
158
    title = 'Machine Boot Time on Cluster %s' % (cluster)
159
    out_file = out_dir + ("/boot_time_%s.pdf" % (cluster))
160
    hist_graph(values, title, 'Boot Time (seconds)', '', out_file)
161

  
162

  
153 163
def hist_graph(values, title, x_label, y_label, out_file):
154 164
    ar1 = np.array(values)
155 165
    fig, ax = plt.subplots()
......
159 169
                                normed=True, facecolor='#708090', alpha=0.5, rwidth=0.8)
160 170
    plt.subplots_adjust(left=0.15)
161 171

  
162
    # max = round(bins[len(bins) - 1])
163 172
    max = ar1.max()
164 173
    xs = np.linspace(0, max, len(values))
165 174

  
166
    # density = gaussian_kde(ar1)
167
    # density.covariance_factor = lambda : .25
168
    # density._compute_covariance()
169
    # plt.plot(xs, density(xs), 'r--')
170

  
171 175
    ax.set_xlabel(x_label)
172 176
    ax.set_ylabel(y_label)
173 177
    ax.set_title(title)
......
185 189
        if 'lognorm' in dist_name:
186 190
            print 'shape = %.4f, loc=%.4f, scale=%.4f' % param
187 191

  
188
        # pdf_fitted = dist.pdf(xs, *param[:-2], loc=param[-2], scale=param[-1])
189 192
        pdf_fitted = dist.pdf(xs, *param[:-2], loc=param[-2], scale=param[-1])
190 193

  
191 194
        kst = kstest(ar1, dist_name, param)
192
        # kst = kstest(ar1, dist_name)
193 195
        ks_tests[dist_name] = kst
194 196
        fit_params[dist_name] = param
195 197

  
......
212 214
def plot_graphs():
213 215
    opts = parse_kdeploy_opt()
214 216
    deployments = parse_log(opts.input, opts.cluster)
215
    plot_deployment_time(deployments, opts.cluster, opts.start_date.year, opts.output)
217
    plot_deployment_time(deployments, opts.cluster, opts.output)
218
#    plot_boot_time(deployments, opts.cluster, opts.output)
b/papers/2014/reservation/paper.tex
47 47

  
48 48
\title{A Resource Reservation System to Improve the Support for HPC Applications in OpenStack}
49 49

  
50
\author{\IEEEauthorblockN{Fran\c{c}ois Rossigneux, Marcos Dias de Assun\c{c}\~ao, Laurent Lef\`{e}vre}
51
\IEEEauthorblockA{INRIA Avalon, LIP Laboratory\\
52
Ecole Normale Sup\'{e}rieure de Lyon\\
53
University of Lyon, France}
54
}
50
% \author{\IEEEauthorblockN{Fran\c{c}ois Rossigneux, Marcos Dias de Assun\c{c}\~ao, Laurent Lef\`{e}vre}
51
% \IEEEauthorblockA{INRIA Avalon, LIP Laboratory\\
52
% Ecole Normale Sup\'{e}rieure de Lyon\\
53
% University of Lyon, France}
54
% }
55 55

  
56 56
\maketitle
57 57

  
......
171 171
The second weigher, termed as \ac{KWRanking} ranks machines by their power efficiency (\textit{i.e.} FLOPS/Watt). This weigher relies on:
172 172

  
173 173
\begin{itemize}
174
\item A software infrastructure called \ac{KWAPI}, described in detail in our previous work, built for monitoring the power consumed by resources of a data centre and for interfacing with Ceilometer to provide power consumption data. Ceilometer is OpenStack's telemetry infrastructure used to monitor performance metrics\footnote{https://wiki.openstack.org/wiki/Ceilometer}.
174
\item A software infrastructure called \ac{KWAPI}, described in detail in our previous work \cite{Rossigneux:2014}, built for monitoring the power consumed by resources of a data centre and for interfacing with Ceilometer to provide power consumption data. Ceilometer is OpenStack's telemetry infrastructure used to monitor performance metrics\footnote{https://wiki.openstack.org/wiki/Ceilometer}.
175 175

  
176 176
\item A benchmark executed on the machines to determine their delivered performance by watt.
177 177
\end{itemize}
......
186 186

  
187 187
\subsection{Power-Off Idle Resources}
188 188

  
189
The strategy considered here consist of checking periodically what resources are idle. Once determined that a resource has remained idle for a number of consecutive intervals and it is not committed to serve a reservation over a give time horizon --- \textit{i.e.} when reservation is enabled --- the resource is powered off. Previous work \cite{OrgerieSaveWatts:2008} has evaluated the impact of decisions on appropriate intervals for measuring idleness, for deciding on the horizon for switching off resources committed to reservations. This work considers that the measurement interval, idleness time, and reservation horizon are respectively 1, 5 and 15 minutes. Algorithm~\ref{algo:alloc_policy} summarises the strategy.
189
As illustrated by Algorithm~\ref{algo:alloc_policy}, the strategy considered here checks periodically what resources are idle (Lines \ref{algo:check_idle_start} to \ref{algo:check_idle_end}). Also periodically, the strategy determines that resources have remained idle over a given time period and whether they will likely remain idle over a time horizon because they are not assigned to any request (Lines \ref{algo:switch_start} to \ref{algo:switch_end}). Once determined that a resource has remained idle for a number of consecutive intervals and it is not committed to serve a reservation over a given time horizon --- \textit{i.e.} when reservation is enabled --- the resource is powered off. Previous work \cite{OrgerieSaveWatts:2008} has evaluated the impact of decisions on appropriate intervals for measuring idleness, for deciding on the horizon for switching off resources committed to reservations. Here we consider that the measurement interval, idleness time, and reservation horizon are respectively 1, 5 and 50 minutes.
190 190
          
191 191
\IncMargin{-0.6em}
192 192
\RestyleAlgo{ruled}\LinesNumbered
193 193
\begin{algorithm}[ht]
194
\caption{Sample resource allocation policy.}
194
\caption{Na\"ive resource allocation strategy.}
195 195
\label{algo:alloc_policy} 
196 196
\DontPrintSemicolon
197 197
\SetAlgoLined
......
200 200

  
201 201
\label{algo:check_idle_start}\textbf{procedure} checkIdleNodes()\;
202 202
\Begin{ 
203
	$Res\_idle_t \leftarrow $ get list of idle resources at interval $t$\;
204
	$Res\_idle_{t-1} \leftarrow $ get list of idle resources at interval $t-1$\;
205
	
206
	\ForEach{resource $r \in Res\_idle_t$}{
207
    \eIf{$r \in Res\_idle_{t-1}$}{
208
       // increase number of idle intervals of $r$\;
209
       $r.idle\_intervals \leftarrow r.idle\_intervals + 1$\;
210
	  }{
211
       $r.idle\_intervals \leftarrow 1$\;\label{algo:check_idle_end}
212
	  }
213
	}
203
	$Res\_idle \leftarrow $ list of idle resources during past $x$ intervals\;
204
	$Res\_idle_{t} \leftarrow $ list of idle resources at interval $t$\;
205
	$Res\_idle \leftarrow Res\_idle \cap Res\_idle_{t}$ \label{algo:check_idle_end}
214 206
}
215 207

  
216 208
\BlankLine
......
219 211
	$Res\_on_{t} \leftarrow $ list of resources switched on\;
220 212
	$Res\_off_{t} \leftarrow $ list of resources switched off\;
221 213
	$Res\_reserv_{t,h} \leftarrow $ resources reserved until horizon $h$\;
222
	$nres\_reserv_{t,h} \leftarrow $ number of resources in $Res\_reserved_{t,h}$\;
223
	$nres\_fcast_{t+1} \leftarrow $ forecast number of resources required at $t+1$ \;
224
	$nres\_req_{t+1} \leftarrow max(nres\_fcast_{t+1},nres\_reserv_{t,h})$\;
214
	$Res\_idle \leftarrow $ list of idle resources during past $x$ intervals\;
225 215
	
226
	\While{$nres\_req_{t+1} > sizeof(Res\_on_{t})$} {
227
		$r \leftarrow $ pop resource from $Res_{off}$\;
228
		switch resource $r$ on\;
229
		add $r$ to $Res\_on_{t}$\;
230
	}
231
	
232
	$Res\_idle \leftarrow $ get list of resources idle during last checks\;
233
	\While{$nres\_req_{t+1} < sizeof(Res\_on_{t})$} {
234
	    \ForEach{resource $r \in Res\_idle$}{
235
	    	\If{$r \notin Res\_reserv_{t,h}$}{
236
		      remove $r$ from $Res\_on_{t}$\;
237
		      switch resource $r$ off\;
238
		      add $r$ to $Res\_off_{t}$\;
239
		    }
240
		    \If{$nres\_req_{t+1} = sizeof(Res\_on_{t})$}{
241
		      \textbf{break}\; \label{algo:switch_end}
242
		    }
243
	    } 
244
    }
245
  }
216
	// Obtain list of resources that will remain idle during $h$\;
217
	$Switch\_off = Res\_idle \cap Res\_off_{t} \cap Res\_reserv_{t,h}$\;
218
  $switchOff(Switch\_off)$\; \label{algo:switch_end}
219
}
220

  
221
\BlankLine
222
\label{algo:res_arrival_start}\textbf{procedure} requestSubmitted(Request $req$)\;
223
\Begin{ 
224
  $av\_boot\_time \leftarrow $ get average machine deployment time\;
225
  $now \leftarrow $ get current time\;
226
  // Find a place for $req$ in the scheduling queue\;
227
  $schedule(req)$\;
228
  \eIf{$res.start\_time < now + av\_boot\_time$}{
229
      $Req\_res \leftarrow $ get list of resources required by $req$\;
230
      $Res\_off \leftarrow $ list of resources switched off\;
231
      $Switch\_on \leftarrow Req\_res \cap Res\_off$\;
232
      $switchOn(Switch\_on)$\;  
233
   }{  
234
      $time\_check \leftarrow res.start\_time - av\_boot\_time$\;
235
      // schedule event to check whether machines need to be\;
236
      // switched on at time $time\_check$\;
237
      $check\_machines(req, time\_check)$\; \label{algo:res_arrival_end}
238
   }
246 239
}
247 240

  
241
\BlankLine
248 242
\While{system is running} {
249 243
   every minute call checkIdleNodes()\;
250 244
   every 5 minutes call switchResourcesOnOff()\;
251 245
}
246
}
252 247
\end{algorithm}
253 248
\IncMargin{0.6em} 
254 249

  
255
Lines~\ref{algo:check_idle_start} to \ref{algo:check_idle_end} contains the pseudo-code to identify idle resources, whereas lines \ref{algo:switch_start} to \ref{algo:switch_end} determines the resources that need to be switched on or off.
250
When a request arrives, the strategy finds a place for it in the scheduling agenda. Once the schedule for the request is determined, the strategy verifies whether resources need to be switched on, or if a future resource check must be scheduled (Lines \ref{algo:res_arrival_start} to \ref{algo:res_arrival_end}). This policy is simple and efficient from an energy consumption perspective, but it can lead to high performance degradation if resources need to be constantly switched on or off. 
256 251

  
257 252
\subsection{User Behaviour}
258 253

  
259
We believe that users of a cloud infrastructure would plan their resource demands in advance and use reservations if enough incentives were provided. These incentives could materialise in the form of discount prices for allocating resources or information on how their behavioural changes affects resource allocation and maximise energy savings \cite{OrgerieSaveWatts:2008}. In this work we do not focus on devising the proper incentives for users to adhere to using reservations. As discussed in Section \ref{sec:experiments}, the experiments consider that part of users find enough incentives to change their allocation decisions and hence reserve resources in advance.
254
We believe that users of a cloud infrastructure would plan their resource demands in advance and use reservations if enough incentives were provided. These incentives could materialise in the form of discount prices for allocating resources or information on how their behavioural changes affect resource allocation and maximise energy savings \cite{OrgerieSaveWatts:2008}. In this work we do not focus on devising the proper incentives for users to adhere to using reservations. As discussed in Section \ref{sec:experiments}, the experiments consider that at least some users find enough incentives to change their allocation decisions and hence reserve resources in advance.
260 255
% ----------------------------------------------------------------------------------------
261 256

  
262
\section{Experimental Setup and Results}
263
\label{sec:experiments}
257
\section{Reservation Workloads and Bare-Metal Deployment}
258
\label{sec:workloads}
264 259

  
265
In this section, we evaluate the potential for energy savings when employing a reservation framework in private clouds. We discuss the experimental setup and metrics, and then analyse the obtained performance results.
260
This section describes the workloads used to evaluate the use of advance reservation for provisioning of bare-metal resources in a cloud environment, and the model used for deployment. 
266 261

  
267
\subsection{Experimental Setup}
262
\subsection{Reservation Workloads}
263

  
264
As traces of cloud workloads are very difficult to obtain, we use request logs gathered from Grid'5000 sites and adapt them to model cloud users' resource demands. There are essentially two types of requests that users of Grid'5000 can make, namely \textit{best-effort} and \textit{reservations}. Scheduling of best-effort requests is done as in most batch-scheduling systems, where resources are assigned to requests whenever they become available. Reservations allow users to request a set of resources over a defined time frame. Resource reservations have priority over best-effort requests, which means that the latter are cancelled whenever resources are reserved. In this work we ignore best-effort requests.
268 265

  
269
A discrete-event simulator developed in house is used to model and simulate the resource allocation and request scheduling in a private cloud setting. We resort to simulation as it enables controlled, repeatable and large-scale experiments. Both infrastructure capacity and resource requests are expressed in number of CPU cores. As traces of cloud workloads are very difficult to obtain, we use request logs gathered from Grid'5000 sites and adapt them to model cloud users' resource demands. Under normal operation, Grid'5000 enables resource reservations, but users' requests are conditioned by the available resources. For instance, a user willing to allocate resources for an experiment will often check a site's agenda, see what resources are available and will eventually make a reservation during a convenient time frame. If the user cannot find enough resources, she will either adapt her requirements to resource availability --- \textit{e.g.} change the number of required resources, and reservation start or/and finish time --- or choose another site with available capacity. The request traces, however, do not capture what users' initial requirements were before they make their requests.
266
Under normal operation, although Grid'5000 enables resource reservations, user requests are conditioned by the available resources. For instance, a user willing to allocate resources for an experiment will often check a site's agenda, see what resources are available and will eventually make a reservation during a convenient time frame. If the user cannot find enough resources, she will either adapt her requirements to resource availability --- \textit{e.g.} change the number of required resources, and reservation start or/and finish time --- or choose another site with available capacity. The request traces, however, do not capture what users' initial requirements were before they make their requests.
270 267

  
271 268
In order to obtain a workload trace on provisioning of bare-metal resources that is more cloud oriented, we adapt the request traces and infrastructure capacity of Grid'5000 by making the following changes to reservation requests:
272 269

  
......
276 273
\item \label{enum:capacity} The resource capacity of a site is modified to the maximum number of CPU cores required to honour all requests, plus a safety factor.
277 274
\end{enumerate}
278 275

  
279
The characteristics of best-effort requests are not changed. Change \ref{enum:cond1} modifies the behaviour of users who today explore resources during off-peak periods, whereas \ref{enum:cond2} alters the current practice of planning experiments in advance and reserving resources before they are taken by other users. Although the changes may seem extreme at first, they allow us to evaluate what we consider to be our \textit{worst case scenario} where reservation is not enabled. Moreover, as mentioned earlier, we believe the model adopted by existing clouds, where short-term advance reservations are generally not allowed and prices of on-demand instances do not vary over time, users would have little incentives to explore off-peak periods or plan their demand in advance. Change \ref{enum:capacity} reflects the industry practice of provisioning resources to handle peak demand and including a margin of safety.
276
Change \ref{enum:cond1} modifies the behaviour of users who today explore resources during off-peak periods, whereas \ref{enum:cond2} alters the current practice of planning experiments in advance and reserving resources before they are taken by other users. Although the changes may seem extreme at first, they allow us to evaluate what we consider to be our \textit{worst case scenario} where reservation is not enabled. Moreover, as mentioned earlier, we believe the model adopted by existing clouds, where short-term advance reservations are generally not allowed and prices of on-demand instances do not vary over time, users would have little incentives to explore off-peak periods or plan their demand in advance. Change \ref{enum:capacity} reflects the industry practice of provisioning resources to handle peak demand and including a margin of safety.
277

  
278

  
279
\subsection{Bare-Metal Deployment}
280

  
281
As the provisioning of bare-metal resources by clouds is a relatively new topic, it is also difficult to find workload traces that provide information on the time of operations required to deploy resources, such as switching resources on, cloning operating system images, and partitioning physical disks.
282

  
283
To model the time required for deployment, we use traces gathered from Grid'5000 sites and generated by Kadeploy3 \cite{JeanvoineKadeploy3:2013}. Kadeploy3 is a disk imaging and cloning tool that takes a file containing the operating system to deploy (\textit{i.e.} an environment) and copies it to target nodes. An environment deployment by Kadeploy3 consists of essentially three phases:
284

  
285
\begin{enumerate}
286
 \item Minimal environment setup, where nodes reboot into a minimal environment containing tools required for partitioning disks during the phase.
287
 \item Environment installation, when the environment is broadcast and copied to all nodes, and post-copy operations are made. 
288
 \item Reboot of nodes using the deployed environment.
289
\end{enumerate}
290

  
291
In order to build a model for bare-metal deployment we gathered several years of Kadeploy3 traces from three Grid'5000 sites --- Rennes, Reims and Lyon --- and evaluated the time to execute the three phases described above. Table~\ref{tab:kadeploy_clusters} lists the characteristics of the clusters considered in this study.
292

  
293
\begin{table}[!hbt]
294
\caption{Clusters whose deployment logs were considered.}
295
\label{tab:kadeploy_clusters} 
296
\footnotesize
297
  \begin{tabular}{ccccl}
298
    \toprule
299
    \multirow{2}{10mm}{\centering{\textbf{Cluster Name}}} &
300
    \multirow{2}{7mm}{\centering{\textbf{Site Name}}} &
301
    \multirow{2}{7mm}{\centering{\textbf{\# Nodes}}} &
302
    \multirow{2}{10mm}{\centering{\textbf{Install Date}}} & 
303
    \multirow{2}{30mm}{\centering{\textbf{Node Characteristics}}}\\
304
    & & & & \\
305
    \toprule
306
    parapluie & Rennes & 40 & Oct. 2010 & \multirow{3}{30mm}{2 CPUs AMD\@1.7GHz, 12 cores/CPU, 47GB RAM, 232GB DISK}\\ 
307
    & & & \\ 
308
    & & & \\ \midrule
309
    parapide & Rennes & 25 & Nov. 2011 & \multirow{3}{30mm}{2 CPUs Intel\@2.93GHz, 4 cores/CPU, 23GB RAM, 465GB DISK}\\ 
310
    & & & \\ 
311
    & & & \\ \midrule
312
    paradent & Rennes & 64 & Feb. 2009 & \multirow{3}{30mm}{2 CPUs Intel\@2.5GHz, 4 cores/CPU, 31GB RAM, 298GB DISK}\\ 
313
    & & & \\ 
314
    & & & \\ \midrule
315
    stremi & Reims & 44 & Jan. 2011 & \multirow{3}{30mm}{2 CPUs AMD\@1.7GHz, 12 cores/CPU, 47GB RAM, 232GB DISK}\\
316
    & & & \\ 
317
    & & & \\ \midrule
318
    sagittaire & Lyon & 79 & Jul. 2007 & \multirow{3}{30mm}{2 CPUs AMD\@2.4GHz, 1 core/CPU, 1GB RAM, 68GB DISK}\\ 
319
    & & & \\ 
320
    & & & \\ 
321
    \bottomrule
322
  \end{tabular} 
323
\end{table}
324

  
325
All deployments from Jan. 2010 through Dec. 2013 have been considered. The first step towards building a model for deployment consisted in building time histograms and visually examining what probability distributions were more likely to fit the obtained data. Once this first step was completed and a number of distributions were considered, we found that log-normal, gamma and generalised gamma were more likely to fit the data. Figure~\ref{fig:deploy_fitting} depicts the results of fitting the three distributions to the deployment time information of each cluster.
326

  
327
\begin{figure*}
328
\centering{
329
\parbox{.32\linewidth}{
330
\includegraphics[width=1.\linewidth]{figs/deployment_parapluie.pdf} 
331
}
332
\begin{minipage}{.32\linewidth}
333
\includegraphics[width=1.\linewidth]{figs/deployment_parapide.pdf}
334
\end{minipage}
335
\begin{minipage}{.32\linewidth}
336
\includegraphics[width=1.\linewidth]{figs/deployment_paradent.pdf}
337
\end{minipage}
338
}
339
\centering{
340
\begin{minipage}{.32\linewidth}
341
\includegraphics[width=1.\linewidth]{figs/deployment_stremi.pdf}
342
\end{minipage}
343
\begin{minipage}{.32\linewidth}
344
\includegraphics[width=1.\linewidth]{figs/deployment_sagittaire.pdf}
345
\end{minipage}
346
}
347
\caption{Deployment time histograms and distribution fitting.}
348
\label{fig:deploy_fitting}
349
\end{figure*}
350

  
351
The goodness of fit of the distributions has also been submitted to the Kolmogorov–Smirnov test (KS test), whose statistic (D-statistic) quantifies the distance between the distribution function of empirical values and the cumulative distribution function of the reference distribution. Table~\ref{tab:ks_test} summarises the results for the considered clusters. 
352

  
353
\begin{table}[!hbt]
354
\centering
355
\caption{Kolmogorov-Smirnov test for goodness of fit.}
356
\label{tab:ks_test} 
357
\footnotesize
358
  \begin{tabular}{cccc}
359
    \toprule
360
    \multirow{2}{15mm}{\centering{\textbf{Cluster Name}}} & \multicolumn{3}{c}{\textbf{D-Statistics}}\\
361
    \cmidrule(r){2-4}
362
     & \textbf{Log-normal} & \textbf{Gamma} & \textbf{Gen. Gamma}\\
363
    \toprule
364
    parapluie & 0.054 & 0.056 & 0.057 \\ \midrule 
365
    parapide & 0.088 & 0.089 & 0.087 \\ \midrule
366
    paradent & 0.039 & 0.036 & 0.037 \\ \midrule
367
    stremi & 0.053 & 0.056 & 0.055 \\ \midrule
368
    sagittaire & 0.070 & 0.073 & 0.075 \\
369
    \bottomrule
370
  \end{tabular} 
371
\end{table}
372

  
373
Although the D-statistics does not present great differences among the results obtained by the distributions, log-normal provides a slightly better fit to most clusters. We hence choose to base deployment time on a log-normal distribution.
374

  
375
% ----------------------------------------------------------------------------------------
376

  
377
\section{Experimental Setup and Results}
378
\label{sec:experiments}
379

  
380
In this section, we evaluate the potential for energy savings when employing a reservation framework for bare-metal provisioning in private clouds. We discuss the experimental setup and metrics, and then analyse the obtained performance results.
381

  
382
\subsection{Experimental Setup}
280 383

  
384
A discrete-event simulator developed in house was used to model and simulate the resource allocation and request scheduling in a private cloud setting. We resorted to simulation as it enables controlled, repeatable and large-scale experiments. We model and simulate two private cloud environments. Both infrastructure capacity and resource requests are expressed in number of machines.
385

  
386
To model the load of the two considered environments, we collected traces of reservation requests from two Grid'5000 sites, namely Lyon and Reims spanning six months, from Jan. 2014 to Jun. 2014. For each environment we consider two scenarios, one where the workload trace is modified according to the rules described in Section~\ref{sec:workloads}; and another using the original trace. The first is termed as a \textit{cloud} scenario, whereas the second is \textit{reservation}. In this way, there is a total of four scenarios --- \textit{lyon\_cloud}, \textit{lyon\_reservation}, \textit{reims\_cloud} and \textit{reims\_reservation}. As mentioned earlier, under cloud scenarios all requests are made by users on demand.
387

  
388
The number of resources of each environment is determined by simulating their corresponding cloud scenarios under a large number of resources, so that no request is rejected. The maximum number of resources used during the evaluation is taken as the site capacity. In the scenarios we consider here, Lyon and Reims have 195 and 136 machines respectively.
389

  
390
Based on the deployment information from Kadeploy, we model the time required to boot powered-off machines requested by a reservation using a log-normal distribution whose scale is $\log(500)$ and shape is $0.4$.
391

  
392
 
281 393
\subsection{Performance Metrics}
282 394

  
283
To evaluate the potential for energy savings by introducing support for resource reservation, we first take into account the worst case scenario and quantify the time resources remain idle (\textit{i.e.} $res_{idle}$) from the start of an evaluation $t_{start}$ through its end $t_{end}$, which is given by: 
395
To evaluate the potential for energy savings by introducing support for resource reservation, we first take into account the worst case scenario and quantify the time resources remain idle (\textit{i.e.} $res_{idle}$) from the start of an evaluation $t_{start}$ through its end $t_{end}$ --- the time of submission of the last request --- which is given by: 
284 396

  
285 397
\[ res_{idle} = \int_{t_{start}}^{t_{end}} (ct - cu) \,dt \]
286 398

  
287
\noindent where $ct$ and $cu$ are respectively the site CPU core capacity and number of cores in use at a time $t$. As we consider that computer nodes would ideally be switched off when idle, $res_{idle}$ is taken as upper bound to potential savings. The actual energy savings $e_{savings}$ is the amount of time cores are switched off during interval $t_{start}$ to $t_{end}$ --- \textit{i.e.}, $e_{savings} = \int_{t_{start}}^{t_{end}} c_{off} \,dt$, where $c_{off}$ is the number of switched-off cores).
399
\noindent where $ct$ and $cu$ are respectively the site machine capacity and number of machines in use at a time $t$. As we consider that compute nodes would ideally be switched off when idle, $res_{idle}$ is taken as an upper bound on the potential savings. The actual energy savings $e_{savings}$ is the amount of time machines are in fact switched off during the interval $t_{start}$ to $t_{end}$ --- \textit{i.e.}, $e_{savings} = \int_{t_{start}}^{t_{end}} c_{off} \,dt$, where $c_{off}$ is the number of powered-off machines.
400

  
401
Switching resources off, however, may lead to a scenario where they must be switched back on to serve a request that arrives. Booting up resources takes time and increases the time required to make resources available to users, especially under immediate reservations. Therefore, we assess the impact of switching resources off on the quality of service users perceive by computing the aggregate delay $delay_{req}$ of affected requests $R_{delay}$, which is given by:
402

  
403
% \[ delay_{req} = \sum_{r \in R_{delay}} {machines_r} \times \frac{{time\_boot_r}}{duration_r} \]
288 404

  
289
Switching resources off, however, may lead to a scenario where powered-off resources must be switched back on to serve a request that arrives. Booting up resources takes time and increases the time required to make resources available to users, specially under immediate reservations. Therefore, we assess the impact of switching resources on the quality of service users perceive by computing the aggregate weighted delay $delay_{req}$ of affected requests $R_{delay}$, which is given by:
405
\[ delay_{req} = \sum_{r \in R_{delay}} (r_{dep\_start} + r_{dep\_end}) - r_{start\_time} \]
290 406

  
291
\[ delay_{req} = \sum_{r \in R_{delay}} {cores_r} \times \frac{{time\_boot_r}}{duration_r} \]
407
\noindent where $r_{dep\_start}$ is the time at which deployment of the requested nodes started, $r_{dep\_end}$ is when the last machine became ready to be used, and $r_{start\_time}$ is the time when the request was supposed to start. As discussed earlier, to model the machine deployment time, we used information collected from Kadeploy3 traces.
292 408

  
293
\noindent where $cores_r$ is the number of cores required by request $r$, $time\_boot_r$ is the number of seconds taken by the last allocated resource to boot, and $duration_r$ is the duration of $r$ in seconds.
294 409

  
295 410
\subsection{Evaluation Results}
296 411

  
297
In the first experiment we use a log obtained from the Lyon site of Grid'5000, spanning 6 months from Jun. 1 to Dec. 31, 2013.
412
Figure \ref{fig:potential_savings} summarises the results for potential energy savings. As the scheduling strategy switches resources off almost immediately once it identifies that they have remained idle for a period and will continue to be idle within a given time horizon, it is able to exploit almost all potential savings under both cloud and reservation scenarios. This simple policy does not consider a high cost of powering off/on resources such as issues related to air-conditioning and power supply. Even though the strategy is simple, the cloud and reservation scenarios present different results regarding quality of service.
298 413

  
414
\begin{figure}[htb]
415
\centering 
416
\includegraphics[width=0.95\linewidth]{figs/potential_saving.pdf} 
417
\caption{Potential savings and aggregate off periods.}
418
\label{fig:potential_savings}
419
\end{figure}
420

  
421
As shown in Figure~\ref{fig:request_delay}, the request delay is substantially reduced under scenarios with resource reservations. This is because, with reservations, resources can be switched on before the reservation starts. Hence, the system does not spend reserved-resource time for environment deployment. 
422

  
423
\begin{figure}[htb]
424
\centering 
425
\includegraphics[width=0.95\linewidth]{figs/request_delay.pdf} 
426
\caption{Aggregate request delay in resource/hour.}
427
\label{fig:request_delay}
428
\end{figure}
299 429

  
300 430
% ----------------------------------------------------------------------------------------
301 431

  
302 432
\section{Conclusion}
303 433
\label{sec:conclusion}
304 434

  
305
This work discussed the need for reservation support on cloud resource management. It introduced a framework for OpenStack for enabling reservation of resources, with a focus on bare-metal provisioning for certain high performance computing applications.
435
This work discussed the need for reservation support in cloud resource management. It introduced an OpenStack framework for enabling resource reservation, with a focus on bare-metal provisioning for certain high performance computing applications.
306 436

  
307 437
% ----------------------------------------------------------------------------------------
308 438

  
b/papers/2014/reservation/references.bib
432 432
 publisher = {ACM},
433 433
 address = {New York, USA}
434 434
} 
435

  
436
@ARTICLE{JeanvoineKadeploy3:2013,
437
    title = {{Kadeploy3: Efficient and Scalable Operating System Provisioning}},
438
    author = {Emmanuel Jeanvoine and Luc Sarzyniec and Lucas Nussbaum},
439
    journal = {{USENIX} ;login:},
440
    volume = {38},
441
    number = {1},
442
    year = {2013},
443
    pages = {38-44},
444
    month = {February}
445
}
446

  
447
@MISC{Rossigneux:2014,
448
  author = {Francois Rossigneux and Jean-Patrick Gelas and Laurent Lefevre and Marcos Dias de Assuncao},
449
  title = {A Generic and Extensible Framework for Monitoring Energy Consumption of {OpenStack} Clouds},
450
  year = {2014},
451
  howpublished = {http://arxiv.org/abs/1408.6328},
452
  month = {August}
453
}
454

  

Formats disponibles : Unified diff