Révision 557e0020

b/livrables/L4_2_4/Data_file/Cat1/.Rhistory
1
symbols(0,0,circles=1,inches=FALSE,add=TRUE)
2
dev.off()
3
return (X)
4
}
5
funct(filename, "test_bt_cool.eps")
6
# Draw a PCA correlation-circle plot of `dataset` into a PostScript file.
#
# Args:
#   dataset:  data handed unchanged to dudi.pca() (centred and scaled there).
#   out_file: path of the PostScript file to write.
#
# Returns:
#   The object returned by dudi.pca().
#
# NOTE(review): dudi.pca() is not defined in the visible source; presumably it
# comes from the ade4 package, which the caller must have attached -- confirm.
draw <- function(dataset, out_file) {
  X <- dudi.pca(dataset, center = TRUE, scale = TRUE, scannf = FALSE)

  postscript(out_file, width = 4.0, height = 4.0, horizontal = FALSE,
             onefile = FALSE, paper = "special", family = "ComputerModern",
             encoding = "TeXtext.enc")
  # Close the device on every exit path, so a failing plot call does not
  # leave a half-written file and a dangling graphics device behind.
  on.exit(dev.off(), add = TRUE)

  # Each eigenvalue as a percentage of the eigenvalue total (axis labels).
  keep <- 100 * X$eig / sum(X$eig)
  plot(X$co[, 1], X$co[, 2], xlim = c(-1, 1), ylim = c(-1, 1), asp = 1,
       ylab = paste0("2nd principal axis (", round(keep[2], 1), "%)"),
       xlab = paste0("1st principal axis (", round(keep[1], 1), "%)"))

  z <- row.names(X$co)
  print(z[1])
  print(X$co[1, 1])

  # All arrows start from one randomly drawn anchor point; only the first
  # row name is written, at that anchor.
  x <- rnorm(5, 0, 1)
  arrows(x[2], x[4], X$co[, 1], X$co[, 2], code = 2, col = 1, length = 0.25)
  text(x[2], x[4], z[1], col = "navy")

  abline(h = 0, v = 0)
  # Unit circle centred on the origin.
  symbols(0, 0, circles = 1, inches = FALSE, add = TRUE)

  X
}
28
funct(filename, "test_bt_cool.eps")
29
# Draw a PCA correlation-circle plot of `dataset` into a PostScript file,
# with one labelled arrow per row of the PCA column coordinates.
#
# Args:
#   dataset:  data handed unchanged to dudi.pca() (centred and scaled there).
#   out_file: path of the PostScript file to write.
#
# Returns:
#   The object returned by dudi.pca().
#
# NOTE(review): dudi.pca() is not defined in the visible source; presumably it
# comes from the ade4 package, which the caller must have attached -- confirm.
draw <- function(dataset, out_file) {
  X <- dudi.pca(dataset, center = TRUE, scale = TRUE, scannf = FALSE)

  postscript(out_file, width = 4.0, height = 4.0, horizontal = FALSE,
             onefile = FALSE, paper = "special", family = "ComputerModern",
             encoding = "TeXtext.enc")
  # Close the device on every exit path, so a failing plot call does not
  # leave a half-written file and a dangling graphics device behind.
  on.exit(dev.off(), add = TRUE)

  # Each eigenvalue as a percentage of the eigenvalue total (axis labels).
  keep <- 100 * X$eig / sum(X$eig)
  plot(X$co[, 1], X$co[, 2], xlim = c(-1, 1), ylim = c(-1, 1), asp = 1,
       ylab = paste0("2nd principal axis (", round(keep[2], 1), "%)"),
       xlab = paste0("1st principal axis (", round(keep[1], 1), "%)"))

  z <- row.names(X$co)
  print(z[1])
  print(X$co[1, 1])

  # One arrow per row: it starts at a freshly drawn random point, where the
  # row name is written, and ends at the row's coordinates on the two axes.
  for (k in seq_along(z)) {
    x <- rnorm(5, 0, 1)
    arrows(x[2], x[4], X$co[k, 1], X$co[k, 2], code = 2, col = 1,
           length = 0.25)
    text(x[2], x[4], z[k], col = "navy")
  }

  abline(h = 0, v = 0)
  # Unit circle centred on the origin.
  symbols(0, 0, circles = 1, inches = FALSE, add = TRUE)

  X
}
51
funct(filename, "test_bt_cool.eps")
52
source('~/AppProfile/Experiments/scriptPdpPca.R')
53
funct(filename, "test_bt_cool.eps")
54
source('~/AppProfile/Experiments/scriptPdpPca.R')
55
funct(filename, "test_bt_cool.eps")
56
source('~/AppProfile/Experiments/scriptPdpPca.R')
57
funct(filename, "test_bt_cool.eps")
58
source('~/AppProfile/Experiments/scriptPdpPca.R')
59
funct(filename, "test_bt_cool.eps")
60
source('~/AppProfile/Experiments/scriptPdpPca.R')
61
funct(filename, "test_bt_cool.eps")
62
source('~/AppProfile/Experiments/scriptPdpPca.R')
63
funct(filename, "test_bt_cool.eps")
64
source('~/AppProfile/Experiments/scriptPdpPca.R')
65
funct(filename, "test_bt_cool.eps")
66
source('~/AppProfile/Experiments/scriptPdpPca.R')
67
funct(filename, "test_bt_cool.eps")
68
source('~/AppProfile/Experiments/scriptPdpPca.R')
69
funct(filename, "test_bt_cool.eps")
70
source('~/AppProfile/Experiments/scriptPdpPca.R')
71
funct(filename, "test_bt_cool.eps")
72
source('~/AppProfile/Experiments/scriptPdpPca.R')
73
funct(filename, "test_bt_cool.eps")
74
source('~/AppProfile/Experiments/scriptPdpPca.R')
75
funct(filename, "test_bt_cool.eps")
76
source('~/AppProfile/Experiments/scriptPdpPca.R')
77
funct(filename, "test_bt_cool.eps")
78
source('~/AppProfile/Experiments/scriptPdpPca.R')
79
source('~/AppProfile/Experiments/scriptPdpPca.R')
80
funct(filename, "test_bt_cool.eps")
81
source('~/AppProfile/Experiments/scriptPdpPca.R')
82
funct(filename, "test_bt_cool.eps")
83
source('~/AppProfile/Experiments/scriptPdpPca.R')
84
funct(filename, "test_bt_cool.eps")
85
source('~/AppProfile/Experiments/scriptPdpPca.R')
86
funct(filename, "test_bt_cool.eps")
87
source('~/AppProfile/Experiments/scriptPdpPca.R')
88
source('~/AppProfile/Experiments/scriptPdpPca.R')
89
funct(filename, "test_bt_cool.eps")
90
source('~/AppProfile/Experiments/scriptPdpPca.R')
91
funct(filename, "test_bt_cool.eps")
92
source('~/AppProfile/Experiments/scriptPdpPca.R')
93
funct(filename, "test_bt_cool.eps")
94
source('~/AppProfile/Experiments/scriptPdpPca.R')
95
funct(filename, "test_bt_cool.eps")
96
filename="/Users/ghislainlandry/AppProfile/Experiments/PhaseDetect/data.raw/data.pca/BT.txt"
97
funct(filename, "test_bt_cool.eps")
98
filename="/Users/ghislainlandry/AppProfile/Experiments/PhaseDetect/data.raw/data.pca/bakup/BT.txt"
99
funct(filename, "test_bt_cool.eps")
100
source('~/AppProfile/Experiments/scriptPdpPca.R')
101
funct(filename, "test_bt_cool.eps")
102
filename = "/Users/ghislainlandry/AppProfile/Experiments/PhaseDetect/data.raw/data.pca/data.nas/l2.bt.txt"
103
funct(filename, "test_bt_cool.eps")
104
source('~/AppProfile/Experiments/scriptPdpPca.R')
105
funct(filename, "test_bt_cool.eps")
106
source('~/AppProfile/Experiments/scriptPdpPca.R')
107
funct(filename, "test_bt_cool.eps")
108
?as.numeric
109
?mode
110
?ld
111
??ld
112
??ForImp
113
install.packages("ForImp")
114
install.packages("mi")
115
?mi
116
??mi
117
library(stat)
118
library(stats)
119
?mi
120
x <- rnorm(100,0,1) # N(0,1)
121
x
122
y <- rbinom(100,1,invlogit(1+2*x))
123
library(mi)
124
y <- rbinom(100,1,invlogit(1+2*x))
125
y[seq(1,100,10)]<-NA
126
dat.xy <- data.frame(x,y)
127
head(dat.xy)
128
mi.binary(y~x, data = dat.xy)
129
x <-rnorm(100,0,1)
130
y <- x+4
131
y <- round(y)
132
y[y<0] <- 0
133
y
134
y[seq(1,100,10)] <- NA
135
?seq
136
dat.xy <- data.frame(x,y)
137
head(dat.xy)
138
mi.categorical(formula = y ~ x, data = dat.xy)
139
z <- mi.categorical(formula = y ~ x, data = dat.xy)
140
head(z)
141
head(dat.xy)
142
z <- as.data.frame(mi.categorical(formula = y ~ x, data = dat.xy))
143
head(z)
144
z <- mi.categorical(formula = y ~ x, data = dat.xy)
145
z
146
?mi
147
library(doMc)
148
library(doMC)
149
dir()
150
?factors
151
Factors
152
?Factors
153
?as.factor
154
?remove
155
?random
156
?rand
157
??random
158
seed <- 3433
159
set.seed(seed)
160
?runif
161
runif(1)
162
runif(1)
163
sample(1:6,10,replace=F)
164
sample(1:20,10,replace=F)
165
x <- sample(1:20,10,replace=F)
166
x[1]
167
df.fits <- list()
168
df.fits
169
?complete.cases()
170
x <- c(0.8, 0.47, 0.51, 0.73, 0.36, 0.58, 0.57, 0.85, 0.44, 0.42)
171
y <- c(1.39, 0.72, 1.55, 0.48, 1.19, -1.59, 1.23, -0.65, 1.49, 0.05)
172
fit <- lm(y ~ x -1)
173
fit
174
x <- c(10.1, 9.2, 8.4, 9.5, 7.5)
175
y <- c(9.9, 9.1, 8.4, 9.3, 7.2)
176
mean(x+y)
177
mean(x)
178
mean(y)
179
x
180
sum(x)
181
sum(x)*12
182
x <- c(0.8, 0.47, 0.51, 0.73, 0.36, 0.58, 0.57, 0.85, 0.44, 0.42)
183
y <- c(1.39, 0.72, 1.55, 0.48, 1.19, -1.59, 1.23, -0.65, 1.49, 0.05)
184
fit <- lm(y ~ 0+x)
185
summary(fit)
186
fit <- lm(y ~ x-1)
187
summary(fit)
188
x <- -5:5
189
y <- c(5.12, 3.93, 2.67, 1.87, 0.52, 0.08, 0.93, 2.05, 2.54, 3.87, 4.97)
190
knots <- rep(0,10)
191
splineTerms <- sapply(knots, function(k) (x > k) * (x -k))
192
xMat <- cbind(1, x, splineTerms)
193
yhat <- predict(lm(y ~ xMat - 1))
194
plot(x, y, frame = FALSE, pch = 21, bg = "lightblue", cex = 2)
195
lines(x, yhat, col = "red", lwd = 2)
196
fit <- lm(y ~ xMat - 1)
197
library(ggplot2)
198
?rnorm
199
?runif
200
x <- runif(10000, 50,47000)
201
hist(x)
202
con <- url("http://www.router-switch.com/search/server/?pagesize=150")
203
library(XML)
204
html <- htmlTreeParse(con, useInternalNodes=T)
205
url <- "http://www.router-switch.com/search/server/?pagesize=150"
206
html <- htmlTreeParse(url, useInternalNodes=T)
207
Y <- xpathSAply(html, "//List Price:", xmlValue)
208
Y <- xpathSApply(html, "//List Price:", xmlValue)
209
Y <- xpathSApply(html, "//td[@id='List Price:']", xmlValue)
210
head(y)
211
head(Y)
212
Y
213
Y[0]
214
xpathSApply(html, "//title", xmlValue)
215
head(html)
216
xpathSApply(html, "//td[@id='List Price:']", xmlValue)
217
html
218
doc.text = unlist(xpathApply(html, '//p', xmlValue))
219
head(doc.tex)
220
head(doc.text)
221
"List" %in% doc.text
222
doc.text
223
doc.text = gsub('\\n', ' ', doc.text)
224
doc.text
225
doc.text = gsub('\\r', '', doc.text)
226
doc.text
227
"Our" %in% doc.text
228
doc.text[338]
229
doc.text[339]
230
"Our" %in% doc.textdoc.text[339]
231
"Our" %in% doc.text[339]
232
class(doc.text[339])
233
?gsub
234
y <- gsub("  ", " ",doc.text)
235
y
236
y <- paste(doc.text, collapse=" ")
237
y
238
?grepl
239
y <- grepl("Our",doc.text[339])
240
y
241
doc.text
242
y <- grepl("Our",doc.text[303])
243
y
244
y <- grepl("List",doc.text[303])
245
y
246
y <- grepl(c("List","Our"), doc.text[303])
247
y <- grepl("List, Our", doc.text[303])
248
y
249
y <- grepl("List" "Our", doc.text[303])
250
y <- grepl("List", "Our", doc.text[303])
251
y
252
y <- grepl("$",, doc.text[303])
253
y <- grepl("\\$",, doc.text[303])
254
y <- grepl("\$",, doc.text[303])
255
y <- grepl("\$",doc.text[303])
256
y <- grepl("\\$",doc.text[303])
257
y
258
x <- rapply(lapply(doc.text, function(x){grepl("\\$",doc.text[303])}), function(z){z})
259
x[1:20]
260
FALSE %in% x
261
x <- rapply(lapply(doc.text, function(x){grepl("\\$",doc.text)}), function(z){z})
262
x[1:20]
263
FALSE %in% x
264
y <- doc.text[x]
265
length(y)
266
head(y)
267
class(y)
268
y[1]
269
y[2]
270
y <- gsub("List Price:                  USD$", "", doc.text[x])
271
head(y)
272
y <- gsub("List Price:                  USD$", "", y[1])
273
y
274
y[1]
275
y
276
gsub("List Price:                  USD\\$", "", y)
277
as.numeric("3,495.00")
278
gsub(",", "","3,495.00")
279
y <- doc.text[x]
280
head(y)
281
lprice <- rapply(lapply(y, function(a){gsub("List Price:                  USD\\$", "", a)}), function(z){z})
282
head(lprice)
283
lprice <- rapply(lapply(lprice, function(a){gsub("Our Price:                  USD\\$", "", a)}), function(z){z})
284
head(lprice)
285
prices <- rapply(lapply(lprice, function(x){gsub(",", "",x)}), function(z){as.numeric(z)})
286
prices
287
tail(lprice)
288
tail(y)
289
y <- doc.text[x]
290
tail(y)
291
head(y)
292
?ifelse
293
Z <- ifelse(is.na(y), True, False)
294
Z <- ifelse(is.na(y), T, F)
295
lean(Z)
296
length(Z)
297
l <- y[!Z]
298
l
299
# Scrape the prices shown on a product-search page.
#
# Args:
#   pagelink: URL or file path handed to htmlTreeParse().
#
# Returns:
#   Numeric vector with one entry per <p> element whose text contains "$".
#   Entries that are not a plain number once the price labels are removed
#   come back as NA (with as.numeric()'s coercion warning).
#
# NOTE(review): htmlTreeParse(), xpathApply() and xmlValue() are not defined
# here; presumably the XML package is attached by the caller -- confirm.
serversPrices <- function(pagelink) {
  # Read and parse HTML file
  html <- htmlTreeParse(pagelink, useInternalNodes = TRUE)
  # Text of every <p> element in the document, flattened to a character
  # vector.
  doc.text <- unlist(xpathApply(html, "//p", xmlValue))
  # Replace line breaks by spaces
  doc.text <- gsub("\\n", " ", doc.text)
  doc.text <- gsub("\\r", " ", doc.text)
  # grepl() is vectorised: one call gives one flag per paragraph. Running it
  # once per element produced an n*n mask that indexed past the end of
  # doc.text and filled the result with NA.
  has_price <- grepl("\\$", doc.text)
  lprice <- doc.text[has_price]
  # Strip the labels whatever the amount of whitespace before "USD$".
  lprice <- gsub("List Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- gsub("Our Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- lprice[!is.na(lprice)]
  # Drop thousands separators before converting.
  as.numeric(gsub(",", "", lprice))
}
320
prices <- serversPrices("http://www.router-switch.com/search/server/?pagesize=150")
321
# Scrape the prices shown on a product-search page.
#
# Args:
#   pagelink: URL or file path handed to htmlTreeParse().
#
# Returns:
#   Numeric vector with one entry per <p> element whose text contains "$".
#   Entries that are not a plain number once the price labels are removed
#   come back as NA (with as.numeric()'s coercion warning).
#
# NOTE(review): htmlTreeParse(), xpathApply() and xmlValue() are not defined
# here; presumably the XML package is attached by the caller -- confirm.
serversPrices <- function(pagelink) {
  # Read and parse HTML file
  html <- htmlTreeParse(pagelink, useInternalNodes = TRUE)
  # Text of every <p> element in the document, flattened to a character
  # vector.
  doc.text <- unlist(xpathApply(html, "//p", xmlValue))
  # Replace line breaks by spaces
  doc.text <- gsub("\\n", " ", doc.text)
  doc.text <- gsub("\\r", " ", doc.text)
  # grepl() is vectorised: one call gives one flag per paragraph. Running it
  # once per element produced an n*n mask that indexed past the end of
  # doc.text and filled the result with NA.
  has_price <- grepl("\\$", doc.text)
  # Keep the matching text itself; replacing it by an is.na() mask made every
  # later substitution work on "TRUE"/"FALSE" instead of prices.
  lprice <- doc.text[has_price]
  # Strip the labels whatever the amount of whitespace before "USD$".
  lprice <- gsub("List Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- gsub("Our Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- lprice[!is.na(lprice)]
  # Drop thousands separators before converting.
  as.numeric(gsub(",", "", lprice))
}
342
prices <- serversPrices("http://www.router-switch.com/search/server/?pagesize=150")
343
warnings()
344
length(prices)
345
# Scrape the prices shown on a product-search page.
#
# Args:
#   pagelink: URL or file path handed to htmlTreeParse().
#
# Returns:
#   Numeric vector with one entry per <p> element whose text contains "$".
#   Entries that are not a plain number once the price labels are removed
#   come back as NA (with as.numeric()'s coercion warning).
#
# NOTE(review): htmlTreeParse(), xpathApply() and xmlValue() are not defined
# here; presumably the XML package is attached by the caller -- confirm.
serversPrices <- function(pagelink) {
  # Read and parse HTML file
  html <- htmlTreeParse(pagelink, useInternalNodes = TRUE)
  # Text of every <p> element in the document, flattened to a character
  # vector.
  doc.text <- unlist(xpathApply(html, "//p", xmlValue))
  # Replace line breaks by spaces
  doc.text <- gsub("\\n", " ", doc.text)
  doc.text <- gsub("\\r", " ", doc.text)
  # grepl() is vectorised: one call gives one flag per paragraph. Running it
  # once per element produced an n*n mask that indexed past the end of
  # doc.text and filled the result with NA.
  has_price <- grepl("\\$", doc.text)
  lprice <- doc.text[has_price]
  # Strip the labels whatever the amount of whitespace before "USD$".
  lprice <- gsub("List Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- gsub("Our Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- lprice[!is.na(lprice)]
  # Drop thousands separators before converting.
  as.numeric(gsub(",", "", lprice))
}
368
prices <- serversPrices("http://www.router-switch.com/search/server/?pagesize=150")
369
# Scrape the prices shown on a product-search page.
#
# Args:
#   pagelink: URL or file path handed to htmlTreeParse().
#
# Returns:
#   Numeric vector with one entry per <p> element whose text contains "$".
#   Entries that are not a plain number once the price labels are removed
#   come back as NA (with as.numeric()'s coercion warning).
#
# NOTE(review): htmlTreeParse(), xpathApply() and xmlValue() are not defined
# here; presumably the XML package is attached by the caller -- confirm.
serversPrices <- function(pagelink) {
  # Read and parse HTML file
  html <- htmlTreeParse(pagelink, useInternalNodes = TRUE)
  # Text of every <p> element in the document, flattened to a character
  # vector.
  doc.text <- unlist(xpathApply(html, "//p", xmlValue))
  # Replace line breaks by spaces
  doc.text <- gsub("\\n", " ", doc.text)
  doc.text <- gsub("\\r", " ", doc.text)
  # grepl() is vectorised: one call gives one flag per paragraph. Running it
  # once per element produced an n*n mask that indexed past the end of
  # doc.text and filled the result with NA.
  has_price <- grepl("\\$", doc.text)
  lprice <- doc.text[has_price]
  # Strip the labels whatever the amount of whitespace before "USD$".
  lprice <- gsub("List Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- gsub("Our Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- lprice[!is.na(lprice)]
  # Drop thousands separators before converting.
  as.numeric(gsub(",", "", lprice))
}
392
prices <- serversPrices("http://www.router-switch.com/search/server/?pagesize=150")
393
length(prices)
394
prices
395
# Scrape the price strings shown on a product-search page.
#
# This variant returns the text left after the price labels are removed; it
# does not convert to numbers (thousands separators are still present).
#
# Args:
#   pagelink: URL or file path handed to htmlTreeParse().
#
# Returns:
#   Character vector with one entry per <p> element whose text contains "$".
#
# NOTE(review): htmlTreeParse(), xpathApply() and xmlValue() are not defined
# here; presumably the XML package is attached by the caller -- confirm.
serversPrices <- function(pagelink) {
  # Read and parse HTML file
  html <- htmlTreeParse(pagelink, useInternalNodes = TRUE)
  # Text of every <p> element in the document, flattened to a character
  # vector.
  doc.text <- unlist(xpathApply(html, "//p", xmlValue))
  # Replace line breaks by spaces
  doc.text <- gsub("\\n", " ", doc.text)
  doc.text <- gsub("\\r", " ", doc.text)
  # grepl() is vectorised: one call gives one flag per paragraph. Running it
  # once per element produced an n*n mask that indexed past the end of
  # doc.text and filled the result with NA.
  has_price <- grepl("\\$", doc.text)
  lprice <- doc.text[has_price]
  # Strip the labels whatever the amount of whitespace before "USD$".
  lprice <- gsub("List Price:[[:space:]]*USD\\$", "", lprice)
  lprice <- gsub("Our Price:[[:space:]]*USD\\$", "", lprice)
  lprice[!is.na(lprice)]
}
419
prices <- serversPrices("http://www.router-switch.com/search/server/?pagesize=150")
420
prices
421
lprice <- rapply(lapply(lprice, function(a){gsub("Our Price:                  USD\\$", "", a)}),
422
function(z){z})
423
lprice
424
doc.text = unlist(xpathApply(html, '//p', xmlValue))
425
doc.text = gsub('\\n', ' ', doc.text)
426
doc.text = gsub('\\r', ' ', doc.text)
427
doc.text
428
x <- rapply(lapply(doc.text, function(x){grepl("Price",doc.text)}), function(z){z})
429
head(x)
430
y <- doc.text[x]
431
length(y)
432
head(y)
433
tail(y)
434
Z <- ifelse(is.na(y), T, F)
435
y <- y[!Z]
436
tail(y)
437
t <- y
438
length(y)
439
head(y)
440
gsub(" $", "", "Our Price:                   USD$4,598.00                                      ")
441
gsub(" $ ", "", "Our Price:                   USD$4,598.00                                      ")
442
gsub("\\s", " ", "Our Price:                   USD$4,598.00                                      ")
443
gsub("\t", " ", "Our Price:                   USD$4,598.00                                      ")
444
gsub("\\t", "", "Our Price:                   USD$4,598.00                                      ")
445
gsub("\\t", "", "Our Price:                   USD$4,598.00                  ")
446
str.replace(/[\t\n\r]/gm,'')
447
str <- "Our Price:                   USD$4,598.00                  "
448
str.replace(/[\t\n\r]/gm,'')
449
y[72]
450
gsub("\t","",y[72])
451
gsub("\t","",as.character(y[72]))
452
gsub(" ","",as.character(y[72]))
453
lprice <- rapply(lapply(y, function(a){" ","",as.character(a)}),
454
function(z){z})
455
lprice <- rapply(lapply(y, function(a){" ","",as.character(a)}),function(z){z})
456
lprice <- rapply(lapply(y, function(a){gsub(" ","",as.character(a))}),function(z){z})
457
head(lprice)
458
lprice <- rapply(lapply(y, function(a){gsub("List Price:USD\\$", "", a)}),
459
function(z){z})
460
head(lprice)
461
lprice <- rapply(lapply(y, function(a){gsub(" ","",as.character(a))}),function(z){z})
462
lprice <- rapply(lapply(lprice, function(a){gsub("List Price:USD\\$", "", a)}),
463
function(z){z})
464
head(lprice)
465
lprice <- rapply(lapply(lprice, function(a){gsub("ListPrice:USD\\$", "", a)}),
466
function(z){z})
467
head(lprice)
468
lprice <- rapply(lapply(lprice, function(a){gsub("OurPrice:USD\\$", "", a)}),
469
function(z){z})
470
head(lprice)
471
prices <- rapply(lapply(lprice, function(x){gsub(",", "",x)}),
472
function(z){as.numeric(z)})
473
prices
474
hist(prices)
475
?rweibull
476
hist(rweibull(1e5,1.5,33))
477
mean(prices)
478
hist(rweibull(1e5,1.5,mean(prices)))
479
?rweibull
480
y <- rweibull(1e5,1.5,mean(prices))
481
mean(y)
482
y <- rweibull(1e5,2,mean(prices))
483
mean(y)
484
y <- rweibull(1e5,1,mean(prices))
485
mean(y)
486
hist(y)
487
y <- rweibull(1e5,1.5,mean(prices))
488
hist(y)
489
y <- rweibull(1e5,3.5,mean(prices))
490
hist(y)
491
y <- rweibull(1e5,2.5,mean(prices))
492
hist(y)
493
y <- rweibull(1e5,2,mean(prices))
494
hist(y)
495
mean(y)
496
setwd("/Users/ghislainlandry/Enseignements/HaasAnalysis/Cat1")
497
dir()
498
require(xlsReadWrite)
499
require(XLConnect)
500
require(XLConnect)
501
require(xlsx)
502
?read.xlsx
503
conf1_df <- read.xlsx("categoryOneServer.xlsx", sheetName = "conf1")
504
head(conf1_df)
505
procs <- read.xlsx("conf1extensions.xlsx", sheetName = "procs")
506
head(procs)
507
sep(nrow(procs))
508
seq(nrow(procs))
509
procs$id <- seq(nrow(procs))
510
head(procs)
511
y <- merge(procs, conf1_df, by= NULL)
512
head(y)
b/livrables/L4_2_4/Data_file/Cat1/GenerateServers.py
1
#!/usr/bin/env python
2

  
3
from __future__ import division
4
import numpy as np
5
import pandas as pd
6
from numpy import genfromtxt
7
from pandas import DataFrame
8

  
9
import re
10
import math
11
import sys
12
import os
13

  
14

  
15

  
16

  
17
def duplicate(x, n):
	"""Return a list holding the first element of ``x`` repeated ``n`` times.

	``x`` may be any indexable sequence. For objects exposing ``iloc``
	(pandas Series) the first element is taken by position, so the result
	does not depend on the index labels.
	"""
	first = x.iloc[0] if hasattr(x, "iloc") else x[0]
	return [first] * n
19

  
20

  
21
# Category directories to visit.
basedirs = ["Cat1", "Cat2", "Cat3"]


for directory in basedirs:
	# The loop had no body, which is a syntax error; keep it as an explicit
	# no-op until the per-directory processing is written.
	pass
27

  
b/livrables/L4_2_4/Data_file/Cat1/codeServers.R
1
# library() stops immediately when the package is missing; require() would
# only return FALSE and let the read.xlsx() call below fail later.
library(xlsx)

# Sheet "conf1" of the base workbook, with a 1-based row id.
conf1_df <- read.xlsx("categoryOneServer.xlsx", sheetName = "conf1")
# seq_len() yields an empty vector for a zero-row sheet, where seq(0) would
# yield c(1, 0).
conf1_df$id <- seq_len(nrow(conf1_df))

# Sheet "procs" of the conf1 extension workbook, with a 1-based row id.
procs <- read.xlsx("conf1extensions.xlsx", sheetName = "procs")
procs$id <- seq_len(nrow(procs))
8

  
b/livrables/L4_2_4/Data_file/Cat1/makeserverinstances.py
1
#!/usr/bin/env python
2

  
3
from __future__ import division
4
import numpy as np
5
import pandas as pd
6
from numpy import genfromtxt
7
from pandas import DataFrame
8

  
9
import re
10
import math
11
import sys
12

  
13

  
14

  
15
def duplicate(x, n):
	"""Return a list holding the first element of ``x`` repeated ``n`` times.

	``x`` may be any indexable sequence. For objects exposing ``iloc``
	(pandas Series) the first element is taken by position, so the result
	does not depend on the index labels.
	"""
	first = x.iloc[0] if hasattr(x, "iloc") else x[0]
	return [first] * n
17

  
18

  
19

  
20
cat1 = pd.ExcelFile("categoryOneServer.xlsx")

# Expanded server table of each configuration, keyed by configuration name.
dico = {}

configurations = ["conf1", "conf2"]


def _expand_with_extension(frame, extension):
	"""Return ``frame`` followed by each of its rows outer-merged with ``extension``.

	Every row of ``frame`` is merged (``how="outer"``) with ``extension``; in
	the merged piece, each column that does not belong to ``extension`` is
	overwritten with the value found in the piece's first row.
	"""
	extension_names = list(extension.columns)
	pieces = [frame]
	for i in range(frame.shape[0]):
		# iloc gives a Series; transpose it back into a one-row frame.
		d = pd.DataFrame(frame.iloc[i, ]).T
		d1 = d.merge(extension, how="outer", copy=False)
		colnames = [c for c in d1.columns
			if c not in extension_names and c != "id"]
		d1[colnames] = d1[colnames].apply(duplicate, 0, args=[d1.shape[0]])
		pieces.append(d1)
	# One concat at the end instead of re-copying the frame on every row.
	return pd.concat(pieces)


for item in configurations:
	# Read the sheet of the configuration being processed (the sheet name was
	# hard-coded to "conf1", so every configuration reused conf1's rows).
	# NOTE(review): assumes the workbook has one sheet per configuration name.
	df_conf1 = cat1.parse(item)
	df_conf1["id"] = range(1, df_conf1.shape[0] + 1)

	extension_file = item + "extensions.xlsx"

	ext = pd.ExcelFile(extension_file)
	procs = ext.parse("procs")
	procs["id"] = range(1, procs.shape[0] + 1)

	mem = ext.parse("mem")
	mem["id"] = range(1, mem.shape[0] + 1)

	storage = ext.parse("storage3inch")
	storage["id"] = range(1, storage.shape[0] + 1)

	#network = ext.parse("network")
	#network["id"] = range(1, network.shape[0] + 1)

	## process processors
	df1 = df_conf1
	df1["procUnitCost"] = 0

	df = df1.merge(procs, how='outer', copy=False)

	proc_names = list(procs.columns)
	colnames = [x for x in df.columns if x not in proc_names and x != "id"]
	df[colnames] = df[colnames].apply(duplicate, 0, args=[df.shape[0]])

	## process memory
	df["MemUnitCost"] = 0
	data = _expand_with_extension(df, mem)

	## process storage
	data["StorageUnitCost"] = 0
	data = _expand_with_extension(data, storage)

	dico[item] = data


df = dico["conf1"]

## remove conf1 from the list
configurations.remove("conf1")

for items in configurations:
	# Keep the combined frame: append/concat return a new object, so the
	# result must be assigned or the other configurations are dropped.
	df = pd.concat([df, dico[items]])

#df = dico["conf1"].append(dico['conf2'])

# Parenthesised single-argument form prints the same under Python 2 and 3.
print(df.head())

df["category"] = "standard"
# Collected before "Cost" itself is added, so the total is not summed in.
cost_fileds = [v for v in df.columns if "Cost" in v]
df["Cost"] = 0

for items in cost_fileds:
	df["Cost"] += df[items]

df.to_excel("standard.xlsx", sheet_name='cat', engine='xlsxwriter',index=False)
110

  
b/livrables/L4_2_4/Data_file/Cat1/makeserverinstances.py~
1
#!/usr/bin/env python
2

  
3
from __future__ import division
4
import numpy as np
5
import pandas as pd
6
from numpy import genfromtxt
7
from pandas import DataFrame
8

  
9
import re
10
import math
11
import sys
12

  
13

  
14

  
15
def duplicate(x, n):
	"""Return a list holding the first element of ``x`` repeated ``n`` times.

	``x`` may be any indexable sequence. For objects exposing ``iloc``
	(pandas Series) the first element is taken by position, so the result
	does not depend on the index labels.
	"""
	first = x.iloc[0] if hasattr(x, "iloc") else x[0]
	return [first] * n
17

  
18

  
19

  
20
cat1 = pd.ExcelFile("categoryOneServer.xlsx")

# Expanded server table of each configuration, keyed by configuration name.
dico = {}

configurations = ["conf1", "conf2"]


def _expand_with_extension(frame, extension):
	"""Return ``frame`` followed by each of its rows outer-merged with ``extension``.

	Every row of ``frame`` is merged (``how="outer"``) with ``extension``; in
	the merged piece, each column that does not belong to ``extension`` is
	overwritten with the value found in the piece's first row.
	"""
	extension_names = list(extension.columns)
	pieces = [frame]
	for i in range(frame.shape[0]):
		# iloc gives a Series; transpose it back into a one-row frame.
		d = pd.DataFrame(frame.iloc[i, ]).T
		d1 = d.merge(extension, how="outer", copy=False)
		colnames = [c for c in d1.columns
			if c not in extension_names and c != "id"]
		d1[colnames] = d1[colnames].apply(duplicate, 0, args=[d1.shape[0]])
		pieces.append(d1)
	# One concat at the end instead of re-copying the frame on every row.
	return pd.concat(pieces)


for item in configurations:
	# Read the sheet of the configuration being processed (the sheet name was
	# hard-coded to "conf1", so every configuration reused conf1's rows).
	# NOTE(review): assumes the workbook has one sheet per configuration name.
	df_conf1 = cat1.parse(item)
	df_conf1["id"] = range(1, df_conf1.shape[0] + 1)

	extension_file = item + "extensions.xlsx"

	ext = pd.ExcelFile(extension_file)
	procs = ext.parse("procs")
	procs["id"] = range(1, procs.shape[0] + 1)

	mem = ext.parse("mem")
	mem["id"] = range(1, mem.shape[0] + 1)

	storage = ext.parse("storage3inch")
	storage["id"] = range(1, storage.shape[0] + 1)

	#network = ext.parse("network")
	#network["id"] = range(1, network.shape[0] + 1)

	## process processors
	df1 = df_conf1
	df1["procUnitCost"] = 0

	df = df1.merge(procs, how='outer', copy=False)

	proc_names = list(procs.columns)
	colnames = [x for x in df.columns if x not in proc_names and x != "id"]
	df[colnames] = df[colnames].apply(duplicate, 0, args=[df.shape[0]])

	## process memory
	df["MemUnitCost"] = 0
	data = _expand_with_extension(df, mem)

	## process storage
	data["StorageUnitCost"] = 0
	data = _expand_with_extension(data, storage)

	dico[item] = data


df = dico["conf1"]

## remove conf1 from the list
configurations.remove("conf1")

for items in configurations:
	# Keep the combined frame: append/concat return a new object, so the
	# result must be assigned or the other configurations are dropped.
	df = pd.concat([df, dico[items]])

#df = dico["conf1"].append(dico['conf2'])

# Parenthesised single-argument form prints the same under Python 2 and 3.
print(df.head())

df["category"] = "standard"
# Collected before "Cost" itself is added, so the total is not summed in.
cost_fileds = [v for v in df.columns if "Cost" in v]
df["Cost"] = 0

for items in cost_fileds:
	df["Cost"] += df[items]

df.to_excel("standard.xlsx", sheet_name='cat', engine='xlsxwriter',index=False)
110

  
b/livrables/L4_2_4/Data_file/GenerateServers.py
1
#!/usr/bin/env python
2

  
3
from __future__ import division
4
import numpy as np
5
import pandas as pd
6
from numpy import genfromtxt
7
from pandas import DataFrame
8

  
9

  
10
import re
11
import math
12
import sys
13
import os
14
from optparse import OptionParser
15

  
16
import logging
17

  
18

  
19
'''
20
python version: 
21
2.7.8 |Anaconda 2.0.1 (x86_64)
22

  
23
full description:
24
2.7.8 |Anaconda 2.0.1 (x86_64)| (default, Aug 21 2014, 15:21:46) \n[GCC 4.2.1 (Apple Inc. build 5577)]
25
'''
26

  
27

  
28
np.random.seed(12345)
29

  
30

  
31
class GenerateDate():
32
	
33
	def __init__(self, nber_servers, 
34
		maintainanceCost, 
35
		maintainance_hours,
36
		software_init_cost, 
37
		software_update_cost,
38
		network_hardware_cost,
39
		installation_cost,
40
		one_time_cost,
41
		platform_cost_hour,
42
		monthly_cost_instance,
43
		cost_extra_gb_memory,
44
		cost_extra_gb_storage,
45
		number_years,
46
		number_cores,
47
		memory_size,
48
		storage_size,
49
		storage_type,
50
		usage_hours):
51
	
52
		self.number_servers = nber_servers
53
		self.basedirs = ["Cat1", "Cat2"]
54
		self.dfservers = pd.DataFrame()
55
		self.maintainanceCost = maintainanceCost
56
		self.maintainance_hours = maintainance_hours
57
		self.software_init_cost = software_init_cost
58
		self.software_update_cost = software_update_cost
59
		self.network_hardware_cost= network_hardware_cost
60
		self.installation_cost = installation_cost
61
		self.one_time_cost = one_time_cost
62
		self.platform_cost_hour = platform_cost_hour
63
		self.monthly_cost_instance = monthly_cost_instance
64
		self.cost_extra_gb_memory = cost_extra_gb_memory #cost per extra giga bite of memory
65
		self.cost_extra_gb_storage = cost_extra_gb_storage
66
		self.number_years = number_years
67
		self.number_cores = number_cores
68
		self.memory_size = memory_size
69
		self.storage_size = storage_size
70
		self.storage_type = storage_type
71
		self.usage_hours = usage_hours
72

  
73
	def duplicate(self, x, n):
		"""Return a list made of the element ``x[0]`` repeated ``n`` times."""
		head = x[0]
		return [head for _ in range(n)]
75
	
76
	def getItem(self, item):
		"""Translate a component name into the name of its unit-cost column.

		"procs" -> "procUnitCost", "mem" -> "MemUnitCost",
		"storage3inch"/"storage2inch" -> "StorageUnitCost",
		"network" -> "netUnitCost". Any other value yields None.
		"""
		if item == "procs":
			return "procUnitCost"
		if item == "mem":
			return "MemUnitCost"
		if item in ("storage3inch", "storage2inch"):
			return "StorageUnitCost"
		if item == "network":
			return "netUnitCost"
		return None
88
	
89
	def difference_percentage(self, x):
		"""Return ``x[0] / x[1]`` as a percentage rounded to two decimals.

		``x`` is any iterable whose first two values are the numerator and the
		denominator (a list, a tuple, an array or a DataFrame row).
		Returns NaN when any value of ``x`` is missing.
		"""
		# Work on the values themselves: ``np.nan in x`` looked at the index
		# labels of a Series and only matched by identity in a list, so it
		# never detected a missing value; ``x[0]`` on a label-indexed Series
		# also depended on positional fallback.
		values = list(x)
		if any(pd.isnull(v) for v in values):
			return np.nan
		return round((values[0] / values[1]) * 100, 2)
94
	
95
	
96
	'''
97
		base configuration data frame
98
		file name for the corresponding extension file
99
		flag to identify the appropriate component
100
	'''
101

  
102
	def createServerSet(self, baseConfDf, extensionfile):
		"""Expand a base configuration with the options of an extension workbook.

		baseConfDf    -- base configuration DataFrame. A zero-filled
		                 "procUnitCost" column is written into it in place.
		extensionfile -- path of the Excel workbook holding the extensions;
		                 it must contain a "procs" sheet and may contain
		                 "mem", "network", "storage3inch" and "storage2inch".

		Returns a DataFrame in which the base rows are outer-merged with the
		"procs" sheet and then, component by component, every existing row is
		followed by its merge with that component's sheet. The four unit-cost
		columns are guaranteed to exist (filled with 0 when absent).

		NOTE(review): when both storage sheets are present the rows built
		before the storage step end up three times in the result (once as-is
		and once at the head of each storage variant); this mirrors the
		previous behaviour -- confirm it is intended.
		"""
		xcl_file = pd.ExcelFile(extensionfile)
		dframes = {sheet_name: xcl_file.parse(sheet_name) for sheet_name in xcl_file.sheet_names}

		## add the processor unit cost
		baseConfDf[self.getItem("procs")] = 0

		data = baseConfDf.merge(dframes["procs"], how="outer", copy=False)
		data = self._fill_inherited_columns(data, dframes["procs"])

		ext = [v for v in dframes.keys() if v in ["mem", "network", "storage3inch", "storage2inch"]]
		# The two storage formats are alternatives: when both are offered they
		# are expanded on separate copies instead of being chained.
		both_storage_formats = ("storage3inch" in ext) and ("storage2inch" in ext)
		if both_storage_formats:
			ext.remove("storage3inch")
			ext.remove("storage2inch")

		for elt in ext:
			data = self._expand_with_component(data, elt, dframes[elt])

		if both_storage_formats:
			# Cost columns must exist before the copies are taken so that the
			# three frames concatenated below share them.
			data = self._ensure_cost_columns(data)
			threeinch = self._expand_with_component(
				data.copy(), "storage3inch", dframes["storage3inch"])
			twoinche = self._expand_with_component(
				data.copy(), "storage2inch", dframes["storage2inch"])
			data = pd.concat([data, threeinch])
			data = pd.concat([data, twoinche])

		return self._ensure_cost_columns(data)

	def _fill_inherited_columns(self, frame, extension):
		"""Overwrite, in place, every column of ``frame`` that is neither a column of ``extension`` nor "id" with its first value."""
		ext_columns = list(extension.columns)
		inherited = [x for x in frame.columns if x not in ext_columns and x not in ["id"]]
		frame[inherited] = frame[inherited].apply(self.duplicate, 0, args=[frame.shape[0]])
		return frame

	def _expand_with_component(self, frame, component, extension):
		"""Zero the unit-cost column of ``component`` in ``frame``, then append to it the merge of each of its rows with ``extension``."""
		frame[self.getItem(component)] = 0
		# Only the rows present before the expansion are visited.
		snapshot = frame
		for i in range(snapshot.shape[0]):
			row = pd.DataFrame(snapshot.iloc[i, ]).T  # one-row frame
			merged = row.merge(extension, how="outer", copy=False)
			merged = self._fill_inherited_columns(merged, extension)
			# pd.concat replaces DataFrame.append, which pandas 2.0 removed.
			frame = pd.concat([frame, merged])
		return frame

	def _ensure_cost_columns(self, frame):
		"""Add, in place, any missing unit-cost column to ``frame`` filled with 0."""
		for item in ["procUnitCost", "MemUnitCost", "StorageUnitCost", "netUnitCost"]:
			if item not in list(frame):
				frame[item] = 0
		return frame
204
			
205

  
206
	def merge_datasets(self):
		"""Build the full server table from the workbooks of every base directory.

		For each directory of ``self.basedirs`` (resolved against the current
		working directory), every workbook whose name contains "category" is
		read sheet by sheet; each sheet is expanded by createServerSet() with
		the first "extensions" workbook whose name contains the sheet name.
		The rows are tagged with the directory name in a "category" column and
		accumulated in ``self.dfservers``. A "Cost" column holding the sum of
		all columns whose name contains "Cost" is then added and the table is
		written to "dataset.xlsx".

		Raises IndexError when a sheet has no matching extensions workbook.
		"""
		app_root = os.getcwd()
		for directory in self.basedirs:
			path = os.path.join(app_root, directory)
			entries = os.listdir(path)
			# Names containing "$" are skipped (presumably spreadsheet lock
			# files -- TODO confirm).
			baseconfigs = [v for v in entries if "category" in v and "$" not in v]
			extensions = [v for v in entries if "extensions" in v and "$" not in v]

			dataFrames = pd.DataFrame()
			for base in baseconfigs:
				xl_file = pd.ExcelFile(os.path.join(path, base))
				## create a dictionary containing a DataFrame for every sheet
				# NOTE(review): pandas documents this keyword as ``na_values``;
				# confirm whether ``na_value`` has any effect.
				dfs = {sheet_name: xl_file.parse(sheet_name, na_value="na")
					for sheet_name in xl_file.sheet_names}

				dataframe = pd.DataFrame()
				for items in dfs.keys():
					extensionFilename = [v for v in extensions if items in v]
					if not extensionFilename:
						raise IndexError(
							"no extensions workbook matching sheet %r in %s" % (items, path))
					filename = os.path.join(path, extensionFilename[0])
					dframes = self.createServerSet(dfs[items], filename)
					# pd.concat replaces DataFrame.append, which pandas 2.0 removed.
					dataframe = pd.concat([dataframe, dframes])

				dataFrames = pd.concat([dataFrames, dataframe])

			dataFrames["category"] = directory
			self.dfservers = pd.concat([self.dfservers, dataFrames])

		cost_fileds = [v for v in self.dfservers.columns if "Cost" in v]
		self.dfservers["Cost"] = 0

		for item_ in cost_fileds:
			# An explicit list keeps this working where map() returns an
			# iterator; cells holding the text "na" count as missing.
			self.dfservers["Cost"] += [float(y) if y != "na" else np.nan
				for y in self.dfservers[item_]]

		self.dfservers.to_excel("dataset.xlsx", sheet_name='cat',
			engine='xlsxwriter',index=False)
255
	
256
	
257
	def filter_dataframe(self,items):
258
		
259
		if items == "numberCores":
260
			self.dfservers["subset"] = map(lambda x: 
261
					True if int(x) >= self.number_cores else False,
262
					self.dfservers[items])
263
			df = self.dfservers[self.dfservers["subset"] == True]
264
			self.dfservers = df
265
		else:
266
			if items == "storageType":
267
				self.dfservers["subset"] = map(lambda x:
268
						True if str(x) == self.storage_type else False, 
269
						self.dfservers[items])
270
				df = self.dfservers[self.dfservers["subset"] == True]
271
				self.dfservers = df
272
			else:
273
				if items == "memory":
274
					self.dfservers["subset"] = map(lambda x:
275
							True if int(x) >= self.memory_size else False, 
276
							self.dfservers[items])
277
					df = self.dfservers[self.dfservers["subset"] == True]
278
					self.dfservers = df
279
				else:
280
					if items == "storageSize":
281
						self.dfservers["subset"] = map(lambda x:
282
								True if int(x) >= self.storage_size else False, 
283
								self.dfservers[items])
284
						df = self.dfservers[self.dfservers["subset"] == True]
285
						self.dfservers = df
286
		
287
	
288
	def process_data(self):
289
		
290
		"""
291
		The algorithm keeps a constant number of maintainance hours over the 
292
		years
293
		
294
		Software cost is relatively low but can be is provided as a parameter 
295
		to the program.
296
		
297
		We only use servers with the following characteristics 
298
		-- number of core == 4
299
		-- memory 48 GB
300
		-- SSD storage greater than SSD 160GB
301
		
302
		"""
303
		filter_list = ["numberCores", "storageType", "storageSize", "memory"]
304
		for elt in filter_list:
305
			self.filter_dataframe(elt)
306
		
307
		del(self.dfservers["subset"])
308
		
309
		self.dfservers["extra_memory_cost"] = (self.dfservers["memory"] 
310
			- self.memory_size)*self.cost_extra_gb_memory
311
		
312
		self.dfservers["extra_storage_cost"] = (self.dfservers["storageSize"] 
313
			- self.storage_size)*self.cost_extra_gb_storage
314
		
315
		
316
		#note that the model assumes that softeware are updated every year 
317
		# even at cost 0, this cost include licences and so on.
318
		if (self.number_years >= 3):
319
			maintainance_hours_year = self.maintainance_hours * 12 
320
			## Compute the cost of a home made data center 
321
			self.dfservers["traditional_hpc_cost"] = self.dfservers["Cost"] * self.number_servers + \
322
						(self.number_years * maintainance_hours_year * self.maintainance_hours)  + \
323
						self.software_init_cost + \
324
						self.number_years * self.software_update_cost + \
325
						self.network_hardware_cost + \
326
						self.installation_cost
327
						
328
			# Compute the cost of corresponding server instances rented, 
329
			# remember that the base period is 3 years
330
			# We will call this cost the outsource cost 
331
			self.dfservers["ondemand_hpc_cost"] = self.number_servers * ( \
332
							self.one_time_cost + \
333
							self.platform_cost_hour * self.usage_hours * 365 * self.number_years) +\
334
							(self.number_years * maintainance_hours_year *\
335
							self.maintainance_hours) +\
336
							self.number_years * self.software_update_cost + \
337
							self.software_init_cost
338
			
339
			self.dfservers["ondemand_hpc_cost"] = self.dfservers["ondemand_hpc_cost"] +\
340
								self.number_servers * (self.dfservers["extra_memory_cost"] +\
341
								self.dfservers["extra_storage_cost"])
342
								
343
			self.dfservers["Cost difference"] = \
344
							self.dfservers["ondemand_hpc_cost"] - \
345
							self.dfservers["traditional_hpc_cost"]
346
			tmp_df = self.dfservers[["Cost difference", "ondemand_hpc_cost"]]
347
			
348
			
349
			self.dfservers["Cost difference (%)"] = map(lambda x: round(x, 2), 
350
					tmp_df.apply(self.difference_percentage, 1))
351
			
352
			self.dfservers["Cost difference (HPC ondemand - traditional)"] = \
353
							self.dfservers["Cost difference"]
354
			del(self.dfservers["Cost difference"])
355
		else:
356
			try :
357
				raise Exception
358
			except xception as inst:
359
				print("The number of years should be at leat 3! program exits on %s", inst)
360
		
361
		""" Update the server data set with new parameters """
362
		self.dfservers.to_excel("dataset_analysis.xlsx", sheet_name='cat',
363
			engine='xlsxwriter',index=False)
364
		
365
			
366
		
367

  
368
def main(argv):
	"""Main entrypoint.

	Parses the command-line options found in ``argv`` (program name first),
	builds a GenerateDate object from them, then runs merge_datasets() and
	process_data().

	Returns 0 on success, 1 when an exception was caught.
	"""

	## Default prices are for the Amazon Frankfurt region
	## (the original note read "UK franckfort").
	try:
		# Parse command-line options
		parser = OptionParser()

		parser.add_option("-n", "--servers", dest="servers",
			metavar="NUMBER_SERVRES", action="store", type="int", default=1000,
			help="number of servers for each observation")

		parser.add_option("--hour_maintainance", dest="maintainance_hours",
			metavar="MAINTAINANCE_HOURS", action="store", type="int", default=15,
			help="number of hours of maintainance per month")

		parser.add_option("--maintananceCost", dest="maintainance_cost",
			metavar="MAINTAINANCE_COST", action="store", type="float", default=50,
			help="cost per maintainance hour")

		parser.add_option("--software_init_cost", dest="software_cost",
			metavar="SOFTWARE_INITIAL_COST", action="store", type="float", default=5000,
			help="initial software cost (this include network software)")

		parser.add_option("--software_update_cost", dest="software_update",
			metavar="SOFTWARE_UPDATE", action="store", type="float", default=10000,
			help="cost associate to software update including licences")

		parser.add_option("--network_hardware_cost", dest="network_hardware",
			metavar="NETWORK_HARDWARE", action="store", type="float", default=15000,
			help="initial cost associated to network equipments purchase")

		# The help text used to be a copy of the network hardware one.
		parser.add_option("--installation_cost", dest="install_cost",
			metavar="INTALLATION_COST", action="store", type="float", default=10000,
			help="initial cost associated to the installation")

		## One time cost for a period of 3 years
		# Declared as float: the default is fractional and an int option
		# rejected values such as 2690.14 on the command line.
		parser.add_option("--one_time_cost", dest="one_time_cost",
			metavar="ONE_TIME_COST", action="store", type="float", default=2690.14,
			help="one time set up cost when you book instances from amazone")

		# The help text used to be a copy of the one-time cost one.
		parser.add_option("--platform_cost_hour", dest="cost_hour",
			metavar="PLATFORM_COST_HOUR", action="store", type="float", default= 0.543,
			help="hourly cost of a single machine when you book instances from amazone")

		parser.add_option("--monthly_cost_instance", dest="cost_monthly",
			metavar="COST_HOUR", action="store", type="float", default=108.64,
			help="monthly cost of a single machine book instances from amazone")

		parser.add_option("--cost_extra_gb_memory", dest="extra_memory",
			metavar="EXTRA_MEMORY", action="store", type="float", default=1.23,
			help="cost per hour per giga byte of extra memory")

		parser.add_option("--cost_extra_gb_storage", dest="extra_storage",
			metavar="EXTRA_STORAGE", action="store", type="float", default=0.82,
			help="cost per hour per giga byte of extra storage")

		parser.add_option("--number_years", dest="number_years",
			metavar="NUMBER_YEARS", action="store", type="int", default=3,
			help="Number of years over which you want to predict")

		parser.add_option("--number_cores", dest="number_cores",
			metavar="NUMBER_CPU_CORES", action="store", type="int", default=8,
			help="Number of CPU cores per host")

		parser.add_option("--memory_size", dest="memory_size",
			metavar="MEMORY_SIZE", action="store", type="int", default=48,
			help="Memory size expected, max is 768")

		parser.add_option("--storage_size", dest="storage_size",
			metavar="STORAGE_SIZE", action="store", type="int", default=48,
			help="Storage size expected")

		parser.add_option("--storage_type", dest="storage_type",
			metavar="STORAGE_TYPE", action="store", type="string", default="ssd sata",
			help="Storage type expected")

		# "--usage_ours" is the historical (misspelled) name, kept as an alias.
		parser.add_option("--usage_hours", "--usage_ours", dest="usage_hours",
			metavar="USAGE_HOURS", action="store", type="int", default=24,
			help="number of hours the system is used per day")

		## baseline: c3.2xlarge	8	28	15	2 x 80 SSD	$0.420 par heure
		## upfront	monthly		hourly
		## $3288 	$132.86 	$0.3071

		options, args = parser.parse_args(argv[1:])

		# Arguments follow the constructor's order: the hourly maintenance
		# cost comes before the monthly maintenance hours (they used to be
		# passed the other way round).
		gen = GenerateDate(options.servers,
			options.maintainance_cost,
			options.maintainance_hours,
			options.software_cost,
			options.software_update,
			options.network_hardware,
			options.install_cost,
			options.one_time_cost,
			options.cost_hour,
			options.cost_monthly,
			options.extra_memory,
			options.extra_storage,
			options.number_years,
			options.number_cores,
			options.memory_size,
			options.storage_size,
			options.storage_type,
			options.usage_hours)

		gen.merge_datasets()
		gen.process_data()

	except Exception as e:
		# "% r" in the old format string was read as a repr conversion and
		# mangled the message.
		print("Exception %s raised" % e)
		return 1

	return 0
484
 
485
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
b/livrables/L4_2_4/Data_file/GenerateServers.py~
1
#!/usr/bin/env python
2

  
3
from __future__ import division
4
import numpy as np
5
import pandas as pd
6
from numpy import genfromtxt
7
from pandas import DataFrame
8

  
9

  
10
import re
11
import math
12
import sys
13
import os
14
from optparse import OptionParser
15

  
16
import logging
17

  
18

  
19
'''
20
python version: 
21
2.7.8 |Anaconda 2.0.1 (x86_64)
22

  
23
full description:
24
2.7.8 |Anaconda 2.0.1 (x86_64)| (default, Aug 21 2014, 15:21:46) \n[GCC 4.2.1 (Apple Inc. build 5577)]
25
'''
26

  
27

  
28
np.random.seed(12345)
29

  
30

  
31
class GenerateDate():
32
	
33
	def __init__(self, nber_servers, 
34
		maintainanceCost, 
35
		maintainance_hours,
36
		software_init_cost, 
37
		software_update_cost,
38
		network_hardware_cost,
39
		installation_cost,
40
		one_time_cost,
41
		platform_cost_hour,
42
		monthly_cost_instance,
43
		cost_extra_gb_memory,
44
		cost_extra_gb_storage,
45
		number_years,
46
		number_cores,
47
		memory_size,
48
		storage_size,
49
		storage_type,
50
		usage_hours):
51
	
52
		self.number_servers = nber_servers
53
		self.basedirs = ["Cat1", "Cat2"]
54
		self.dfservers = pd.DataFrame()
55
		self.maintainanceCost = maintainanceCost
56
		self.maintainance_hours = maintainance_hours
57
		self.software_init_cost = software_init_cost
58
		self.software_update_cost = software_update_cost
59
		self.network_hardware_cost= network_hardware_cost
60
		self.installation_cost = installation_cost
61
		self.one_time_cost = one_time_cost
62
		self.platform_cost_hour = platform_cost_hour
63
		self.monthly_cost_instance = monthly_cost_instance
64
		self.cost_extra_gb_memory = cost_extra_gb_memory #cost per extra giga bite of memory
65
		self.cost_extra_gb_storage = cost_extra_gb_storage
66
		self.number_years = number_years
67
		self.number_cores = number_cores
68
		self.memory_size = memory_size
69
		self.storage_size = storage_size
70
		self.storage_type = storage_type
71
		self.usage_hours = usage_hours
72

  
73
	def duplicate(self, x, n):
		"""Return a list made of the element ``x[0]`` repeated ``n`` times."""
		head = x[0]
		return [head for _ in range(n)]
75
	
76
	def getItem(self, item):
		"""Translate a component name into the name of its unit-cost column.

		"procs" -> "procUnitCost", "mem" -> "MemUnitCost",
		"storage3inch"/"storage2inch" -> "StorageUnitCost",
		"network" -> "netUnitCost". Any other value yields None.
		"""
		if item == "procs":
			return "procUnitCost"
		if item == "mem":
			return "MemUnitCost"
		if item in ("storage3inch", "storage2inch"):
			return "StorageUnitCost"
		if item == "network":
			return "netUnitCost"
		return None
88
	
89
	def difference_percentage(self, x):
		"""Return ``x[0] / x[1]`` as a percentage rounded to two decimals.

		``x`` is any iterable whose first two values are the numerator and the
		denominator (a list, a tuple, an array or a DataFrame row).
		Returns NaN when any value of ``x`` is missing.
		"""
		# Work on the values themselves: ``np.nan in x`` looked at the index
		# labels of a Series and only matched by identity in a list, so it
		# never detected a missing value; ``x[0]`` on a label-indexed Series
		# also depended on positional fallback.
		values = list(x)
		if any(pd.isnull(v) for v in values):
			return np.nan
		return round((values[0] / values[1]) * 100, 2)
94
	
95
	
96
	'''
97
		base configuration data frame
98
		file name for the corresponding extension file
99
		flag to identify the appropriate component
100
	'''
101

  
102
	def createServerSet(self, baseConfDf, extensionfile):
103
		xcl_file = pd.ExcelFile(extensionfile)
104

  
105
		dframes = {sheet_name: xcl_file.parse(sheet_name) for sheet_name in xcl_file.sheet_names}
106
		#
107
		keys = dframes.keys()
108

  
109
		## add the processor unit cost
110
		baseConfDf[self.getItem("procs")] = 0
111
		#
112
		
113
		data = baseConfDf.merge(dframes["procs"], how="outer", copy=False)
114
		colnames = list(data.columns)
115
		proc_names = list(dframes["procs"])
116
		colnames_t = [x for x in colnames if x not in proc_names]
117
		colnames = [x for x in colnames_t if x not in ["id"]]
118

  
119
		data[colnames] = data[colnames].apply(self.duplicate, 0, args=[data.shape[0]])		
120
	
121
		ext = [v for v in keys if v in ["mem", "network","storage3inch", "storage2inch"]]
122
		print ext
123
		if ("storage3inch" in ext) and ("storage2inch" in ext):
124
			ext.remove("storage3inch")
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff