-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathmerging.R
78 lines (59 loc) · 1.73 KB
/
merging.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#
# merging.R, 12 Jan 16
#
# Merging multiple csv files
# Data from the top500.org web site, November of every year
#
# Example from:
# Evidence-based Software Engineering: based on the publicly available data
# Derek M. Jones
#
# TAG supercomputer
# Project configuration; presumably defines ESEUR_dir, which is not visible
# in this file -- confirm against ESEUR_config.r.
source("ESEUR_config.r")

# Directory holding the yearly TOP500 lists.
dir_str <- paste0(ESEUR_dir, "Rlang/Top500/")

# Names of the xz-compressed csv files to merge.  The dots are escaped and
# the pattern is anchored at both ends, so only names that start with
# "TOP500_" and end in ".csv.xz" are selected.
top_files <- list.files(dir_str, pattern = "^TOP500_.*\\.csv\\.xz$")
# Read one csv file from dir_str and fold it into the global all_csv.
#
# file_str: name of a file inside dir_str.
#
# The merge keeps unmatched rows from both sides (all=TRUE).  The result is
# stored back into the all_csv found in an enclosing environment; the value
# returned is always 0, so callers only use this for its side effect.
merge_csv <- function(file_str) {
  csv_path <- paste(dir_str, file_str, sep = "/")
  one_list <- read.csv(csv_path)
  all_csv <<- merge(all_csv, one_list, all = TRUE)
  0
}
# Fold the values of a mis-named column into the correctly named one.
#
# old_col: name of the column whose values are to be moved.
# new_col: name of the column that receives them.
# csv:     data frame to work on; defaults to the global all_csv, so
#          existing two-argument calls behave as before.
#
# Returns a data frame without old_col.  Rows where old_col was NA come
# first and keep their new_col value; rows where old_col held a value
# follow, with that value copied into new_col.
#
# If old_col is not present the data frame is returned unchanged, and if
# new_col is not present old_col is simply renamed, so the function does
# not fail when the input files happen to use only one of the spellings.
mv_col <- function(old_col, new_col, csv = all_csv) {
  stopifnot(is.data.frame(csv))

  col_names <- names(csv)

  # Nothing to move.
  if (!(old_col %in% col_names)) {
    return(csv)
  }

  # No column to merge into: moving every value is the same as a rename.
  if (!(new_col %in% col_names)) {
    names(csv)[col_names == old_col] <- new_col
    return(csv)
  }

  has_old <- !is.na(csv[[old_col]])

  untouched <- csv[!has_old, , drop = FALSE]
  moved <- csv[has_old, , drop = FALSE]

  # Replace the whole column (rather than individual cells) so that rbind
  # below is what reconciles the two columns' contents.
  moved[[new_col]] <- moved[[old_col]]

  merged <- rbind(untouched, moved)
  merged[[old_col]] <- NULL
  merged
}
# Without any input files the merge below would leave all_csv as a bare
# number and the column moves would fail with an unrelated-looking error.
if (length(top_files) == 0) {
  stop("No TOP500 csv files found in ", dir_str)
}

# merge_csv() merges each file into the existing all_csv, so all_csv has to
# exist before the first call.
# NOTE(review): merging the scalar seed with the first file presumably adds
# a constant placeholder column to all_csv -- confirm it is harmless.
all_csv <- 0

# Called for its side effect on all_csv; merge_csv() returns 0 every time.
dummy <- vapply(top_files, merge_csv, numeric(1))

# Column names changed between lists; fold the old spellings into the
# current ones.
all_csv <- mv_col("Effeciency....", "Efficiency....")
all_csv <- mv_col("Proc..Frequency", "Processor.Speed..MHz.")
all_csv <- mv_col("RMax", "Rmax")
all_csv <- mv_col("RPeak", "Rpeak")

# Keep only the columns of interest and drop duplicated rows.
cpu_power <- unique(data.frame(Year = all_csv$Year,
                               Power = all_csv$Power,
                               Rmax = all_csv$Rmax,
                               Rpeak = all_csv$Rpeak,
                               Nmax = all_csv$Nmax,
                               Nhalf = all_csv$Nhalf,
                               Processor.Speed = all_csv$Processor.Speed..MHz.,
                               Segment = all_csv$Segment))

# What factors explain the following plot?
plot(cpu_power$Year, log(cpu_power$Power))
# all_csv$Rank=NULL
# all_csv$Previous.Rank=NULL
# all_csv$First.Rank=NULL
# all_csv$First.Appearance=NULL
# all_csv$Application.Area=NULL
# all_csv$Interconnect.Family=NULL
# all_csv$Interconnect=NULL
# all_csv$Accelerator.Co.Processor=NULL
# all_csv$Accelerator.Co.Processor.Cores=NULL
# all_csv$Processor.Generation=NULL
# all_csv$Computer=NULL
# all_csv$Name=NULL
# all_csv$Site=NULL