mirror of
https://github.com/nodejs/node.git
synced 2024-11-21 13:09:21 +01:00
7aac70607d
Adds lines between the points of the same category in scatter.R plots. PR-URL: https://github.com/nodejs/node/pull/22074 Reviewed-By: Andreas Madsen <amwebdk@gmail.com> Reviewed-By: Anatoli Papirovski <apapirovski@mac.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com>
87 lines
2.4 KiB
R
87 lines
2.4 KiB
R
#!/usr/bin/env Rscript
library(ggplot2)
library(plyr)

# Locate the directory that contains this script and load ./_cli.R from it.
# The script path is taken from the `--file=<path>` entry of the full
# (non-trailing) command line, which is how Rscript reports it.
args <- commandArgs(trailingOnly = FALSE)

# Anchor the pattern so that only an entry that starts with `--file=` is
# used; an unrelated argument that merely contains "--file" would otherwise
# produce a multi-element path and break the source() call below.
script.arg <- grep("^--file=", args, value = TRUE)
if (length(script.arg) == 0) {
  stop("unable to locate the script path; run this file with Rscript",
       call. = FALSE)
}

# The first match is the interpreter's own entry; later ones can only come
# from user-supplied arguments.
dirname <- dirname(sub("^--file=", "", script.arg[1]))

# NOTE(review): _cli.R is not visible here; presumably it defines the
# `args.options` list used by the rest of the script -- confirm.
source(paste0(dirname, "/_cli.R"), chdir = TRUE)

# Abort with the usage text when --xaxis or --category is missing, or when
# the --plot option compares equal to TRUE (presumably meaning that --plot
# was passed without a filename -- confirm against the option parser).
# NOTE(review): args.options is not defined in this file; presumably it is
# created by the sourced _cli.R -- confirm.
if (is.null(args.options$xaxis) || is.null(args.options$category) ||
    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  # The message is a single multi-line string literal, so the continuation
  # lines must stay unindented to keep the printed text unchanged.
  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
--xaxis variable variable name to use as xaxis (required)
--category variable variable name to use as colored category (required)
--plot filename save plot to filename
--log use a log-2 scale for xaxis in the plot")
}

# Options taken from the parsed command line.
x.axis.name <- args.options$xaxis
category.name <- args.options$category

# --log acts as a flag: any non-NULL value selects the log-2 x axis.
use.log2 <- !is.null(args.options$log)

# Target file for the plot; the plotting step is skipped when this is NULL.
plot.filename <- args.options$plot

# Read the benchmark CSV from standard input, stripping the white space
# around fields, and hold it as a data frame.
dat <- data.frame(read.csv(file("stdin"), strip.white = TRUE))

# Aggregated variables: every column except rate, time, filename and the two
# columns selected as x axis and category.
excluded.columns <- c("rate", "time", "filename", x.axis.name, category.name)
column.names <- names(dat)
aggregate <- column.names[!(column.names %in% excluded.columns)]

# A column with exactly one distinct value does not change, so it is not
# counted as aggregated.
is.varying <- vapply(
  aggregate,
  function(key) length(unique(dat[[key]])) != 1,
  logical(1)
)
aggregate <- aggregate[is.varying]

# Report the aggregated variables, one per line, followed by a blank line
# when there is at least one.
cat(sprintf("aggregating variable: %s\n", aggregate), sep = "")
if (length(aggregate) > 0) {
  cat("\n")
}

# Summarise each (x axis, category) group: the mean rate and the half-width
# of the 95 % confidence interval of that mean.
stats <- ddply(dat, c(x.axis.name, category.name), function(group) {
  samples <- group$rate
  n <- length(samples)

  # The interval is the standard error scaled by the Student t quantile; it
  # is left as NA when the group has fewer than two samples.
  half.width <- if (n > 1) {
    sqrt(var(samples) / n) * qt(0.975, n - 1)
  } else {
    NA
  }

  data.frame(rate = mean(samples), confidence.interval = half.width)
})

print(stats, row.names = FALSE)

# Render the scatter plot and save it, only when an output file was requested.
if (!is.null(plot.filename)) {
  p <- ggplot(
    stats,
    aes_string(x = x.axis.name, y = "rate", colour = category.name)
  )
  if (use.log2) {
    p <- p + scale_x_continuous(trans = "log2")
  }

  # Error bars span rate +/- confidence interval; rows whose interval is NA
  # are dropped without a warning (na.rm).
  p <- p + geom_errorbar(
    aes(ymin = rate - confidence.interval, ymax = rate + confidence.interval),
    width = .1, na.rm = TRUE
  )
  p <- p + geom_point()

  # Group the line explicitly by category. ggplot2's default grouping is the
  # interaction of all discrete variables, which yields no usable lines when
  # the x axis is discrete and a single line across all categories when the
  # category column is numeric.
  p <- p + geom_line(aes_string(group = category.name))

  p <- p + ylab("rate of operations (higher is better)")

  # The title is the first cell of the input data.
  # NOTE(review): presumably the benchmark filename column -- confirm.
  p <- p + ggtitle(dat[1, 1])

  ggsave(plot.filename, p)
}