Table of Contents
Using column generation in constraint-and-column generation for adjustable robust optimization > JSP
Loading the data
Our results can be found in the results.jsp.csv
file
with the following columns:
- “tag”: a tag always equal to “result” used grep the result line in our execution log file.
- “instance”: the path to the instance.
- “standard_phase_time_limit”: the time limit for the standard phase (i.e., without using CG).
- “master_solver”: the solver used for solving the CCG master problem: STD for standard, i.e., Gurobi, CG for column generation.
- “status”: the final status.
- “reason”: the final status reason.
- “has_large_scaled”: true if the CG phase has been started, false otherwise.
- “n_iterations”: the number of iterations.
- “total_time”: the total time to solve the problem.
- “master_time”: the time spent solving the master problem.
- “adversarial_time”: the time spent solving the adversarial problem.
- “best_bound”: the best bound found.
- “best_obj”: the best feasible point value.
- “relative_gap”: the final relative gap.
- “absolute_gap”: the final absolute gap.
- “adversarial_unexpected_status”: the status of the adversarial problem solver if it is not Optimal.
- “with_heuristic”: true if the CG-based heuristic is used.
- “with_non_optimal_pricing”: always false.
- “n_jobs”: the number of jobs in the instance.
- “Gamma”: the value for the uncertainty budget \(\Gamma\).
- “blank”: this column is left blank.
data = rbind(
read.csv("results-filtered.csv", header = FALSE)
)
colnames(data) = c("slurm_file", "tag", "instance", "standard_phase_time_limit_raw", "Gamma", "with_heuristic", "standard_phase_time_limit", "master_solver", "status", "reason", "has_large_scaled", "n_iterations", "total_time", "master_time", "adversarial_time", "best_bound", "best_obj", "relative_gap", "absolute_gap", "second_stage.mean", "second_stage.std_dev", "adversarial_unexpected_status", "memory_used", "memory_limit")
#write.csv(data %>% filter(Gamma %in% c(3,6)), "results-filtered.csv", row.names = FALSE, col.names = FALSE)
data = data %>% mutate(instance = basename(instance),
n_jobs = as.numeric(sub(".*N(\\d+)_R.*", "\\1", instance))
)
We start by removing the “tag” and the “blank” columns.
#data = data[, !(names(data) %in% c("tag", "blank"))]
For homogeneity, we fix the total_time of unsolved instances to the time limit.
if (sum(data$tag == "iteration") > 0 ) {
data[data$tag == "iteration",]$total_time = 10800
}
if (sum(data$total_time > 10800) > 0 ) {
data[data$total_time > 10800,]$total_time = 10800
}
Then, we create a column named “method” which gives a specific name to each method, comprising the approach for solving the CCG master problem, the time limit of the standard phase and a flag indicating if the CG-based heuristic was used.
data$method = paste0(data$master_solver, "_TL", data$standard_phase_time_limit, "_H", data$with_heuristic)
unique(data$method)
## [1] "STD_TLInf_Htrue" "CG_TL0_Htrue" "CG_TL0_Hfalse"
Our final data reads.
Sanity Check
# Define the relative tolerance
tolerance <- 10^-2
# Filter the data where time < 18000 and group by 'instance'
validation <- data %>%
filter(total_time < 18000) %>%
group_by(instance) %>%
summarise(min_best_obj = min(best_obj), max_best_obj = max(best_obj)) %>%
mutate(valid = (max_best_obj - min_best_obj) / min_best_obj <= tolerance)
# Check if all instances are valid
if (all(validation$valid)) {
print("All methods find the same best_obj value within the relative tolerance for all instances.")
} else {
print("Methods do not find the same best_obj value within the relative tolerance for some instances.")
print(validation %>% filter(!valid)) # Show the instances that failed validation
}
## [1] "All methods find the same best_obj value within the relative tolerance for all instances."
Empirical Cumulative Distribution Function (ECDF)
We plot the ECDF of computation time over our set of instances for all approaches.
ggplot(data %>% filter(Gamma %in% c(3,6)), aes(x = total_time, col = method)) + stat_ecdf(pad = FALSE) +
coord_cartesian(xlim = c(0,10800)) +
#scale_x_log10() +
theme_minimal()
for (Gamma_val in unique(data$Gamma)) {
print(Gamma_val)
p = ggplot(data %>% filter(Gamma == Gamma_val), aes(x = total_time, col = method)) + stat_ecdf(pad = FALSE) +
coord_cartesian(xlim = c(0,10800)) +
#scale_x_log10() +
theme_minimal()
print(p)
}
## [1] 3
## [1] 6
ggplot(data, aes(x = memory_used, col = method)) +
stat_ecdf(pad = FALSE) +
theme_minimal()
ggplot(data, aes(x = master_time / n_iterations, col = method)) + stat_ecdf(pad = FALSE) +
# coord_cartesian(xlim = c(0,18000)) +
theme_minimal()
We export these results in csv to print them in tikz.
#data_with_ecdf = data %>%
# group_by(method) %>%
# arrange(total_time) %>%
# mutate(ecdf_value = ecdf(total_time)(total_time)) %>%
# ungroup()
n_points <- 1000
x_points <- seq(1, 10800, length.out = n_points)
data_with_ecdf <- data %>%
group_by(method) %>%
arrange(total_time) %>%
group_modify(~ {
# Create ECDF function for the current method
ecdf_func <- ecdf(.x$total_time)
# Compute ECDF values for the specified points
tibble(total_time = x_points, ecdf_value = 100 * ecdf_func(x_points))
}) %>%
ungroup()
for (method in unique(data_with_ecdf$method)) {
output = data_with_ecdf[data_with_ecdf$method == method,]
output = output[,c("total_time", "ecdf_value")]
output = output[output$total_time < 10800,]
write.csv(output, file = paste0("TIME_", method, ".csv"), row.names = FALSE)
}
Summary table
In this section, we create a table summarizing the main outcome of our computational experiments.
We first focus on the solved instances.
summary_data_lt_18000 <- data %>%
filter(total_time < 10800) %>%
group_by(n_jobs, Gamma, method) %>%
summarize(
avg_total_time = mean(total_time, na.rm = TRUE),
avg_master_time = mean(master_time, na.rm = TRUE),
avg_adversarial_time = mean(adversarial_time, na.rm = TRUE),
avg_n_iterations = mean(n_iterations, na.rm = TRUE),
sum_has_large_scaled = sum(has_large_scaled),
sum_oom = sum(tag == "iteration"),
num_lines = n(),
.groups = "drop"
) %>%
ungroup() %>%
arrange(n_jobs, Gamma, method)
Then, we compute averages over the unsolved instances.
summary_data_ge_18000 <- data %>%
filter(total_time >= 10800) %>%
group_by(n_jobs, Gamma, method) %>%
summarize(
avg_n_iterations_unsolved = mean(n_iterations, na.rm = TRUE),
num_lines_unsolved = n(),
.groups = "drop"
) %>%
ungroup() %>%
arrange(n_jobs, Gamma, method)
Finally, we merge our results.
transposed_data_lt_18000 <- summary_data_lt_18000 %>%
pivot_wider(names_from = method, values_from = avg_total_time:num_lines)
transposed_data_ge_18000 <- summary_data_ge_18000 %>%
pivot_wider(names_from = method, values_from = avg_n_iterations_unsolved:num_lines_unsolved)
transposed_data_lt_18000 %>% kable()
n_jobs | Gamma | avg_total_time_CG_TL0_Hfalse | avg_total_time_CG_TL0_Htrue | avg_total_time_STD_TLInf_Htrue | avg_master_time_CG_TL0_Hfalse | avg_master_time_CG_TL0_Htrue | avg_master_time_STD_TLInf_Htrue | avg_adversarial_time_CG_TL0_Hfalse | avg_adversarial_time_CG_TL0_Htrue | avg_adversarial_time_STD_TLInf_Htrue | avg_n_iterations_CG_TL0_Hfalse | avg_n_iterations_CG_TL0_Htrue | avg_n_iterations_STD_TLInf_Htrue | sum_has_large_scaled_CG_TL0_Hfalse | sum_has_large_scaled_CG_TL0_Htrue | sum_has_large_scaled_STD_TLInf_Htrue | sum_oom_CG_TL0_Hfalse | sum_oom_CG_TL0_Htrue | sum_oom_STD_TLInf_Htrue | num_lines_CG_TL0_Hfalse | num_lines_CG_TL0_Htrue | num_lines_STD_TLInf_Htrue |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
30 | 3 | 2010.063 | 1821.504 | 158.5377 | 1986.157 | 1798.688 | 130.24936 | 23.04203 | 22.19827 | 28.20346 | 7.225807 | 7.147541 | 8.549296 | 62 | 61 | 0 | 0 | 0 | 0 | 62 | 61 | 71 |
30 | 6 | 1481.399 | 1394.076 | 560.3248 | 1238.982 | 1129.475 | 80.37561 | 241.80795 | 264.11766 | 479.88363 | 6.622642 | 6.673077 | 6.728814 | 53 | 52 | 0 | 0 | 0 | 0 | 53 | 52 | 59 |
40 | 3 | 1469.176 | 1533.361 | 315.2581 | 1438.349 | 1505.364 | 207.87783 | 29.62738 | 26.98819 | 107.23685 | 7.789474 | 7.842105 | 10.458333 | 19 | 19 | 0 | 0 | 0 | 0 | 19 | 19 | 48 |
40 | 6 | 1764.391 | 1471.362 | 299.4690 | 1746.922 | 1454.200 | 197.57655 | 15.00250 | 15.72054 | 101.78570 | 8.250000 | 8.291667 | 9.300000 | 24 | 24 | 0 | 0 | 0 | 0 | 24 | 24 | 30 |
transposed_data_ge_18000 %>% kable()
n_jobs | Gamma | avg_n_iterations_unsolved_CG_TL0_Hfalse | avg_n_iterations_unsolved_CG_TL0_Htrue | avg_n_iterations_unsolved_STD_TLInf_Htrue | num_lines_unsolved_CG_TL0_Hfalse | num_lines_unsolved_CG_TL0_Htrue | num_lines_unsolved_STD_TLInf_Htrue |
---|---|---|---|---|---|---|---|
30 | 3 | 6.111111 | 6.473684 | 6.888889 | 18 | 19 | 9 |
30 | 6 | 6.111111 | 5.964286 | 8.571429 | 27 | 28 | 21 |
40 | 3 | 5.672131 | 5.786885 | 8.906250 | 61 | 61 | 32 |
40 | 6 | 3.821429 | 3.892857 | 6.500000 | 56 | 56 | 50 |
#cbind(
# transposed_data_lt_18000,
# transposed_data_ge_18000
#) %>%
# kable() %>%
# kable_styling(full_width = FALSE, position = "center")
Second-stage Deviations
ggplot(data, aes(x = method, y = second_stage.std_dev / abs(second_stage.mean))) +
geom_boxplot() +
labs(title = "Boxplot of std.dev",
x = "Method",
y = "Number of Iterations") +
theme_minimal()
## Warning: Removed 960 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
git push
action on the public repository
hlefebvr/hlefebvr.github.io
using rmarkdown and Github
Actions. This ensures the reproducibility of our data manipulation. The
last compilation was performed on the 20/02/25 08:03:36.