1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
use crate::types::{
EnvFile, EnvValue, EnvValueConditioned, EnvValueDecode, EnvValuePathGlob, EnvValueScript,
};
use indexmap::IndexMap;
use once_cell::sync::Lazy;
use petgraph::algo::{kosaraju_scc, toposort};
use petgraph::graphmap::{DiGraphMap, GraphMap};
use regex::Regex;
use std::collections::HashSet;
#[cfg(test)]
#[path = "env_test.rs"]
mod env_test;
/// Matches a `${...}` placeholder: a literal `${`, followed (non-greedily) by any
/// characters up to and including the first closing `}`.
static RE_VARIABLE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{.*?}").unwrap());
/// Flattens several env maps into a single list of `(name, value)` pairs in which
/// every name occurs exactly once.
///
/// When a name is defined more than once, the definition that comes last (within a
/// map, or in a later map) is the one that is kept. The resulting pairs are ordered
/// by the position of the definition that survived.
fn env_unique<'a>(vals: &'a [&'a IndexMap<String, EnvValue>]) -> Vec<(&'a str, &'a EnvValue)> {
    let mut seen = HashSet::new();

    // Walk all entries back to front, so the first time a name shows up it is its
    // most recent definition. `insert` returns `false` for names that were already
    // recorded, which drops every older definition.
    let mut latest: Vec<(&'a str, &'a EnvValue)> = vals
        .iter()
        .flat_map(|map| map.iter())
        .rev()
        .filter(|&(name, _)| seen.insert(name.as_str()))
        .map(|(name, value)| (name.as_str(), value))
        .collect();

    // Undo the reversal so the pairs are in forward order again.
    latest.reverse();
    latest
}
/// Matches a shell parameter reference: either `$name` (name in capture group 1) or
/// `${name}` / `${#name}` (name in capture group 2), where `name` consists of one or
/// more word characters or `-`.
static RE_SH_PARAM: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\$(?:([\w-]+)|\{#?([\w-]+)})").unwrap());
/// Determines which variables a script value depends on.
///
/// Scripts are handed to a shell (`sh`), so their dependencies cannot be read from
/// `${...}` placeholders alone; instead every line of the script is scanned for
/// shell parameter references (`$name`, `${name}`, `${#name}`).
///
/// If the value carries an explicit `depends_on = [...]` list, that list is returned
/// as-is and the script is not scanned at all.
///
/// The scan is deliberately conservative: every referenced name is reported,
/// regardless of context, and names are not de-duplicated.
/// A future implementation could refine this by honoring `'` quoting and
/// `[declare] name=value` statements, excluding names declared by the script itself.
///
/// The notion of a variable name follows the POSIX standard
/// (<https://pubs.opengroup.org/onlinepubs/9699919799/>),
/// Chapter 8 "Environment Variables":
///
/// > Environment variable names used by the utilities in the Shell and Utilities volume of
/// > POSIX.1-2017 consist solely of uppercase letters, digits, and the <underscore> ( '_' )
/// > from the characters defined in Portable Character Set and do not begin with a digit.
/// > Other characters may be permitted by an implementation;
/// > applications shall tolerate the presence of such names.
/// > Uppercase and lowercase letters shall retain their unique identities and shall not be folded
/// > together.
/// > The name space of environment variable names containing lowercase letters is
/// > reserved for applications.
/// > Applications can define any environment variables with names
/// > from this name space without modifying the behavior of the standard utilities.
///
/// A declaration in a shell script has the form `declare var=...`, where `declare`
/// is optional.
fn env_depends_on_find_script(val: &EnvValueScript) -> Vec<&str> {
    match &val.depends_on {
        // an explicit list always takes precedence over the scan
        Some(explicit) => explicit.iter().map(String::as_str).collect(),
        None => val
            .script
            .iter()
            .flat_map(|line| RE_SH_PARAM.captures_iter(line))
            // group 1 holds the name of `$name`, group 2 the name of `${name}`
            .filter_map(|captures| captures.get(1).or_else(|| captures.get(2)))
            .map(|name| name.as_str())
            .collect(),
    }
}
/// Collects the names of all `${name}` placeholders found in `val`.
///
/// Names are returned in order of appearance, with surrounding whitespace trimmed;
/// a name that is referenced several times is returned several times.
fn env_depends_on_find(val: &str) -> Vec<&str> {
    RE_VARIABLE
        .find_iter(val)
        .map(|found| {
            let placeholder = found.as_str();
            // every match has the shape `${...}`: skip the two leading bytes (`${`)
            // and the single trailing byte (`}`)
            placeholder[2..placeholder.len() - 1].trim()
        })
        .collect()
}
/// Returns the names of the variables that `val` references.
///
/// Plain values, decode sources, list items, conditional values and path globs are
/// searched for `${name}` placeholders; scripts are searched for shell parameter
/// references (or use their explicit `depends_on` list). Every other kind of value
/// has no dependencies.
fn env_depends_on(val: &EnvValue) -> Vec<&str> {
    match val {
        EnvValue::Value(text) => env_depends_on_find(text),
        EnvValue::Decode(EnvValueDecode { source, .. }) => env_depends_on_find(source),
        // concatenate the dependencies of all items, keeping their order
        EnvValue::List(items) => items
            .iter()
            .flat_map(|item| env_depends_on_find(item))
            .collect(),
        EnvValue::Conditional(EnvValueConditioned { value, .. }) => env_depends_on_find(value),
        EnvValue::PathGlob(EnvValuePathGlob { glob, .. }) => env_depends_on_find(glob),
        EnvValue::Script(script) => env_depends_on_find_script(script),
        _ => Vec::new(),
    }
}
/// Merges two env maps into one, ordered so that every variable comes after the
/// variables it references.
///
/// * `base` - the env that is being extended
/// * `ext` - the extending env; for names defined in both maps its value wins
///
/// Special cases:
/// * names starting with `CARGO_MAKE_CURRENT_TASK_` always retain the value from
///   `base` (and are dropped if `base` does not define them),
/// * if a name is a profile in both `base` and `ext`, the two profiles are merged
///   recursively instead of one replacing the other.
///
/// # Errors
///
/// Returns an error message (E001) if the variables reference each other in a
/// cycle. The message lists every cycle that was found.
pub(crate) fn merge_env(
    base: &IndexMap<String, EnvValue>,
    ext: &IndexMap<String, EnvValue>,
) -> Result<IndexMap<String, EnvValue>, String> {
    let combined = [base, ext];
    let combined: Vec<_> = env_unique(&combined);

    let mut graph: GraphMap<&str, (), _> = DiGraphMap::new();

    // `combined` is already unique, so the nodes can be added straight from it.
    // Adding them in this fixed order (instead of going through a `HashSet`, whose
    // iteration order is randomized) keeps the graph, and therefore the resulting
    // order of unrelated variables, deterministic between runs.
    for (key, _) in &combined {
        graph.add_node(*key);
    }

    for (key, val) in &combined {
        // combined is unique (only latest value),
        // which is why we do not need to delete any previously declared outbound edges.
        for used in env_depends_on(val) {
            // if the env variable is in the current scope add an edge,
            // otherwise it is referencing an external variable.
            if graph.contains_node(used) {
                graph.add_edge(*key, used, ());
            }
        }
    }

    debug!("env dependencies: {:?}", graph);

    let variables = match toposort(&graph, None) {
        Ok(sorted) => sorted,
        Err(_) => {
            // A cycle has been detected. The strongly connected components are only
            // computed now (on the error path) to name the variables involved;
            // this isn't strictly necessary, but aids when debugging.
            let mut err =
                "A cycle between different env variables has been detected (E001, see: https://github.com/sagiegurari/cargo-make#e001 for more information)."
                    .to_owned();

            for scc in kosaraju_scc(&graph) {
                // Every node is part of some component, even if it is not part of
                // any cycle. A component is only a cycle if it has more than one
                // node, or if its single node references itself.
                let is_cycle = match scc.as_slice() {
                    [] => false,
                    [node] => graph.contains_edge(*node, *node),
                    _ => true,
                };
                if !is_cycle {
                    continue;
                }

                // repeat the first node at the end to close the cycle visually
                let render = scc
                    .iter()
                    .chain(scc.first())
                    .map(ToString::to_string)
                    .collect::<Vec<_>>()
                    .join(" -> ");
                err.push_str(&format!(" Cycle: {}.", render));
            }

            return Err(err);
        }
    };

    let mut merge = IndexMap::new();

    // Edges point from a variable to the variables it uses, so the topological
    // order has to be reversed to get dependencies first.
    for name in variables.into_iter().rev() {
        if name.starts_with("CARGO_MAKE_CURRENT_TASK_") {
            // CARGO_MAKE_CURRENT_TASK are handled differently and **always**
            // retain their old value
            if let Some(value) = base.get(name) {
                merge.insert(name.to_owned(), value.clone());
            }
            continue;
        }

        let value = match (base.get(name), ext.get(name)) {
            // if base and ext both are a profile, the profiles themselves
            // need to be merged as well.
            (Some(EnvValue::Profile(base)), Some(EnvValue::Profile(ext))) => {
                EnvValue::Profile(merge_env(base, ext)?)
            }
            // otherwise the latest definition wins: ext before base.
            (_, Some(latest)) | (Some(latest), None) => latest.clone(),
            // every node originates from base or ext, nothing to merge otherwise.
            (None, None) => continue,
        };
        merge.insert(name.to_owned(), value);
    }

    Ok(merge)
}
/// Returns a new list containing clones of all `extended` env files followed by
/// clones of all `base` env files. Neither input is modified.
pub(crate) fn merge_env_files(
    base: &mut Vec<EnvFile>,
    extended: &mut Vec<EnvFile>,
) -> Vec<EnvFile> {
    extended.iter().chain(base.iter()).cloned().collect()
}
/// Returns a new list containing copies of all `extended` env scripts followed by
/// copies of all `base` env scripts. Neither input is modified.
pub(crate) fn merge_env_scripts(base: &mut Vec<String>, extended: &mut Vec<String>) -> Vec<String> {
    extended.iter().chain(base.iter()).cloned().collect()
}