1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
use crate::types::{
    EnvFile, EnvValue, EnvValueConditioned, EnvValueDecode, EnvValuePathGlob, EnvValueScript,
};
use indexmap::IndexMap;
use once_cell::sync::Lazy;
use petgraph::algo::{kosaraju_scc, toposort};
use petgraph::graphmap::{DiGraphMap, GraphMap};
use regex::Regex;
use std::collections::HashSet;

#[cfg(test)]
#[path = "env_test.rs"]
mod env_test;

/// Matches a `${...}` placeholder: the literal `${`, followed by as few characters
/// as possible (lazy `.*?`), up to and including the first closing `}`.
static RE_VARIABLE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{.*?}").unwrap());

/// Flattens several environment maps into one ordered list in which every key
/// appears exactly once, carrying the value of its last definition.
///
/// The maps are read as one long sequence (first map first, each map in its own
/// iteration order). When a key is defined more than once, only the final
/// definition is kept, at the position of that final definition relative to the
/// other surviving entries.
fn env_unique<'a>(vals: &'a [&'a IndexMap<String, EnvValue>]) -> Vec<(&'a str, &'a EnvValue)> {
    let mut seen = HashSet::new();

    // Walk back to front so the first hit for a key is its latest definition;
    // `HashSet::insert` returns `false` for a key that has already been kept.
    let mut latest: Vec<_> = vals
        .iter()
        .flat_map(|map| map.iter())
        .rev()
        .map(|(name, value)| (name.as_str(), value))
        .filter(|(name, _)| seen.insert(*name))
        .collect();

    // The entries were gathered backwards; restore the forward order.
    latest.reverse();
    latest
}

/// Matches a shell parameter reference in one of two forms: a bare `$name`
/// (name in capture group 1), or a braced `${name}` / `${#name}` (name in capture
/// group 2). A name is one or more `\w` characters or `-`, and in the braced form
/// the closing `}` must follow the name directly.
static RE_SH_PARAM: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\$(?:([\w-]+)|\{#?([\w-]+)})").unwrap());

/// Determines which environment variables a script value depends on.
///
/// Scripts are handed to a shell (`sh`), so their dependencies cannot be read from
/// `${...}` placeholders alone: every script line has to be searched for shell
/// parameter references instead.
///
/// If the value carries an explicit `depends_on = [...]` list, that list replaces
/// the scan entirely and is returned as is.
///
/// The scan is deliberately conservative: it ignores context and reports every
/// parameter reference it finds, in order of appearance and including duplicates.
/// A future implementation might refine this by honouring `'` quoting and by
/// recognising declarations (`[declare] name=value`) so that names declared inside
/// the script are excluded from the subsequent analysis.
///
/// The notion of a variable name follows the official POSIX standard,
/// <https://pubs.opengroup.org/onlinepubs/9699919799/>, Chapter 8
/// "Environment Variables":
///
/// > Environment variable names used by the utilities in the Shell and Utilities volume of
/// > POSIX.1-2017 consist solely of uppercase letters, digits, and the <underscore> ( '_' )
/// > from the characters defined in Portable Character Set and do not begin with a digit.
/// > Other characters may be permitted by an implementation;
/// > applications shall tolerate the presence of such names.
/// > Uppercase and lowercase letters shall retain their unique identities and shall not be folded
/// > together.
/// > The name space of environment variable names containing lowercase letters is
/// > reserved for applications.
/// > Applications can define any environment variables with names
/// > from this name space without modifying the behavior of the standard utilities.
///
/// A declaration in a shell script has the form `declare var=...`, where the
/// `declare` keyword is optional.
fn env_depends_on_find_script(val: &EnvValueScript) -> Vec<&str> {
    // An explicit list always wins over the heuristic scan.
    if let Some(explicit) = &val.depends_on {
        return explicit.iter().map(String::as_str).collect();
    }

    val.script
        .iter()
        .flat_map(|line| RE_SH_PARAM.captures_iter(line))
        // Group 1 holds the name of a bare `$name`, group 2 that of a braced `${name}`.
        .filter_map(|found| found.get(1).or_else(|| found.get(2)))
        .map(|name| name.as_str())
        .collect()
}

/// Collects the names referenced through `${...}` placeholders in `val`.
///
/// Each returned name is the text between `${` and the closing `}` with surrounding
/// whitespace trimmed. Names are reported in order of appearance, duplicates included.
fn env_depends_on_find(val: &str) -> Vec<&str> {
    RE_VARIABLE
        .find_iter(val)
        .map(|placeholder| {
            let text = placeholder.as_str();
            // Every match starts with `${` and ends with `}`: cut off the two leading
            // bytes and the single trailing one.
            text[2..text.len() - 1].trim()
        })
        .collect()
}

/// Lists the variable names a single environment value refers to.
///
/// Scripts are analysed with `env_depends_on_find_script`, lists contribute the
/// references of all their items in order, and the remaining text-carrying variants
/// are scanned for `${...}` placeholders. Any other variant yields no dependencies.
fn env_depends_on(val: &EnvValue) -> Vec<&str> {
    // Pick the one piece of text to scan; variants needing special treatment return early.
    let text = match val {
        EnvValue::Script(script) => return env_depends_on_find_script(script),
        EnvValue::List(values) => {
            return values
                .iter()
                .flat_map(|value| env_depends_on_find(value))
                .collect();
        }
        EnvValue::Value(value) => value,
        EnvValue::Decode(EnvValueDecode { source, .. }) => source,
        EnvValue::Conditional(EnvValueConditioned { value, .. }) => value,
        EnvValue::PathGlob(EnvValuePathGlob { glob, .. }) => glob,
        _ => return vec![],
    };

    env_depends_on_find(text)
}

/// Merges two environment maps into one, ordered so that a variable is emitted
/// after the variables it depends on.
///
/// * A key present in both maps takes its value from `ext`.
/// * If both maps hold a profile (`EnvValue::Profile`) under the same key, the two
///   profiles are merged recursively instead of one replacing the other.
/// * Keys starting with `CARGO_MAKE_CURRENT_TASK_` always keep the value from
///   `base`; if `base` does not define such a key it is left out of the result.
///
/// Dependencies are discovered with `env_depends_on`; references to names that are
/// defined in neither map are treated as external and ignored.
///
/// # Errors
///
/// Returns an error message (code E001) if the variables depend on each other in a
/// cycle. The message lists every cycle that was found.
pub(crate) fn merge_env(
    base: &IndexMap<String, EnvValue>,
    ext: &IndexMap<String, EnvValue>,
) -> Result<IndexMap<String, EnvValue>, String> {
    let combined = [base, ext];
    let combined: Vec<_> = env_unique(&combined);

    let mut graph: GraphMap<&str, (), _> = DiGraphMap::new();

    // `combined` is already ordered and free of duplicates, so the nodes are
    // registered straight from it. Going through a `HashSet` here would make the
    // insertion order (and with it the sort result and the cycle report) vary
    // from run to run.
    for (key, _) in &combined {
        graph.add_node(*key);
    }

    for (key, val) in &combined {
        // combined is unique (only latest value),
        // which is why we do not need to delete any previously declared outbound edges.

        // if the env variable is in the current scope add an edge,
        // otherwise it is referencing an external variable.
        for used in env_depends_on(val) {
            if graph.contains_node(used) {
                graph.add_edge(*key, used, ());
            }
        }
    }

    debug!("env dependencies: {:?}", graph);

    let variables = match toposort(&graph, None) {
        Ok(sorted) => sorted,
        Err(_) => {
            // A cycle has been detected. The strongly connected components are
            // computed only now, on the error path, to tell the user which
            // variables are involved.
            let mut err =
                "A cycle between different env variables has been detected (E001, see: https://github.com/sagiegurari/cargo-make#e001 for more information)."
                    .to_owned();
            for scc in kosaraju_scc(&graph) {
                // Every node belongs to some component, but a component only
                // contains a cycle if it has more than one node or if its single
                // node has an edge to itself. Reporting the others would list
                // perfectly fine variables as `X -> X` cycles.
                let cyclic = match scc.as_slice() {
                    [] => false,
                    [single] => graph.contains_edge(*single, *single),
                    _ => true,
                };
                if !cyclic {
                    continue;
                }

                // Repeat the first name at the end to close the loop visually.
                let render = scc
                    .iter()
                    .chain(scc.first())
                    .copied()
                    .collect::<Vec<_>>()
                    .join(" -> ");
                err.push_str(&format!(" Cycle: {}.", render));
            }

            return Err(err);
        }
    };

    let mut merge = IndexMap::new();
    // Edges point from a variable to the variables it uses, so the topological
    // order lists users first; walk it backwards to emit dependencies first.
    for name in variables.into_iter().rev() {
        if name.starts_with("CARGO_MAKE_CURRENT_TASK_") {
            // CARGO_MAKE_CURRENT_TASK are handled differently and **always**
            // retain their old value
            if let Some(value) = base.get(name) {
                merge.insert(name.to_owned(), value.clone());
            }

            continue;
        }

        let value = match (base.get(name), ext.get(name)) {
            // Both sides define a profile under this key: merge them as well
            // instead of letting one replace the other.
            (Some(EnvValue::Profile(base)), Some(EnvValue::Profile(ext))) => {
                EnvValue::Profile(merge_env(base, ext)?)
            }
            // Otherwise the latest definition wins: `ext` if it has the key, else `base`.
            (_, Some(latest)) | (Some(latest), None) => latest.clone(),
            // Every graph node stems from one of the two maps, so there is nothing to emit.
            (None, None) => continue,
        };
        merge.insert(name.to_owned(), value);
    }

    Ok(merge)
}

/// Combines two env-file lists into a new one, with the entries of `extended`
/// placed before those of `base`.
///
/// Neither input is modified; the entries are cloned into the result.
pub(crate) fn merge_env_files(
    base: &mut Vec<EnvFile>,
    extended: &mut Vec<EnvFile>,
) -> Vec<EnvFile> {
    extended.iter().chain(base.iter()).cloned().collect()
}

/// Combines two env-script lists into a new one, with the scripts of `extended`
/// placed before those of `base`.
///
/// Neither input is modified; the scripts are cloned into the result.
pub(crate) fn merge_env_scripts(
    base: &mut Vec<String>,
    extended: &mut Vec<String>,
) -> Vec<String> {
    let mut merged = Vec::with_capacity(extended.len() + base.len());
    merged.extend_from_slice(extended.as_slice());
    merged.extend_from_slice(base.as_slice());
    merged
}