# See https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples:-patterns

# Automatically suggested patterns
# hit-count: 5753 file-count: 170
# hex runs
# 16 or more consecutive hex digits delimited by word boundaries
\b[0-9a-fA-F]{16,}\b

# hit-count: 5370 file-count: 70
# sha-... -- uses a fancy capture
# 40 or more lowercase hex digits between matching delimiters: `\g{-1}` must
# repeat whichever opener (', " or &quot;) the capture group matched
(['"]|&quot;)[0-9a-f]{40,}\g{-1}

# hit-count: 2011 file-count: 538
# https/http/ftp/file urls
# the final character class omits `?!:,.;`, so a match never ends on that punctuation
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]

# hit-count: 936 file-count: 350
# GitHub SHAs (markdown)
# a github.com path with at least three segments, optionally introduced by a
# markdown link label made of hex digits (`[abc123](https:/`)
(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)

# hit-count: 302 file-count: 82
# version suffix <word>v#
# `V` after two uppercase letters, or `v` after two letters of the same case,
# followed by digits; the lookbehinds keep the preceding word out of the match
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))

# hit-count: 176 file-count: 21
# hex digits including css/html color classes:
# 1) `\x` or `\X` followed by two hex digits and a word boundary
# 2) `0x` or `0X` literals, with an optional u/U/l/L suffix or `u<digits>`
# 3) the same body after `\u`, `u+`, `U+`, `#`, `#x` or `%23`
# patterns 2 and 3 require at least two adjacent characters from [a-fA-FgGrR]
(?:\\[xX])[0-9a-fA-F]{2}\b
(?:0[xX])[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|u\d+)\b
(?:\\u|[uU]\+|#x?|\%23)[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|u\d+)\b

# hit-count: 152 file-count: 2
# ANSI color codes
# an escape character (spelled `\u001b`, `\x1b`, `\u{1b}`, or present as the raw
# 0x1b byte) followed by `[`, one or two `;`-separated numbers, and `m`
(?:\\(?:u00|x)1[Bb]|\x1b|\\u\{1[Bb]\})\[\d+(?:;\d+|)m

# hit-count: 141 file-count: 23
# Non-English
# words that mix ASCII letters with one of the listed accented letters; three alternatives:
#  - optional ASCII letters, an accented letter, at least three ASCII letters,
#    then any further ASCII or accented letters
#  - three or more ASCII letters followed by an accented letter
#  - an accented letter followed by three or more ASCII letters
[a-zA-Z]*[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}

# hit-count: 115 file-count: 53
# IPv6
# three to seven colon-terminated groups of up to four hex digits (a group may
# be empty, as in `::`), followed by one final group
\b(?:[0-9a-fA-F]{0,4}:){3,7}[0-9a-fA-F]{0,4}\b

# hit-count: 78 file-count: 23
# URL escaped characters
# `%` + an uppercase hex digit + an uppercase hex letter (A-F)
\%[0-9A-F][A-F]
# `%2b` only when two or more lowercase letters follow; the lookahead does not consume them
\%2b(?=[a-z]{2,})

# hit-count: 66 file-count: 12
# IServiceProvider
# the leading `I` of an identifier built solely from capitalized words of three
# or more letters; the lookahead leaves the rest of the name to be checked
\bI(?=(?:[A-Z][a-z]{2,})+\b)

# hit-count: 64 file-count: 24
# uuid:
# hex groups of 8-4-4-4-12 digits separated by hyphens
\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b

# hit-count: 36 file-count: 36
# set arguments
# `set` followed by option clusters drawn from [abefimouxE]; matches only when
# at least one cluster has three or more flag letters
\bset(?:\s+-[abefimouxE]{1,2})*\s+-[abefimouxE]{3,}(?:\s+-[abefimouxE]+)*

# hit-count: 27 file-count: 14
# tar arguments
# `tar` or `gtar` (optionally `.exe`, optionally preceded by a literal `\n`)
# followed by one or more long options, short option clusters, or bare
# flag-letter clusters, each with an optional `=value`
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+

# hit-count: 16 file-count: 5
# curl arguments
# `curl` (optionally preceded by a literal `\n`) followed by short option
# clusters, at least one of which has three or more letters
\b(?:\\n|)curl(?:\s+-[a-zA-Z]{1,2}\b)*(?:\s+-[a-zA-Z]{3,})(?:\s+-[a-zA-Z]+)*

# hit-count: 15 file-count: 6
# Compiler flags
# Each pattern consumes only the flag prefix (plus the single delimiter before
# it); the lookahead leaves the word that follows to be spell-checked on its own.
# `-D`, `-X` and `-L` are recognized at line start or after a tab, a space, or one of ,>"'`=(
(?:^|[\t ,>"'`=(])-[D](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})
(?:^|[\t ,>"'`=(])-[XL](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})
# `-C` must follow a character other than `-`, `\`, `|` or an alphanumeric, must
# not begin `-Class`, and must be followed by three or more lowercase letters
[^-\\|a-zA-Z0-9]-C(?!lass)(?=[a-z]{3,})

# hit-count: 15 file-count: 2
# AWS VPC
# the literal `vpc-` followed by one or more word characters
vpc-\w+

# hit-count: 10 file-count: 6
# Python stringprefix / binaryprefix
# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
# The trailing group is a lookahead, so only the prefix and the opening quote
# are masked; the first word of the string itself is still spell-checked.
(?<!')\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)'(?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})

# hit-count: 9 file-count: 5
# githubusercontent
# `/<subdomain>.githubusercontent.com/` followed by any run of path and query characters
/[-a-z0-9]+\.githubusercontent\.com/[-a-zA-Z0-9?&=_\/.]*

# hit-count: 7 file-count: 3
# Update Lorem based on your content (requires `ge` and `w` from https://github.com/jsoref/spelling; and `review` from https://github.com/check-spelling/check-spelling/wiki/Looking-for-items-locally )
# grep '^[^#].*lorem' .github/actions/spelling/patterns.txt|perl -pne 's/.*i..\?://;s/\).*//' |tr '|' "\n"|sort -f |xargs -n1 ge|perl -pne 's/^[^:]*://'|sort -u|w|sed -e 's/ .*//'|w|review -
# Warning, while `(?i)` is very neat and fancy, if you have some binary files that aren't proper unicode, you might run into:
## Operation "substitution (s///)" returns its argument for non-Unicode code point 0x1C19AE (the code point will vary).
## You could manually change `(?i)X...` to use `[Xx]...`
## or you could add the files to your `excludes` file (a version after 0.0.19 should identify the file path)
# Lorem
# a run of word characters, whitespace, commas and periods that contains at
# least one of the listed filler words; `(?i)` makes the word list case-insensitive
(?:\w|\s|[,.])*\b(?i)(?:amet|consectetur|cursus|dolor|eros|ipsum|lacus|libero|ligula|lorem|magna|neque|nulla|suscipit|tempus)\b(?:\w|\s|[,.])*

# hit-count: 6 file-count: 4
# data url in parens
# a parenthesized `data:` URL containing at least one run of three or more letters
\(data:[^)]*?(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})[^)]*\)

# hit-count: 4 file-count: 4
# Contributor
# a markdown link whose target is a github.com URL with a single path segment
\[[^\]]+\]\(https://github\.com/[^/\s"]+\)

# hit-count: 3 file-count: 2
# Google Storage
# a hostname ending in `storage<digits>.googleapis.com`, with an optional path
\b[-a-zA-Z0-9.]*\bstorage\d*\.googleapis\.com(?:/\S*|)

# hit-count: 2 file-count: 2
# While you could try to match `http://` and `https://` by using `s?` in `https?://`, sometimes there ... (NOTE: this sentence is cut off here; its ending is not recoverable from this file)
# YouTube url
#\b(?:(?:www\.|)youtube\.com|youtu.be)/(?:channel/|embed/|user/|playlist\?list=|watch\?v=|v/|)[-a-zA-Z0-9?&=_%]*

# hit-count: 2 file-count: 2
# discord
#/discord(?:app\.com|\.gg)/(?:invite/)?[a-zA-Z0-9]{7,}

# hit-count: 2 file-count: 2
# basic auth
# a double-quoted `Basic ` value of three or more characters from the listed set
"Basic [-a-zA-Z=;:/0-9+]{3,}"

# hit-count: 1 file-count: 1
# mailto urls
# `mailto:` followed by three or more address or query characters
mailto:[-a-zA-Z=;:/?%&0-9+@.]{3,}

# hit-count: 1 file-count: 1
# Twitter markdown
# a markdown link whose label starts with `@` and whose target is a
# https://twitter.com/ URL with one path segment, optionally followed by
# `/status/<digits>` and a query string
\[\@[^[/\]:]*?\]\(https://twitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)\)

# Windows file path
# a literal backslash followed by `r` and at least three more lowercase letters
\\r[a-z]{3,}

# hit-count: 1 file-count: 1
# the negative lookahead here is to allow catching 'templatesz' as a misspelling
# but to otherwise recognize a Windows path with \templates\foo.template or similar:
\\(?:necessary|r(?:eport|esolve[dr]?|esult)|t(?:arget|emplates?))(?![a-z])

# in a version of check-spelling after @0.0.21 printf markers won't be automatically consumed
# printf markers
# a `\n`, `\r` or `\t` escape that is not itself preceded by a backslash
(?<!\\)\\[nrt]

# Project-specific patterns (no hit counts recorded for these).
# Most of them match a literal key, macro call or assignment followed by a
# quoted or rest-of-line value, so that the value text is masked from the
# spell checker together with the key.
decode_base64!\(".*"
regex: '.*
"data": ".*"
"value": "[^"]{20,}"
"encrypted_message": "[^"]*
result: "[^" ]{20,}
build_id="[a-f0-9]+
\@models: .*
x-amz-id-2":"[^" ]+"
escapes: /.*/
\s\s+examples: \["[^" ]+"\]
func_args!\[value: r#".*"
# note: the next pattern begins with a literal space
 "[^"]*="
format!\("[^"]+"
value!\(b".*"
decode_mime_q!\(".*"
parse_internal_q::.*
elb: ".*"
seed = ".*"
seed: ".*"
sha: "[0-9a-f]+", date: "[^"]+", description: "[^"]+"
"mongodb://[^"]+"
author: "[^"]+"
contributors: \[".*"\]
uses: [^/]+/[^/@]+@
let input = r#.*
shared_key = ".*"
Ok\("[a-zA-Z0-9]+"
assert_eq!\([a-z.]+, "[^"]+"\)
value!\("[^"]+"\)
"HostId":".*"
"datadog_api_key": ".*"
nkey: ".*"
token: "[a-zA-Z0-9]+"
json = ".*"
Value::from\("[a-zA-Z0-9]+"
client_token = ".*"
splunk_hec_token"[:,] ".*"
CyberArk.*
# a literal `\s` between lowercase letters; the lookarounds keep the letters out of the match
(?<=[a-z])\\s(?=[a-z]{2,})
decode_base64\("[^"]+"\)
# `s'` not preceded by `'`, only when a closing `'` appears later on the line
(?<!')\bs'(?=[^']*')
examples\s*[:=]\s*\[?"[^"]+"
# everything between the BEGIN and END markers of an RSA private key
BEGIN RSA PRIVATE KEY.*END RSA PRIVATE KEY
(?:return: |result: Ok\()"[-a-zA-Z=;:/0-9+]{24,}"
nkey\.nkey = "[A-Z0-9]+"
# the literal token `k8s`
k8s
(?:access_key_id|secret_access_key) = "[A-Z]+"
"foreground": "[0-9a-f]{6}"

# Questionably acceptable forms of `in to`
# Personally, I prefer `log into`, but people object
# https://www.tprteaching.com/log-into-log-in-to-login/
\b[Ll]og in to\b

# acceptable duplicates
# In the patterns below `\g{-1}` refers back to the most recent capture group,
# i.e. the captured word must be repeated verbatim.
# ls directory listings
# a file-type character and three permission triplets, then a number, two
# whitespace-separated tokens and another number (leading columns of a long listing)
[-bcdlpsw](?:[-r][-w][-Ssx]){3}\s+\d+\s+\S+\s+\S+\s+\d+\s+
# mount
# `mount -t <word> <same word>`
\bmount\s+-t\s+(\w+)\s+\g{-1}\b
# C types and repeated CSS values
# one of the listed words repeated one or more times, separated by single spaces
\s(all|center|Coc|div|inherit|long|LONG|many|none|normal|pretty|really|solid|that|thin|transparent|very)(?: \g{-1})+\s
# C struct
# `struct <word> <same word>`
\bstruct\s+(\w+)\s+\g{-1}\b
# go templates
# a word repeated immediately before a backquoted `graphql:`, `json:` or `yaml:` tag
\s(\w+)\s+\g{-1}\s+\`(?:graphql|json|yaml):
# javadoc / .net
# a repeated word after `@param` / `\param` / `@groupname` / `\groupname`, or
# after `public` / `private` with optional `static` / `readonly` modifiers
(?:[\\@](?:groupname|param)|(?:public|private)(?:\s+static|\s+readonly)*)\s+(\w+)\s+\g{-1}\s

# Commit message -- Signed-off-by and friends
# a whole line consisting of one of the listed trailers, `: `, and either
# `name <address>` or free text without `<`
^\s*(?:(?:Based-on-patch|Co-authored|Helped|Mentored|Reported|Reviewed|Signed-off)-by|Thanks-to): (?:[^<]*<[^>]*>|[^<]*)\s*$

# Autogenerated revert commit message
# a whole line `This reverts commit <40 hex digits>.`
^This reverts commit [0-9a-f]{40}\.$

# ignore long runs of a single character:
# one letter followed by three or more repeats of itself (four or more in total)
\b([A-Za-z])\g{-1}{3,}\b

# ignore comment in package-msi.sh
# `[#]` is used instead of a bare `#` because a line starting with `#` is a comment in this file
[#].*custom\.a28ecdc.*

# ignore specific user:password string containing special chars for unit testing
user:P@ssw0rd

# Ignore base64 encoded values in Prometheus Pushgateway URL paths
# a `/`, any text ending in `@base64`, another `/`, and the rest of the line
/.+@base64/.+

# Ignore base64 encoded values in VRL examples (requires padding to avoid false positives)
# a double-quoted run of base64 characters ending in `==`
"[A-Za-z0-9+\/]*=="

# ignore uuid_from_friendly_id argument
uuid_from_friendly_id!\(".*"\)

# Ignore punycode
# `xn--` followed by lowercase letters, digits and hyphens
\bxn--[-0-9a-z]+

# GitHub username regex derived from https://github.com/shinnn/github-username-regex
# [a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}
# (The line above is a comment kept for reference; the expression is inlined in
# each pattern below. It matches an alphanumeric character followed by up to 38
# alphanumerics or hyphens, where every hyphen must be followed by an alphanumeric.)
# Every pattern below is anchored to the whole line (`^...$`) and uses `(?i)` so
# that `[a-z\d]` also accepts uppercase letters.

# authors array in .md files
# `authors: ["name", "name", ...]` with one or more quoted usernames
(?i)^authors: \["[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}"(?:, "[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}")*\]$

# changelog.d fragment authors line
# `authors:` followed by one or more space-separated usernames
(?i)^authors:(?: [a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})+$

# VRL release authors (embedded in releases/*.cue files)
# an optionally indented `authors:` list followed by a parenthesized vectordotdev/vrl pull request URL
(?i)^\s*authors:(?: [a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})+ \(https://github\.com/vectordotdev/vrl/pull/\d+\)$

# Release authors (embedded in releases/*.cue files)
# Allow "contributors:" lines with one or more GitHub usernames in a JSON-style array
(?i)^\s*contributors: \["[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}"(?:, "[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}")*\]$

