#!/usr/bin/env ruby
# coding: utf-8

require 'find'

# These members/tags are common to multiple events
BYTE_SIZE_COUNT = ['byte_size', 'count'].freeze

# Rules for events whose struct name ends with a well-known suffix.
#
# SUFFIX => [MESSAGE, COUNTERS, ADDITIONAL_TAGS]
#
# MESSAGE is the exact log message the event must emit, COUNTERS are the
# counter base names (they are wrapped as "component_<name>_total" by the
# validator), and ADDITIONAL_TAGS are the parameters the log must carry.
#
# The tables are frozen because they are shared, read-only lookup data.
EVENT_CLASSES = {
  'BytesReceived' => [
    'Bytes received.', ['received_bytes'], ['byte_size', 'protocol']
  ].freeze,
  'EventsReceived' => [
    'Events received.', ['received_events', 'received_event_bytes'], ['count', 'byte_size']
  ].freeze,
  'EventsSent' => [
    'Events sent.', ['sent_events', 'sent_event_bytes'], ['count', 'byte_size']
  ].freeze,
  'BytesSent' => [
    'Bytes sent.', ['sent_bytes'], ['byte_size', 'protocol']
  ].freeze,
}.freeze

# Names of the counters that get dedicated validation rules.
METRIC_NAME_EVENTS_DROPPED = 'component_discarded_events_total'.freeze
METRIC_NAME_ERROR = 'component_errors_total'.freeze

# Append `item` to the array stored under `key` in `hash`, starting a new
# array when the key has no entry yet. Returns the array that was stored.
def hash_array_add(hash, key, item)
  bucket = hash.fetch(key) { [] }
  bucket.push(item)
  hash.store(key, bucket)
end

# Tell whether `name` looks like a constant expression: either a
# double-quoted string literal or a `path::CONSTANT` reference whose final
# segment consists only of upper-case letters, digits and underscores.
#
# The final segment may be any length; the previous pattern lacked a
# quantifier and only accepted a single trailing character.
def is_constant?(name)
  return true if name.start_with?('"') && name.end_with?('"')

  name.match?(/\A.+::[A-Z0-9_]+\z/)
end

# Return the 1-based line number on which the first occurrence of `needle`
# starts inside `haystack`, or nil when `needle` does not occur.
#
# The line number is the count of newlines before the match plus one, which
# is also correct when the match starts at the very beginning of a line
# (counting the lines of the preceding text is one short in that case).
def find_line_number(haystack, needle)
  idx = haystack.index(needle)
  return nil if idx.nil?

  haystack[0, idx].count("\n") + 1
end

# A class to hold error reports and common functionality
#
# An Event collects what the scanner found for one event name: its members,
# the metrics it emits, the log calls it makes and how often it is used.
# `valid?` / `valid_with_handle?` check that data against the naming,
# logging and metric rules and record every violation in `reports`.
class Event
  attr_accessor :path, :skip_dropped_events, :uses, :skip_duplicate_check, :skip_validity_check, :impl_internal_event, :impl_register_event, :impl_event_handle
  attr_reader :name, :reports, :logs
  attr_writer :members

  # Create an empty record for the event called `name`.
  def initialize(name)
    @path = nil
    @skip_duplicate_check = false
    @skip_validity_check = false
    @skip_dropped_events = false
    @emits_component_events_dropped = false
    @name = name
    @reports = []
    @members = {}
    @counters = {}
    @metrics = {}
    @logs = []
    @uses = 0
    @impl_internal_event = false
    @impl_register_event = false
    @impl_event_handle = false
  end

  # Record a metric. Every metric is stored under "<type>:<name>"; counters
  # are additionally indexed by their bare name for the counter checks.
  def add_metric(type, name, tags)
    @metrics["#{type}:#{name}"] = tags
    if type == 'counter'
      @counters[name] = tags
    end
  end

  # Scan for counter names and tags
  def scan_metrics(block)
    # Only the `m` flag is used: in Ruby `m` already makes `.` match
    # newlines, while `s` selects the Windows-31J encoding and makes the
    # match raise on UTF-8 text containing non-ASCII characters.
    block.scan(/ (counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)",?(.+?)\)[;\n]/m) \
    do |type, name, tags|
      tags = Hash[tags.scan(/"([^"]+)" => (.+?)(?:,|$)/)]
      add_metric(type, name, tags)
    end
  end

  # Scan the registered event macro block
  #
  # `event_fields` becomes the member list, every assignment in
  # `handle_fields` is checked for a ComponentEventsDropped emission and for
  # a metric macro, and `emit_block` is scanned for log calls. `data_type`
  # is accepted for interface compatibility and is not inspected.
  def scan_registered_event(event_fields, handle_fields, data_type, emit_block)
    @members = event_fields.scan(/^ *([a-z0-9_]+): *(.+?),$/m).to_h
    handle_fields.scan(/^ *([a-z0-9_]+): *(.+?) *= *(.+?),$/m) do |_field, _field_type, assignment|
      scan_component_dropped_events(assignment)
      # This is a _slightly_ different regex than the one in `scan_metrics`.
      # The captured assignment starts directly at the macro name (the
      # spaces after `=` are consumed by the pattern above), so the macro is
      # anchored with a word boundary instead of a leading space.
      found = assignment.match(/\b(counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)"(,.+)?\)/m)
      next if found.nil?

      # `String#match` hands over a single MatchData object, so the groups
      # have to be taken from `captures`.
      type, name, tags = found.captures
      tags = Hash[(tags || '').scan(/"([^"]+)" => (.+?)(?:,|$)/)]
      add_metric(type, name, tags)
    end
    scan_logs(emit_block)
  end

  # Record one log call as [type, message, parameters].
  def add_log(type, message, parameters)
    @logs.append([type, message, parameters])
  end

  # Scan for log outputs and their parameters
  def scan_logs(block)
    block.scan(/
               (trace|debug|info|warn|error)! # The log type
                \(\s*(?:message\s*=\s*)? # Skip any leading "message =" bit
                (?:"([^({)][^("]+)"|([^,]+)) # The log message text
                ([^;]*?) # Match the parameter list
                \)(?:;|\n\s*}) # Normally would end with simply ");", but some are missing the semicolon
               /mx) \
    do |type, raw_message, var_message, parameters|
      # Keep only the parameter names: either `name = ...` or `?name` / `%name`.
      parameters = parameters.scan(/([a-z0-9_]+) *= .|[?%]([a-z0-9_.]+)/) \
                     .map { |assignment, simple| assignment or simple }

      # Prefer the string literal; otherwise fall back to the expression text.
      message = raw_message.nil? ? var_message : raw_message

      add_log(type, message, parameters)
    end
  end

  # Scan for the emission of ComponentEventsDropped.
  def scan_component_dropped_events(block)
    if block.match?(/(emit|register)!\(\s*ComponentEventsDropped\b/)
      @emits_component_events_dropped = true
    end
  end

  # The event signature is used to check for duplicates and is
  # composed from the member names and their types, the metric types,
  # names, and their tags, and the log messages and parameters. If no
  # metrics and no logs are defined for the event, the signature is
  # `nil` to skip duplicate checking.
  def signature
    if @metrics.length == 0 and @logs.length == 0
      nil
    else
      members = @members.map { |name, type| "#{name}:#{type}" }.sort.join(':')
      metrics = @metrics.map do |name, value|
        tags = value.keys.sort.join(',')
        "#{name}(#{tags})"
      end
      metrics = metrics.sort.join(';')
      logs = @logs.sort.join(';')
      "#{members}[#{logs}][#{metrics}]"
    end
  end

  # Validate the event against its own logs. Returns true when no report
  # has been recorded.
  def valid?
    valid_with_handle? self
  end

  # Validate the event, reading the logs from `handle` and the counters from
  # this event. Violations are appended to this event's reports, except for
  # the log-level checks, which are delegated to `handle` and therefore land
  # in `handle.reports`. Returns true when this event's report list is empty.
  def valid_with_handle?(handle)
    if @uses == 0
      append('Event has no uses.')
    end

    EVENT_CLASSES.each do |suffix, (required_message, counters, additional_tags)|
      if @name.end_with? suffix
        handle.logs.each do |type, message, parameters|
          if type != 'trace'
            append('Log type MUST be "trace!".')
          end
          if message != required_message
            append("Log message MUST be \"#{required_message}\" (is \"#{message}\").")
          end
          additional_tags.each do |tag_name|
            unless parameters.include? tag_name
              append("Log MUST contain tag \"#{tag_name}\"")
            end
          end
        end
        counters.each do |counter|
          counter = "component_#{counter}_total"
          counters_must_include_exclude_tags(counter, additional_tags - BYTE_SIZE_COUNT)
        end
      end
    end

    # Any number of error logs counts, not just exactly one.
    has_error_logs = handle.logs.any? { |type, _, _| type == 'error' }

    is_events_dropped_event = (@name.end_with? 'EventsDropped' or @counters.include? METRIC_NAME_EVENTS_DROPPED)

    # Validate <Name>Error events
    if (has_error_logs and !is_events_dropped_event) or @name.end_with? 'Error'

      # Name check
      append('Error events MUST be named "___Error".') unless @name.end_with? 'Error'
      # Outputs an error log
      handle.log_level_exactly('error')
      # Metric check
      counters_must_include_exclude_tags(METRIC_NAME_ERROR, ['error_type', 'stage'])

      # A missing counter has already been reported by the metric check, so
      # the tag comparison below only runs when the counter exists.
      error_counter_tags = @counters[METRIC_NAME_ERROR]

      # Make sure Error events contain the required parameters
      handle.logs.each do |type, message, parameters|
        if type == 'error'
          ['error_type', 'stage'].each do |parameter|
            unless parameters.include? parameter
              append("Error log for Error event MUST include parameter \"#{parameter}\".")
            end
          end

          unless error_counter_tags.nil?
            ['error_code', 'error_type', 'stage'].each do |parameter|
              if parameters.include? parameter and !error_counter_tags.include? parameter
                append("Counter \"#{METRIC_NAME_ERROR}\" must include \"#{parameter}\" to match error log.")
              end
            end
          end
        end
      end
    end

    # TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
    # (https://github.com/vectordotdev/vector/issues/13995)

    # Validate <Namespace>EventsDropped events
    if is_events_dropped_event && !@skip_dropped_events

      # Don't run the checks on event structs which themselves emit ComponentEventsDropped,
      # as the ComponentEventsDropped event is already checked.
      # Instead, verify that component_discarded_events_total is not being over-incremented.
      if @emits_component_events_dropped
        if @counters.include? METRIC_NAME_EVENTS_DROPPED
          append("Event emitting ComponentEventsDropped should not also increment counter `#{METRIC_NAME_EVENTS_DROPPED}`")
        end
      else

        # Name check
        append('EventsDropped events MUST be named "___EventsDropped".') unless @name.end_with? 'EventsDropped'

        # Outputs an error log or debug log. Which level is dependent on the value of the param `intentional`, however
        # because implementation can involve passing in the value of the `intentional` bool at compile time, we would need to
        # scan all the source code for places that emit this event to determine that.
        handle.log_level_one_of(['error', 'debug'])

        # Metric check
        counters_must_include_exclude_tags(METRIC_NAME_EVENTS_DROPPED, ['intentional'], ['reason', 'count'])

        # The event can reach this point by name alone, without the counter;
        # that is reported by the metric check, so skip the tag comparison.
        dropped_counter_tags = @counters[METRIC_NAME_EVENTS_DROPPED]

        # Make sure EventsDropped events contain the required parameters
        handle.logs.each do |type, message, parameters|
          if type == 'error'
            ['count', 'intentional', 'reason'].each do |parameter|
              unless parameters.include? parameter
                append("Error log for EventsDropped event MUST include parameter \"#{parameter}\".")
              end
            end

            unless dropped_counter_tags.nil?
              ['intentional'].each do |parameter|
                if parameters.include? parameter and !dropped_counter_tags.include? parameter
                  append("Counter \"#{METRIC_NAME_EVENTS_DROPPED}\" must include \"#{parameter}\" to match error log.")
                end
              end
            end
          end
        end
      end
    end

    @counters.each do |name, tags|
      # Only component_errors_total and component_discarded_events_total metrics are considered
      if ['component_errors_total', 'component_discarded_events_total'].include? name
        # Make sure defined tags to counters are constants
        tags.each do |tag, value|
          if tag == 'stage'
            if !value.start_with? 'error_stage::'
              append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_stage\" constant.")
            end
          elsif tag == 'error_type'
            if !value.start_with? 'error_type::'
              append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_type\" constant.")
            end
          end
        end
      end
    end

    @reports.empty?
  end

  # Report a violation unless at least one recorded log uses one of `levels`.
  def log_level_one_of(levels)
    if @logs.find_index { |type, message, parameters| levels.include? type }.nil?
      append("This event MUST log with one of these levels: #{levels}.")
    end
  end

  # Report a violation unless at least one recorded log uses `level`.
  def log_level_exactly(level)
    log_level_one_of([level])
  end

  # Add one violation message to this event's reports.
  def append(report)
    @reports.append(report)
  end

  private

    # Require that counter `name` exists, carries every tag in
    # `required_tags` and none of the tags in `exclude_tags`; each violation
    # is reported separately. When the counter is missing only that fact is
    # reported.
    def counters_must_include_exclude_tags(name, required_tags, exclude_tags = [])
      unless @counters.include? name
        append("This event MUST increment counter \"#{name}\".")
      else
        tags = @counters[name]
        required_tags.each do |tag|
          unless tags.include? tag
            append("Counter \"#{name}\" MUST include tag \"#{tag}\".")
          end
        end

        exclude_tags.each do |tag|
          if tags.include? tag
            append("Counter \"#{name}\" MUST NOT include tag \"#{tag}\".")
          end
        end
      end
    end

end

# Every event seen so far, keyed by name. Looking up an unknown name creates
# and stores a fresh Event for it.
$all_events = Hash::new { |hash, key| hash[key] = Event::new(key) }

# Running total of problems found; decides the exit status at the end.
error_count = 0

# Scan sources and build internal structures
Find.find('./src', './lib') do |path|
  # Work with paths relative to the current directory ("src/...", "lib/...").
  if path.start_with? './'
    path = path[2..]
  end

  if path.end_with? '.rs'
    text = File.read(path)

    # Count every `emit(...)` / `emit!(...)` / `register(...)` / `register!(...)`
    # call per event name.
    text.scan(/\b(?:emit!?|register!?)\((?:[a-z][a-z0-9_:]+)?([A-Z][A-Za-z0-9]+)/) \
    do |event_name,|
      $all_events[event_name].uses += 1
    end

    # Check log message texts for correct formatting.
    if path.start_with? 'src/'
      reports = []

      # Try to find all general usage of the various `tracing` macros.
      text.scan(/(
        (trace|debug|info|warn|error)!\( # Log type.
        ([^;]*?) # All parameters to the macro.
        \)(?:;|\n\s*}) # Handles usages that lack a trailing semicolon.
        )/mx) \
      do |_full, type, params|
        # Remember where this particular call starts before any later regex
        # operation replaces the last-match data. Using the offset (rather
        # than searching for the matched text again) keeps the reported line
        # correct when the same call text appears more than once in a file.
        match_offset = Regexp.last_match.begin(0)

        # Extract each parameter to the macros, which involves handling structured fields and
        # string literals. We parse them further below so that we can iterate through them to try
        # and determine what the actual log message is, depending on if it's set by using the
        # `message` field, or implicitly with a string literal.
        #
        # We also have some special handling in there for `tracing`-specific "target" and "parent"
        # settings which influence how the event is handled when being processed by a subscriber,
        # which we don't care about _here_ but need to account for in our pattern to parse things.
        params = params.scan(/("(?:[^"\\]++|\\.)*+"|(?:target|parent):\s*[^,]+|(\w+\s*=\s*(?:"(?:[^"\\]++|\\.)*+"|[%?]?[^,]+))|[%?][^,]+)/) \
          .map do |param|
            if /^\".*\"$/.match?(param[0].strip)
              { "type" => "litstr", "value" => param[0] }
            elsif param[0].include? "="
              parts = param[0].split('=', 2).map { |part| part.strip }
              { "type" => "named_field", "field" => parts[0], "value" => parts[1] }
            else
              { "type" => "field", "field" => param[0] }
            end
          end

        # See if we found a message field.
        message_param = params.find { |param|
          # Use the first string literal parameter.
          param["type"] == "litstr" ||
          # Or the first named field called `message` that has a value that is a string literal.
          (param["type"] == "named_field" && param["field"] == "message" && /^\".*\"$/.match?(param["value"]))
        }

        # We further scrutinize the message field, if we believe we found one. This lets us avoid
        # scenarios where variable interpolation is being used, since we can't reasonably detect if
        # an interpolated variable at the beginning or end of the message is capitalized or has a
        # trailing period, respectively.
        has_message = !message_param.nil?
        message = if has_message then message_param["value"].gsub(/^"|"$/, '') else nil end
        is_capitalized = !has_message || (message[0] == "{" || !message.match?(/^[a-zA-Z]/) || message.match?(/^[[:upper:]]/))
        has_trailing_period = !has_message || (message[-1, 1] == "}" || message.match?(/\.$/))

        match_reports = []
        match_reports.append('Message must start with a capital.') unless is_capitalized
        match_reports.append('Message must end with a period.') unless has_trailing_period
        unless match_reports.empty?
          # 1-based line of the call: newlines before the match, plus one.
          line_no = text[0, match_offset].count("\n") + 1
          match_reports.each { |report| reports.push("    #{report} (`#{type}` call on #{path}:#{line_no})") }
        end
      end

      unless reports.empty?
        reports.each { |report| puts report }
        error_count += reports.length
      end
    end

    # TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
    # (https://github.com/vectordotdev/vector/issues/13995)
    skip_dropped_events = text.match? /## skip check-dropped-events ##/i

    if (path.start_with? 'src/internal_events/' or path.start_with? 'lib/vector-common/src/internal_event/')
      # Scan internal event structs for member names
      text.scan(/[\n ]struct (\S+?)(?:<.+?>)?(?: {\n(.+?)\n\s*}|;)\n/m) do |struct_name, members|
        event = $all_events[struct_name]
        event.path = path
        event.skip_dropped_events = skip_dropped_events
        # `members` is nil for a unit struct (`struct Name;`).
        if members
          members = members.scan(/ ([A-Za-z0-9_]+): +(.+?),/).map { |member, type| [member, type] }
          event.members = members.to_h
        end
      end

      # Scan internal event implementation blocks for logs and metrics
      text.scan(/^(\s*)impl(?:<.+?>)? (InternalEvent|RegisterInternalEvent|InternalEventHandle) for ([A-Za-z0-9_]+)(?:<.+?>)? {\n(.+?)\n\1}$/m) \
      do |_space, trait, event_name, block|
        event = $all_events[event_name]
        event.path = path

        # Marker comments inside the impl block opt the event out of a check.
        event.skip_duplicate_check = block.match? /## skip check-duplicate-events ##/i
        event.skip_validity_check = block.match? /## skip check-validity-events ##/i

        if trait == 'InternalEvent'
          # Look-aside internal events that defer their implementation to a registered event.
          if ! block.include? 'register('
            event.impl_internal_event = true
            event.scan_metrics(block)
            event.scan_logs(block)
            event.scan_component_dropped_events(block)
          end
        elsif trait == 'RegisterInternalEvent'
          # This is just a dummy name and will cause spurious errors, but it will at least surface
          # the issue of using the macro.
          event.impl_register_event = event_name
          event.append("Do not implement RegisterInternalEvent manually. Use the registered_event! macro instead.")
        elsif trait == 'InternalEventHandle'
          event.impl_event_handle = true
          event.scan_logs(block)
        end
      end
    end

    # Scan for the `registered_event` macro
    text.scan(/^(crate::|vector_common::|)registered_event! *[({]\n *([A-Za-z0-9_]+) *({(.*?)})? *=> *{(.+?)}$.*^ *fn emit\(\&self, [a-z0-9_]+: ([A-Za-z0-9_]+)\) {$(.+?)}\n(\);|\})$/m) \
    do |_, event_name, _, event_fields, handle_fields, data_type, emit_block, _|
      event = $all_events[event_name]
      event.path = path
      # The event field list is optional in the macro; treat a missing one as empty.
      event.scan_registered_event(event_fields || "", handle_fields, data_type, emit_block)
    end
  end
end

# Event names grouped by signature; a signature with more than one name is a
# duplicate.
$duplicates = Hash::new { |hash, key| hash[key] = [] }

$all_events.each do |name, event|
  # Check for duplicated signatures
  if !event.skip_duplicate_check && (event.impl_internal_event || event.impl_event_handle)
    signature = event.signature
    # A nil signature means the event has neither metrics nor logs.
    $duplicates[signature].append(name) if signature
  end

  # Check events for validity
  next if event.skip_validity_check

  if event.impl_internal_event
    unless event.valid?
      puts "#{event.path}: Errors in event #{event.name}:"
      event.reports.each { |report| puts "    #{report}" }
      error_count += 1
    end
  elsif event.impl_register_event
    # Use `fetch` so that an unknown name yields nil. Plain `[]` would
    # auto-create the entry through the hash's default block, which hides
    # the missing handle and adds a key while the hash is being iterated.
    handle = $all_events.fetch(event.impl_register_event, nil)
    if handle
      unless event.valid_with_handle? handle
        puts "#{event.path}: Errors in event #{event.name}:"
        event.reports.each { |report| puts "    #{report}" }
        error_count += 1
      end
    else
      puts "Registered event #{event.name} references nonexistent handle #{event.impl_register_event}"
      error_count += 1
    end
  end
end

$duplicates.each do |signature, dupes|
  if dupes.length > 1
    dupes = dupes.join(', ')
    puts "Duplicate events detected: #{dupes}"
    error_count += 1
  end
end

puts "#{error_count} error(s)"
exit 1 if error_count > 0
