#!/bin/sh
# tla-update-ids -- Handle adding arch id-tags to new files, removing
#	deleted explicit ids, and some renames
#
#  Copyright (C) 2003, 2004  Miles Bader <miles@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written by Miles Bader <miles@gnu.org>
#
#-
#   -T, --tagline-rules  Display the rules used for determining file tagline
#                        syntax (in the same format as {arch}/=tagline-rules),
#                        and exit.
#
#   -h, --help           Display a help message and exit
#   -V, --version        Display a release identifier string and exit
# 
# This command will find files in the current project tree which are
# missing arch id-tags, and give them id-tags; also it will find
# explicit id-tags for which the corresponding file has been removed,
# and remove those id-tags.
#
# It will also try to discover cases where a file with an explicit
# id-tag has been renamed, and move the explicit id-tag (instead of
# removing the old id-tag, and giving the file a new one); such
# detection is done by comparing the contents of any `new' files with
# the most recent contents of files that have been removed, and seeing
# whether less than 10% of their lines are different.
#
# When adding an id, if the project tree's id-tagging-method is
# `tagline', an attempt is made to identify the file's type, and add an
# appropriate tagline; if the tree's id-tagging-method is `explicit', or
# a file's type cannot be identified, then an explicit id-tag is added
# instead.
#
# An exit status of 0 means nothing was done, 1 means some taglines or
# explicit ids were added, and anything else means there was some sort
# of error.
#
# For taglines, the builtin rules governing how file types are
# identified, and what tagline syntaxes are used, may be augmented via
# an {arch}/=tagline-rules file; that file consists of lines, where
# each line should have one of the following forms:
#
#       KIND[.LEVEL]  REGEXP  TYPE
#  or:  tagline       TYPE    FORMAT
#  or:  end-marker    TYPE    REGEXP
#  or:  # COMMENT
#
# where KIND is "name" to match whole filenames, "ext" to match file
# extensions, "header" to match the first line of the file, and "dir" to
# match directories (in that order of priority).  LEVEL is a priority,
# where lower levels are searched first; default rules are level 9 or 10,
# and user rules with no explicit priority are level 5.
#
# REGEXP is an egrep-style (`extended') regular expression used for
# matching (within the context of KIND), and a positive match means a
# tagline of type TYPE is used.
#
# FORMAT should contain the actual tagline to add for TYPE, with a
# single %s where the actual tag value should be substituted (any other
# occurrences of % should be escaped by doubling them, e.g. %%).
#
# Both REGEXP and FORMAT may contain the following special backslash
# escape sequences: \n \t \s \\  (\s means a space)
# other occurrences of backslash are left unchanged.

# (---- beginning of hdr.shpp ----)
# hdr.shpp

# Name this script was invoked as; used as the prefix of diagnostics and
# in the usage and version output.  $0 is quoted so that an invocation
# path containing whitespace is handed to basename as a single operand.
me=`basename "$0"`

# Installation directory and the external programs used below.  The
# tool variables are exported so that they are also visible to the
# shell commands started from within the embedded AWK programs (which
# refer to $TLA and $UUIDGEN in their command strings).
bindir='/usr/bin'
AWK='/usr/bin/nawk'; export AWK
TLA='tla'; export TLA
SED='/bin/sed'; export SED
UUIDGEN='uuidgen'; export UUIDGEN

# (---- TLA_TOOLS_VERSION defined from ,tla-tools-version ----)
TLA_TOOLS_VERSION='jgoerzen@complete.org--debian/tla-tools--debian--1.0--patch-12
'
# (---- end of TLA_TOOLS_VERSION defined from ,tla-tools-version ----)

# Prefix prepended to a bare script name to locate it: "$bindir/" when
# bindir is set, the empty string otherwise.
TLA_TOOL_PFX="${bindir+$bindir/}"
export TLA_TOOL_PFX

# When set to "yes", --unescaped is added to the `tla tree-lint'
# invocations further down; any other value adds nothing.
TLA_ESCAPE='no'

case "$TLA_ESCAPE" in
  yes) TLA_UNESCAPED_OPT='--unescaped' ;;
  *)   TLA_UNESCAPED_OPT='' ;;
esac

# (---- end of hdr.shpp ----)
# (---- beginning of cmd-line.shpp ----)
# cmd-line.shpp -- Command-line helper functions for shell scripts

# Path of this script file; the help functions read their text from its
# header comment.  An invocation name containing a slash is used as is,
# a bare name gets the tool prefix prepended.
case "$0" in
  */*) script="$0" ;;
  *)   script="${TLA_TOOL_PFX}$0" ;;
esac

# Print a one-line usage message on stdout.
#
# The header comment of $script is scanned from the top.  If a line
# matching "# Usage:" is reached first, it is printed with its leading
# "# " removed.  If instead a line is reached that starts with a
# character other than "#", or that consists only of "#" followed by
# optional dashes and spaces, a default
# "Usage: $me [--help|--version]" line is printed.  Both sed commands
# quit right after printing, so exactly one line is produced.
usage ()
{
  $SED -n -e '/^\([^#]\|#-* *$\)/{s@.*@Usage: '"$me"' [--help|--version]@p;q;}'	\
         -e '/^# *Usage:/,/^# *$/{s/^# //p;q;}'				\
     < "$script"
}

# Print the short description found at the top of the header comment of
# $script, re-flowed through fmt.
#
# Reading stops at the first line that starts with a character other
# than "#", that consists of optional dashes followed by "#" and
# optional spaces, or that starts with "# Usage:".  The "#!" line is
# dropped.  On the remaining lines everything up to and including a
# "-- " separator (as in "# NAME -- description") is removed, then the
# leading "#" and any blanks after it are stripped and the line printed.
#
# NOTE(review): the stop pattern here is "-*# *$" while usage uses
# "#-* *$"; presumably both were meant to be the same -- confirm.
short_help ()
{
  $SED -n -e '/^\([^#]\|-*# *$\|# *Usage:\)/q'				\
	 -e '/^#!/d;s/^.*-- */# /;s/^#[ 	]*//p'			\
     < "$script" | fmt
}

# Print the detailed help text from the header comment of $script.
#
# Output starts after the "#-" marker line and covers the comment lines
# that follow it, each with its leading "# " (or a bare "#") removed;
# the marker line itself does not match the substitution and so is not
# printed.  Reading stops at the first blank line of the file.
help_body ()
{
  $SED -n '/^ *$/q;/^#-/,/^[^#]/s/^#\( \|$\)//p' < "$script"
}

# Print the complete --help text on stdout: the usage line, the short
# description, an empty separator line, and the detailed help body.
help ()
{
  usage
  short_help
  echo
  help_body
}

# Print the --version text on stdout: a "$me (tla-tools) VERSION" line,
# followed by the "Written by" and "Copyright" lines taken from the
# header comment of $script (each preceded by an empty line).  Only the
# part of the header before the first non-comment line or the "#-"
# marker is searched.
version ()
{
  # TLA_TOOLS_VERSION ends in a newline; command substitution strips it.
  # This is done inline rather than through a `local' variable, because
  # `local' is not part of POSIX sh and this script runs under /bin/sh.
  echo "$me (tla-tools) $(echo "$TLA_TOOLS_VERSION")"
  $SED -n '/^[^#]/q;/^#-/q;s/^# *\(Written by\)/\
\1/p' < "$script"
  $SED -n '/^[^#]/q;/^#-/q;s/^# *\(Copyright\)/\
\1/p' < "$script"
}

# Report on stderr that option $1 is not recognized, followed by a hint
# to try --help.
unrec_opt ()
{
  echo "$me: unrecognized option \`$1'" >&2
  echo "Try \`$me --help' for more information." >&2
}

# Report a command-line error on stderr: the usage line followed by a
# hint to try --help.
cmd_line_err ()
{
  {
    usage
    echo "Try \`$me --help' for more information."
  } >&2
}

# Print the value part of a long option of the form --NAME=VALUE given
# as $1, i.e. everything after the first "=".
#
# printf is used instead of echo so that the argument is passed through
# verbatim: echo may interpret backslash sequences or treat the
# argument as one of its own options, depending on the shell.
long_opt_val ()
{
  printf '%s\n' "$1" | $SED 's/^[^=]*=//'
}

# Print the value attached to a short option of the form -XVALUE given
# as $1, i.e. everything after the dash and the option letter.
#
# printf is used instead of echo because the argument always starts
# with a dash: an argument such as "-ne" would otherwise be consumed by
# echo as its own options and the value would be lost.
short_opt_val ()
{
  printf '%s\n' "$1" | $SED 's/^-.//'
}

# (---- end of cmd-line.shpp ----)

# (---- TLA_AWK_FUNS defined from tla-tools-funs.awk ----)
TLA_AWK_FUNS='# tla-tools-funs.awk -- AWK functions used by my tla-* shell scripts
#
# This text is the body of a single-quoted shell string which is spliced
# into the AWK programs further down, so a literal single quote has to
# be written as the four characters quote-backslash-quote-quote.

# Append ARG, quoted for the shell, to the command string CMD and return
# the result.  A false ARG (empty, unset or zero) is skipped, which is
# how the callers below cope with unused optional arguments.
function _append_cmd_arg(cmd, arg)
{
  if (arg) {
    gsub (/'\''/, "'\''\\'\'''\''", arg)
    cmd = cmd " '\''" arg "'\''"
  }
  return cmd
}

# Return a shell command string corresponding to CMD with args
# ARG1...ARG4.  CMD is included as-is, so can contain shell
# meta-characters; ARG1...ARG4 are quoted to prevent evaluation by the
# shell, and correctly handle any embedded spaces.
function make_cmd(cmd, arg1, arg2, arg3, arg4)
{
  cmd = _append_cmd_arg(cmd, arg1)
  cmd = _append_cmd_arg(cmd, arg2)
  cmd = _append_cmd_arg(cmd, arg3)
  cmd = _append_cmd_arg(cmd, arg4)
  return cmd
}

# Run CMD with args ARG1...ARG4, return non-zero if successful.
# CMD is passed raw to the shell, so can contain shell meta-characters;
# ARG1...ARG4 are quoted to prevent evaluation by the shell, and 
# correctly handle any embedded spaces.  Returns 1 if the command
# succeeded, and 0 otherwise.
function run_cmd(cmd, arg1, arg2, arg3, arg4)
{
  # print "run_cmd: " make_cmd(cmd, arg1, arg2, arg3, arg4)
  return (system(make_cmd(cmd, arg1, arg2, arg3, arg4)) == 0) ? 1 : 0
}

# Run CMD with args ARG1...ARG4, return the first line of output, or 0
# if the command returned a failure status (or the command could not be
# executed).  CMD is passed raw to the shell, so can contain shell
# meta-characters; ARG1...ARG4 are quoted to prevent evaluation by the
# shell, and correctly handle any embedded spaces.
function run_cmd_first_line(cmd, arg1, arg2, arg3, arg4  ,result)
{
  cmd = make_cmd(cmd, arg1, arg2, arg3, arg4)
  if ((cmd| getline result) <= 0)
    result = 0
  close (cmd)
  # print "run_cmd_first_line: " cmd " => " result
  return result
}

# Return the first line of FILE
function file_first_line(file)
{
  return run_cmd_first_line("sed 1q", file)
}

# Return the last line of FILE
function file_last_line(file)
{
  return run_cmd_first_line("sed -n", "$p", file)
}

# Return the number of lines in FILE
function file_num_lines(file)
{
  return run_cmd_first_line("wc -l <", file) + 0
}

# Return 1 if FILE is a directory (tested by running ls -d on FILE/.),
# and 0 otherwise
function file_is_dir(file)
{
  return run_cmd("ls -d >/dev/null 2>/dev/null", file "/.")
}

# Return true if a line can be read from FILE or FILE is readable but
# empty; false if reading it fails
function file_exists(file  ,line,result)
{
  result = (getline line < file)
  close (file)
  return result >= 0
}

# Append TEXT to FILE, with an intervening blank line if LAST_LINE
# isn'\''t blank.  Returns 1 if successful, and 0 otherwise.
function append_text(file, text, last_line  ,append_cmd)
{
  append_cmd = make_cmd("cat >>", file)
  if (last_line && last_line !~ /^[ \t]*$/)
    print "" |append_cmd
  printf ("%s\n", text) |append_cmd
  return close (append_cmd) == 0
}

# Return the name of the .arch-ids directory which holds the explicit
# id-tag of FILE: DIR/.arch-ids if FILE has a directory part DIR, and
# plain .arch-ids otherwise.
function file_explicit_id_dir(file  ,dir)
{
  dir = file
  # Whether FILE has a directory part is decided by whether the final
  # /COMPONENT could be stripped.  Comparing the directory with the
  # base name instead would misclassify a path such as foo/foo, whose
  # directory and base name are the same string.
  if (sub (/\/[^\/]*$/, "", dir) && dir != "")
    return dir "/.arch-ids"
  else
    return ".arch-ids"
}

# Return the name of the explicit id-tag file of FILE, which is
# BASENAME.id inside the directory given by file_explicit_id_dir
function file_explicit_id_file(file  ,dir)
{
  dir = file_explicit_id_dir(file)
  sub (/.*\//, "", file)
  return dir "/" file ".id"
}

# The inverse of file_explicit_id_file: return the name of the file
# which the explicit id-tag file FILE belongs to
function file_from_explicit_id_file(file  ,dir)
{
  sub (/\.id$/, "", file)
  
  dir = file
  sub (/\/[^\/]*$/, "", dir)
  sub (/.*\//, "", file)

  # DIR now ends in .arch-ids; removing that leaves either the empty
  # string or the parent directory with its trailing slash
  sub (/\.arch-ids$/, "", dir)

  return dir file
}

# Return true if FILE has an explicit id-tag file
function file_has_explicit_id(file)
{
  return file_exists(file_explicit_id_file(file))
}

# Return a prefix suitable for prepending to filenames in the current
# directory to make them properly project-tree-root relative, to the
# tree-root TREE_ROOT; if TREE_ROOT is zero (or not given), then the tla
# `tree-root'\'' command is invoked to compute the current tree-root.  If
# the current directory is a tree-root, then the result is the empty
# string.
function tree_root_prefix(tree_root  ,cwd)
{
  if (! tree_root)
    tree_root = run_cmd_first_line("$TLA tree-root 2>/dev/null")
  cwd = run_cmd_first_line("pwd")
  if (cwd != tree_root && substr (cwd, 1, length (tree_root)) == tree_root)
    return substr (cwd, length (tree_root) + 2) "/"
  else
    return ""
}

# Return the path to FILE in a pristine version (either a revision
# library entry or a pristine tree) of the latest revision, or 0 if one
# cannot be found.  The location of the pristine version is looked up
# once and remembered in the global variable pristine_root.
function pristine_file(file  ,latest_rev,revlib,revlibs_cmd,revlibs_cmd_status,greedy)
{
  if (! pristine_root) {
    # Find the latest revision and make sure we have a pristine tree for
    # it; by `pristine tree'\'' we really mean revlib entry or pristine tree

    latest_rev = run_cmd_first_line("$TLA logs -f | sed -n '\''$p'\''")

    # See if we'\''ve got a revlib entry handy
    pristine_root = run_cmd_first_line("$TLA library-find --silent", latest_rev)

    if (! pristine_root) {
      # No revlib entry; can we add one to a greedy library?

      # Search for a greedy revision library
      revlibs_cmd = make_cmd("$TLA my-revision-library 2>/dev/null")
      while ((revlibs_cmd_status = (revlibs_cmd |getline revlib)) > 0) {
	greedy = run_cmd_first_line(make_cmd("$TLA library-config", revlib) \
				    "| grep '\''^greedy[?]'\''")
	if (greedy ~ /yes$/)
	  break
      }
      if (revlibs_cmd_status >= 0)
	close (revlibs_cmd)

      if (revlibs_cmd_status > 0) {
	# Found a greedy library, add an entry for this revision to it

	if (run_cmd("$TLA library-add", latest_rev))
	  pristine_root = run_cmd_first_line("$TLA library-find", latest_rev)
      }

      if (! pristine_root) {
	# Give up with revlibs and try to add a pristine tree

	if (run_cmd("$TLA add-pristine", latest_rev))
	  pristine_root = run_cmd_first_line("$TLA find-pristine", latest_rev)
      }
    }
  }

  if (pristine_root)
    return pristine_root "/" file
  else
    return 0
}

# Return a unique ID string
function unique_id() { return run_cmd_first_line("$UUIDGEN") }

# Return the filename FILE with any leading `./'\'' removed
function no_dot(file) { sub (/^\.\//, "", file); return file }

# Returns the (fully-specified) revision REV with the patch-level
# component removed
function revision_version(rev  ,archive,parts,ver)
{
  if (split (rev, parts, "/") == 2) {
    archive = parts[1]
    rev = parts[2]
  } else
    archive = 0
    
  split (rev, parts, "--")

  ver = parts[1] "--" parts[2] "--" parts[3]
  if (archive)
    ver = archive "/" ver

  return ver
}

# Returns the patch-level component of the (fully-specified) revision REV
function revision_patch_level(rev  ,parts)
{
  # Note that the archive component can have embedded -- markers too,
  # but that does not affect the result
  return parts[split (rev, parts, "--")]
}

# Return the name, relative to the tree-root, of the patch-log file for
# the revision REV, which must be of the form
# ARCHIVE/CATEGORY--BRANCH--VERSION--PATCHLEVEL
function patch_log_file_name(rev   ,archive,parts)
{
  split (rev, parts, "/")
  archive = parts[1]
  rev = parts[2]
    
  split (rev, parts, "--")

  return								\
    "{arch}/"								\
    parts[1]								\
    "/" parts[1] "--" parts[2]						\
    "/" parts[1] "--" parts[2] "--" parts[3]				\
    "/" archive								\
    "/patch-log/" parts[4]
}

'
# (---- end of TLA_AWK_FUNS defined from tla-tools-funs.awk ----)
# (---- TAGLINE_AWK_FUNS defined from tagline-funs.awk ----)
TAGLINE_AWK_FUNS='# tagline-funs.awk -- AWK functions used for manipulating arch taglines
#
# This text is the body of a single-quoted shell string which is spliced
# into the AWK programs further down, so a literal single quote has to
# be written as the four characters quote-backslash-quote-quote.
#
# Global tables:
#   tagline_type[LEVEL, KIND, REGEXP]  = TYPE     file-type rules
#   file_type_tagline[TYPE]            = FORMAT   tagline syntax
#   file_type_end_marker_re[TYPE]      = REGEXP   end-marker rules
#   file_type_end_marker[TYPE]         = REGEXP   copy of the above, filled
#                                                 in by init_tagline_rules

BEGIN {
  # Filename to file-type rules

  tagline_type[9, "ext", "am"]			= "automake"
  tagline_type[9, "ext", "ac"]			= "autoconf"
  tagline_type[9, "name", "configure\\.in"]	= "autoconf"

  # Override shell-script recognition
  tagline_type[9, "name", "configure"]		= "explicit"

  # There'\''s no way of inserting an automatically deleted comment in
  # autoconf input files, so they end up clashing with the resulting
  # generated file.  Thus we must use explicit tags (sigh).
  tagline_type[9, "ext", "in"]			= "explicit"

  tagline_type[10, "name", "\\.arch-inventory"] = "sh"
  tagline_type[10, "name", "ChangeLog.*"] 	= "lisp"
  tagline_type[10, "name", "[Mm]akefile.*"] 	= "sh"
  tagline_type[10, "name", "\\.gdbinit.*"] 	= "sh"
  tagline_type[10, "name", "Imakefile"]		= "c" # run through cpp
  tagline_type[10, "name", "texinfo\\.tex"]	= "texi" # texinfo, not tex

  tagline_type[10, "ext", "c"] 		= "c"	 #:  /* arch-tag: ...\n  ... */
  tagline_type[10, "ext", "h"] 		= "c"
  tagline_type[10, "ext", "s"] 		= "c"	 # fed through cpp

  tagline_type[10, "ext", "c\\+\\+"]	= "c++"	 #:  // arch-tag: ...
  tagline_type[10, "ext", "cc"]		= "c++"
  tagline_type[10, "ext", "cxx"]	= "c++"
  tagline_type[10, "ext", "C"]		= "c++"
  tagline_type[10, "ext", "CC"]		= "c++"
  tagline_type[10, "ext", "h\\+\\+"]	= "c++"
  tagline_type[10, "ext", "hh"]		= "c++"
  tagline_type[10, "ext", "hxx"]	= "c++"
  tagline_type[10, "ext", "H"]		= "c++"
  tagline_type[10, "ext", "HH"]		= "c++"

  tagline_type[10, "ext", "el"] 	= "lisp" #:  ;; arch-tag: ...
  tagline_type[10, "ext", "l"] 		= "lisp"

  tagline_type[10, "ext", "pas"]        = "pascal" #:  (* arch-tag: ...\n  ... *)
  tagline_type[10, "ext", "dpr"]        = "pascal"
  tagline_type[10, "ext", "pp"]         = "pascal"

  tagline_type[10, "ext", "sh"] 	= "sh" 	 #:  # arch-tag: ...
  tagline_type[10, "ext", "bash"] 	= "sh"
  tagline_type[10, "ext", "csh"] 	= "sh"
  tagline_type[10, "ext", "sed"] 	= "sh"
  tagline_type[10, "ext", "awk"] 	= "sh"
  tagline_type[10, "ext", "perl"] 	= "sh"
  tagline_type[10, "ext", "pl"] 	= "sh"	 # perl
  tagline_type[10, "ext", "py"] 	= "sh"	 # python
  tagline_type[10, "ext", "tit"] 	= "sh"	 # by examination
  tagline_type[10, "ext", "inp"] 	= "sh"	 # by examination

  tagline_type[10, "ext", "m4"] 	= "m4"

  tagline_type[10, "ext", "tex"] 	= "tex"  #:  % arch-tag: ...
  tagline_type[10, "ext", "sty"] 	= "tex"
  tagline_type[10, "ext", "erl"] 	= "tex"  # erlang
  tagline_type[10, "ext", "hrl"] 	= "tex"

  tagline_type[10, "ext", "texi"] 	= "texi" #:  @c arch-tag: ...
  tagline_type[10, "ext", "texinfo"] 	= "texi"

  tagline_type[10, "ext", "[1-9]"] 	= "roff" #:  .\" arch-tag: ...

  tagline_type[10, "ext", "html"]	= "html" #:  <!-- arch-tag: ...\n -->
  tagline_type[10, "ext", "xml"] 	= "html"

  tagline_type[10, "ext", "bat"] 	= "bat"  #:  rem arch-tag: ...

  # various script magic numbers all map to "sh"
  tagline_type[10, "header", "#! *[^ ]*/[a-z]*(sh|awk|perl)( .*)?"] = "sh"

  ## File-type tagline conventions

  file_type_tagline["sh"] 	= "# arch-tag: %s"
  file_type_tagline["c"] 	= "/* arch-tag: %s\n   (do not change this comment) */"
  file_type_tagline["c++"] 	= "// arch-tag: %s"
  file_type_tagline["pascal"] 	= "(* arch-tag: %s\n   (do not change this comment) *)"
  file_type_tagline["html"] 	= "<!-- arch-tag: %s\n     (do not change this comment) -->"
  file_type_tagline["lisp"] 	= ";; arch-tag: %s"
  file_type_tagline["null"] 	= "arch-tag: %s"
  file_type_tagline["roff"] 	= ".\\\" arch-tag: %s"
  file_type_tagline["tex"] 	= "%% arch-tag: %s"

  # For the following types, the most natural comment syntaxes run
  # afoul of the rule that arch-tag: must only be preceded by
  # punctuation, so various workarounds are used instead; hopefully
  # this problem will be fixed with the switch to arch-id:.

  #file_type_tagline["m4"]	= "dnl arch-tag: %s"
  file_type_tagline["m4"]	= "ifelse(dnl\tDo not change this comment\n   arch-tag: %s\n)dnl"
  #file_type_tagline["autoconf"] = "dnl arch-tag: %s"
  file_type_tagline["autoconf"]	= "m4_if(dnl\tDo not change this comment\n   arch-tag: %s\n)dnl"
  file_type_tagline["automake"] = "## arch-tag: %s"

  #file_type_tagline["texi"] 	= "@c arch-tag: %s"
  file_type_tagline["texi"] 	= "@ignore\n   arch-tag: %s\n@end ignore"

  #file_type_tagline["bat"] 	= "rem arch-tag: %s"
  file_type_tagline["bat"] 	= "goto skipArchTag\n   arch-tag: %s\n:skipArchTag"

  # Should be a bit loose
  file_type_end_marker_re["lisp"] = ";; .* ends here"
  file_type_end_marker_re["sh"]   = "# .* ends here"
}

# Merge the rules from the {arch}/=tagline-rules file of the project (if
# there is one) into the rule tables.  Does nothing after the first call.
function init_tagline_rules(  type,level,kind,regexp,idx,parts,line)
{
  if (! _tagline_rules_initialized) {
    ## Initialization

    # Read project tagline rules file
    #
    # The syntax is:
    #
    #       KIND[.LEVEL]  REGEXP  TYPE
    #  or:  tagline       TYPE    FORMAT
    #  or:  end-marker    TYPE    REGEXP
    #  or:  # COMMENT
    #
    # where KIND is "name" to match whole filenames, "ext" to match file
    # extensions, "header" to match the first line of the file, and "dir" to
    # match directories (in that order of priority).  LEVEL is a priority,
    # where lower levels are searched first; default rules are level 9 or 10,
    # and user rules with no explicit priority are level 5.
    #
    # REGEXP is an egrep-style (`extended'\'') regular expression used for
    # matching (within the context of KIND), and a positive match means a
    # tagline of type TYPE is used.
    #
    # FORMAT should contain the actual tagline to add for TYPE, with a
    # single %s where the actual tag value should be substituted (any other
    # occurrences of % should be escaped by doubling them, e.g. %%).
    #
    # Both REGEXP and FORMAT may contain the following special backslash
    # escape sequences: \n \t \s \\  (\s means a space)
    # other occurrences of backslash are left unchanged.
    #
    while ((getline line < "{arch}/=tagline-rules") > 0)
      if (line !~ /^ *(#.*)?$/) {
	split (line, parts)
	if (parts[1] == "tagline") {
	  sub (/^tagline[ \t]*[^ \t]*[ \t]*/, "", line)
	  file_type_tagline[parts[2]] = _file_tagline_unesc(line)
	} else if (parts[1] == "end-marker") {
	  sub (/^end-marker[ \t]*[^ \t]*[ \t]*/, "", line)
	  # Store the rule in the same table as the built-in end-marker
	  # rules; that table is the one consulted when a tagline is
	  # appended, so a project rule stored anywhere else would
	  # never take effect.
	  file_type_end_marker_re[parts[2]] = _file_tagline_unesc(line)
	} else {
	  level = 5
	  kind = parts[1]
	  regexp = _file_tagline_unesc(parts[2])
	  type = parts[3]
	  if (kind ~ /[.]/) {
	    split (kind, parts, /[.]/)
	    kind = parts[1]
	    level = parts[2] + 0
	  }
	  tagline_type[level, kind, regexp] = type
	}
      }
    close ("{arch}/=tagline-rules")

    # Keep file_type_end_marker, the table printed by the rule listing,
    # identical to the complete set of end-marker rules (built-in rules
    # plus project rules, with project rules taking precedence).
    for (type in file_type_end_marker_re)
      file_type_end_marker[type] = file_type_end_marker_re[type]

    _tagline_rules_initialized = 1
  }
}

# Compile the rules in tagline_type into one anchored regexp per
# (LEVEL, KIND, TYPE) combination, stored in _file_tagline_type_re, and
# record which levels and types exist.  Does nothing after the first call.
function _file_tagline_init(  type,level,kind,regexp,idx,parts,line)
{
  if (! _file_tagline_initialized) {
    init_tagline_rules()

    for (idx in tagline_type) {
      split (idx, parts, SUBSEP)
      level = parts[1]
      kind = parts[2]
      regexp = parts[3]

      if (level > _file_tagline_max_level)
	_file_tagline_max_level = level
      _file_tagline_levels[level] = 1

      # The first regexp of a combination opens the group, later ones
      # are added as alternatives; the group is closed further down.
      type = tagline_type[idx]
      if ((level, kind, type) in _file_tagline_type_re) {
	if (kind == "ext")
	  regexp = ".*\\." regexp
	regexp = _file_tagline_type_re[level, kind, type] "|" regexp
      } else if (kind == "ext")
	regexp = "^(.*\\." regexp
      else if (kind == "dir")
	regexp = "(^|/)(" regexp
      else
	regexp = "^(" regexp

      _file_tagline_type_re[level, kind, type] = regexp

      _file_tagline_types[type] = 1
    }

    for (idx in _file_tagline_type_re)
      _file_tagline_type_re[idx] = _file_tagline_type_re[idx] ")$"

    _file_tagline_initialized = 1
  }
}

# Return STRING with the backslash escape sequences allowed in the
# tagline rules file (\n \t \s \\) replaced by the characters they
# stand for
function _file_tagline_unesc(string)
{
  gsub (/\\\\/, "\\q", string)  # Change \\ to \q to avoid confusion below
  gsub (/\\n/, "\n", string)	# Do substitutions
  gsub (/\\t/, "\t", string)
  gsub (/\\s/, " ", string)
  gsub (/\\q/, "\\", string)	# Finally change \q into plain \
  return string
}

# Return a type which has a rule of kind KIND at level LEVEL matching
# STRING, or 0 if there is none
function _file_tagline_find_type(string, level, kind  ,type)
{
  for (type in _file_tagline_types)
    if ((level, kind, type) in _file_tagline_type_re)
      if (string ~ _file_tagline_type_re[level, kind, type])
	return type
  return 0
}

# Return a type which has a "header" rule at level LEVEL matching the
# first line of FILE, or 0 if there is none.  The file is only read if
# there is at least one header rule at this level.
function _file_tagline_find_header_type(file, level  ,type,header)
{

  for (type in _file_tagline_types)
    if ((level, "header", type) in _file_tagline_type_re) {
      if (! header)
	header = file_first_line(file)
      if (! header)
	return 0
      if (header ~ _file_tagline_type_re[level, "header", type])
	return type
    }
  return 0
}

# Return the `tagline type'\'' of a file, which determines what commenting
# conventions to use for adding a tagline, or 0 if no tagline should be used
function file_tagline_type(file  ,base_name,dir,type,level)
{
  _file_tagline_init()

  if (file ~ /\//) {
    dir = file
    sub (/\/[^\/]*$/, "", dir)
    base_name = file
    sub (/.*\//, "", base_name)
  } else {
    dir = "."
    base_name = file
  }

  # Try the levels in ascending order, and within a level the kinds in
  # the order name, ext, header, dir; the first match wins.
  type = 0
  for (level = 0; level <= _file_tagline_max_level && !type; level++)
    if (level in _file_tagline_levels) {
      type = _file_tagline_find_type(base_name, level, "name")
      if (! type)
	type = _file_tagline_find_type(base_name, level, "ext")
      if (! type)
	type = _file_tagline_find_header_type(file, level)
      if (! type)
	type = _file_tagline_find_type(dir, level, "dir")
    }

  # The pseudo-type "explicit" stops the search but means "no tagline"
  if (type == "explicit")
    type = 0

  return type
}

'
# (---- end of TAGLINE_AWK_FUNS defined from tagline-funs.awk ----)

# List tagline rules.  Optional single arg is the kind of rule to list:
# one of the file-type rule kinds "name", "ext", "header" or "dir", or
# "tagline", or "end-marker".  Without an argument everything is listed.
#
# The argument is handed to AWK as a variable assignment rather than
# being spliced into the program text, so that it can never be parsed
# as AWK code.
list_tagline_rules()
{
  $AWK -v limit="$1" '
    '"$TLA_AWK_FUNS"'
    '"$TAGLINE_AWK_FUNS"'

    # Return STR with backslash, newline and tab (and also space, if
    # DO_SPACES is true) replaced by the escape sequences understood by
    # the tagline rules file
    function esc(str, do_spaces)
    {
      gsub (/\\/, "\\\\", str)
      gsub (/\n/, "\\n", str)
      gsub (/\t/, "\\t", str)
      if (do_spaces)
	gsub (/ /, "\\s", str)
      return str
    }

    # Return the sort priority of a file-type rule, given the PARTS
    # (level, kind, regexp) of its key: ordered by level first, and
    # within a level by kind in the order name, ext, header, dir
    function tagline_key_prio(parts  ,prio,kind)
    {
      prio = parts[1] * 16
      kind = parts[2]

      if (kind == "dir")
	return prio + 3
      else if (kind == "header")
	return prio + 2
      else if (kind == "ext")
	return prio + 1
      else
	return prio
    }
    # Return true if rule key KEY1 sorts before KEY2: by priority, and
    # by regexp for rules of equal priority
    function tagline_keys_lessp(key1, key2  ,parts1,parts2,prio1,prio2)
    {
      split (key1, parts1, SUBSEP)
      split (key2, parts2, SUBSEP)

      prio1 = tagline_key_prio(parts1)
      prio2 = tagline_key_prio(parts2)

      return prio1 < prio2 || (prio1 == prio2 && parts1[3] < parts2[3])
    }
    # Sort the rule keys KEYS[0] ... KEYS[LEN - 1] in place
    function sort_tagline_keys(keys, len,  i,j,tmp)
    {
      for (i = 0; i < len; i++)
	for (j = i + 1; j < len; j++) {
	  if (! tagline_keys_lessp(keys[i], keys[j]))
	    {
	      tmp = keys[i]
	      keys[i] = keys[j]
	      keys[j] = tmp
	    }
	}
    }

    # Print every entry of ARRAY in ascending key order, using the
    # printf format FMT, which receives the key and the escaped value
    function print_sorted_mapping(array, fmt  ,num_els,key,keys,i,j,tmp)
    {
      num_els = 0
      for (key in array)
	keys[num_els++] = key

      for (i = 0; i < num_els; i++)
	for (j = i + 1; j < num_els; j++) {
	  if (keys[i] > keys[j])
	    {
	      tmp = keys[i]
	      keys[i] = keys[j]
	      keys[j] = tmp
	    }
	}

      for (i = 0; i < num_els; i++)
	printf (fmt, keys[i], esc(array[keys[i]], 0))
    }

    BEGIN {
      # LIMIT comes from the command line of awk; empty means no limit
      if (limit == "")
	limit = 0

      init_tagline_rules()

      # All four rule kinds are accepted here; "header" rules would
      # otherwise be impossible to list on their own.
      if (!limit || limit ~ /^(ext|name|header|dir)$/) {
	print "# Filename to file-type mapping rules (in order of priority)"
	printf ("# %s\t%-20s %s\n", "KIND", "REGEXP", "TYPE")

	num_tagline_type_keys = 0
	for (key in tagline_type)
	  tagline_type_keys[num_tagline_type_keys++] = key
	sort_tagline_keys(tagline_type_keys, num_tagline_type_keys)

	for (i = 0; i < num_tagline_type_keys; i++) {
	  key = tagline_type_keys[i]

	  split (key, parts, SUBSEP)

	  level = parts[1] + 0
	  kind = parts[2]
	  regexp = parts[3]

	  # NOTE(review): level 10 rules are printed without a level,
	  # whereas a rule read from {arch}/=tagline-rules without a
	  # level is given level 5, so this output does not round-trip
	  # exactly -- confirm whether that is intended.
	  if (!limit || limit == kind)
	    if (level == 10)
	      printf ("%s\t%-20s %s\n",
		      kind, esc(regexp, 1),
		      tagline_type[level, kind, regexp]);
	    else
	      printf ("%s.%d\t%-20s %s\n",
		      kind, level, esc(regexp, 1),
		       tagline_type[level, kind, regexp]);
	}
      }

      if (!limit || limit == "tagline") {
	print ""
	print "# File-type tagline-syntax rules"
	printf ("#       %-8s %s\n", "TYPE", "TAGLINE FORMAT")
	print_sorted_mapping(file_type_tagline, "tagline %-8s %s\n")
      }

      if (!limit || limit == "end-marker") {
	print ""
	print "# File-type end-marker rules"
	printf ("#       %-8s %s\n", "TYPE", "END-MARKER REGEXP")
	print_sorted_mapping(file_type_end_marker, "end-marker %-8s %s\n")
      }
    }
  '
}

# Parse command-line options
# Every recognized option causes an immediate exit, so the loop only
# continues after splitting a bundle of single-letter options, and ends
# at the first non-option argument (or when the arguments run out).
while :; do
  case "$1" in
    --tagline-rules|-T)
      shift
      list_tagline_rules "$@"
      exit 0;;
    --help|-h|-H)
      help; exit 0;;
    --version|-V)
      version; exit 0;;
    -[!-]?*)
      # split concatenated single-letter options apart
      #
      # The command substitution is deliberately left unquoted so that
      # the two resulting options become separate words.  printf is
      # used to feed sed, because echo would consume a bundle such as
      # "-ne" as its own options and print nothing.
      FIRST="$1"; shift
      set -- `printf '%s\n' "$FIRST" | $SED 's/-\(.\)\(.*\)/-\1 -\2/'` "$@"
      ;;
    -*)
      unrec_opt "$1"; exit 10;;
    *)
      break;;
  esac
done

# No non-option arguments are accepted
test "$#" = 0 || { cmd_line_err; exit 10; }

TREE_ROOT=`$TLA tree-root 2>/dev/null` || { echo 1>&2 "$me: Not in an arch project tree"; exit 11; }

ID_TAGGING_METHOD=`$TLA id-tagging-method`

# There's no point if the tree is not using tags at all
test "$ID_TAGGING_METHOD" = names && exit 0

# We use `tla tree-lint' to list files to act on, and tree-lint always
# operates relative to the tree-root.
#
# The file names it prints are used as they are for modifying files, so
# carrying on from any other directory could touch the wrong files; give
# up if the directory change fails.
cd "$TREE_ROOT" || { echo 1>&2 "$me: cannot change directory to $TREE_ROOT"; exit 11; }

# Main pass.  The two tree-lint listings are merged into one stream, each
# line prefixed with the kind of problem it reports:
#
#   no_tag:FILE      FILE exists but has no id-tag
#   no_file:ID-FILE  explicit id-tag file ID-FILE has no corresponding file
#
# The AWK program collects both lists and does all of its work in the
# END rule.  Its exit status is 0 if nothing was changed, 1 if something
# was changed, 2 if a step failed and 12 if no unique id could be
# generated.
{
  $TLA tree-lint $TLA_UNESCAPED_OPT --untagged-files | $SED 's@^@no_tag:@'
  $TLA tree-lint $TLA_UNESCAPED_OPT --missing-files  | $SED 's@^@no_file:@'
} | $AWK '
'"$TLA_AWK_FUNS"'
'"$TAGLINE_AWK_FUNS"'

BEGIN {
  # program name
  me = "'"$me"'"

  # Renaming heuristic thresholds
  max_closeness = 100
  rename_closeness_threshold = 10

  # This script works best with taglines, but try to handle explicit
  # tagging too.
  id_tagging_method = "'"$ID_TAGGING_METHOD"'"
}

# Append a tagline for file-type TYPE with the id-tag ID to FILE
# Returns 1 if successful, and 0 otherwise.
function append_tagline(file, type, id  ,tail_cmd,tmp_file,ok,text,last_line)
{
  text = sprintf (file_type_tagline[type], id)

  last_line = file_last_line(file)

  if (last_line								\
      && type in file_type_end_marker_re				\
      && last_line ~ file_type_end_marker_re[type])
  {
    # The file has an "end-marker" comment, and we want to put the
    # tagline before it, so we cannot simply append to the file.

    # Make a temp file containing everything but the last line
    tmp_file = file ".new"
    ok = run_cmd("sed >", tmp_file, "$d", file)

    if (ok) {
      last_line = file_last_line(tmp_file)
      # ... and append to the temp file instead.
      ok = append_text(tmp_file, text, last_line)
    }

    # Now add the last line back, and move it to the real file.
    if (ok)
      ok = run_cmd("sed -n >>", tmp_file, "$p", file)
    if (ok)
      ok = run_cmd("mv", tmp_file, file)

    # Whichever step failed, do not leave the temp file lying around in
    # the project tree (the redirection may have created it even when
    # the very first command failed).
    if (! ok)
      run_cmd("rm -f", tmp_file)
  } else
    ok = append_text(file, text, last_line)

  return ok
}

# Return a measure of how much the files OLD and NEW differ: the number
# of diff output lines as a percentage of the total number of lines in
# both files, or max_closeness if either file is empty.
function compute_file_closeness(old, new  ,old_lines,new_lines,diff_lines)
{
  old_lines = file_num_lines(old)
  new_lines = file_num_lines(new)

  if (old_lines == 0 || new_lines == 0)
    # Empty files never compare equal
    return max_closeness

  # Note that we force even binary files to be compared; this should be
  # reasonably safe as we only use the output for counting with grep -c
  diff_lines = run_cmd_first_line(make_cmd("diff --text", old, new)	\
				  " | grep -c \"^[<>]\"")

  # Return the number of difference lines as a percentage of
  # total file lines
  return (diff_lines * 100) / (old_lines + new_lines)
}

# Untagged files are candidates for receiving an id ("destinations")
/^no_tag:/ {
  sub (/^no_tag:/, "")
  dests[num_dests++] = $0
  next
}
# Orphaned explicit ids are recorded under the name of the file they
# used to belong to ("sources")
/^no_file:/ {
  sub (/^no_file:/, "")
  sources[num_sources++] = file_from_explicit_id_file($0)
  next
}

END {
  ok = 1
  added_dirs = 0

  # Now try to match up adds and deletes of explicitly tagged files if possible
  for (i = 0; i < num_sources && ok; i++) {
    old = sources[i]
    old_pristine = pristine_file(old)

    best_closeness = max_closeness
    if (file_exists(old_pristine)) {
      # We do not really care about deletes, except for files that were
      # explicitly tagged -- in that case, there is the possibility that the
      # file was actually renamed/moved.

      for (j = 0; j < num_dests; j++) {
	new = dests[j];

	closeness = compute_file_closeness(old_pristine, new)
	if (closeness < best_closeness) {
	  best_closeness = closeness
	  best_target_index = j
	}
      }
    }

    if (best_closeness < rename_closeness_threshold) {
      new = dests[best_target_index]

      print "* renaming explicit id: " old " => " new
      ok = run_cmd("$TLA move-id", old, new)

      # Remove the target from further consideration
      dests[best_target_index] = dests[--num_dests]
    } else {
      # must be a delete instead
      print "* removing explicit id: " old
      ok = run_cmd("$TLA delete-id", old)

      removed_xid_dirs[file_explicit_id_dir(old)] = 1
    }

    something_changed = 1
  }

  # Add ids to any files that were not used in renaming
  for (i = 0; i < num_dests && ok; i++) {
    new = dests[i]

    id = unique_id()
    if (! id) {
      print me ": cannot generate unique-id for tagline, aborting..." |"cat 1>&2"
      exit (12)
    }

    is_dir = file_is_dir(new)

    if (!is_dir && id_tagging_method == "tagline")
      type = file_tagline_type(new)
    else
      type = 0

    if (type) {
      print "* adding `" type "'\'' tagline: " new
      ok = append_tagline(new, type, id)
    } else {
      print "* adding explicit id: " new
      ok = run_cmd("$TLA add-id --id", id, new)
      if (ok && is_dir)
	added_dirs = 1
    }

    something_changed = 1
  }

  if (ok && added_dirs) {
    # Recursively invoke ourselves to deal with the new subdirectories.
    # We use "system" instead of run_cmd so we can look at the
    # command exit-status.  An exit-status of 0 or 1 is OK,
    # anything else means there was an error.
    #
    # NOTE(review): the script name is used exactly as it was given on
    # the command line, while the working directory has been changed to
    # the tree-root in the meantime; presumably a relative name given
    # from a subdirectory cannot be found here -- confirm.
    sub_command_status = system(make_cmd("'"$0"'"))
    ok = (sub_command_status == 0 || sub_command_status == 1)
  }

  # Try to remove explicit-id directories too
  for (xid_dir in removed_xid_dirs)
    run_cmd("rmdir 2>/dev/null", xid_dir)

  if (! ok)
    exit (2)
  else if (something_changed)
    exit (1)
  else
    exit (0)
}
'

