### ====================================================================
###  @Awk-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "1.09",
###     date            = "25 August 2001",
###     time            = "17:44:41 MDT",
###     filename        = "bibextract.awk",
###     address         = "Center for Scientific Computing
###                        University of Utah
###                        Department of Mathematics, 322 INSCC
###                        155 S 1400 E RM 233
###                        Salt Lake City, UT 84112-0090
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 585 1640, +1 801 581 4148",
###     URL             = "http://www.math.utah.edu/~beebe",
###     checksum        = "23217 261 1158 9297",
###     email           = "beebe@math.utah.edu, beebe@acm.org,
###                        beebe@computer.org, beebe@ieee.org (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "BibTeX, bibliography",
###     supported       = "yes",
###     abstract        = "This file is a template for bibextract.sh
###                        to produce a temporary awk program for
###                        extracting bibliography entries selected
###                        by particular keywords.",
###     docstring       = "*********************************************
###                        This code is hereby placed in the PUBLIC
###                        DOMAIN and may be redistributed without any
###                        restrictions.
###                        *********************************************
###
###                        NB: This file is not used directly by awk,
###                        but rather is a template for bibextract.sh
###                        to produce a temporary file with an awk
###                        program to do the work.  This subterfuge is
###                        necessary because there is no convenient
###                        way to provide a pattern for an awk program
###                        at run time.  The strings replaced by
###                        bibextract.sh are upper-case versions of
###                        'keyword' and 'pattern'.
###
###                        Matching entries are found in each of the
###                        file arguments as BibTeX bib files and
###                        output to stdout.  @preamble{} and
###                        @string{} entries are automatically output
###                        as well.
###
###                        Usage:
###                             nawk -f bibextract.awk bibfile(s) >newbibfile
###
###                        To be recognized, bib entries must look like
###
###                        @keyword{tag,
###                        ...
###                        }
###
###                        where the start @ appears in column 1, and
###                        the complete entry has balanced braces.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.  This is produced by Robert
###                        Solovay's checksum utility.",
###  }
### ====================================================================

### Edit history (reverse chronological order):
### [25-Aug-2001]	1.09	Update file header data, and output
###				GNU Emacs editing mode comment.
### [19-Feb-1999]	1.08	Update file header data.
### [16-May-1998]	1.07	Update file header data.
### [24-Aug-1994]	1.06	Parenthesize substituted keyword and
###				pattern so that they can include
###				alternates, e.g.  "author|editor".
###				Add match_keyword() to handle case of
###				`key = abbrev' as well as the old `key
###				= "value"'.  Add output of `bibsource =
###				"URL"' lines when input is not from
###				stdin.
### [23-Aug-1994]	1.05	Correct name of script in comment.
### [22-Jul-1994]	1.04	Eliminate printbraceditem() in favor of
###				using collectbraceditem(), which now
###				checks for balanced braces to avoid
###				possibility of an infinite loop.
### [17-Jul-1994]	1.03	Revise to output only those @String{...}
###				entries that are actually used by the
###				matched entries.
### [30-Oct-1992]	1.02	Fix typographical error in bibextract.sh
### [21-Oct-1992]       1.01    Update for public distribution
### [08-May-1989]       1.00    original version

BEGIN	{ "hostname" | getline hostname
	"pwd" | getline cwd
	URL_prefix = (hostname != "") ? ("file://" hostname) : ""
	print "%%% -*-BibTeX-*-"
 }

# @string and @preamble -- collect up to paired closing brace

/^@[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]{/        { # brace balance -> }
    print collectbraceditem()
    print ""
}

/^@[sS][tT][rR][iI][nN][gG]{/        { savestring() }

# "@keyword{tag," -- collect up to line starting with right brace
/^@[a-zA-Z0-9]*{/       {
    item = collectbraceditem()
    if ("KEYWORD" == "")        # line is changed by bibextract.sh
    {
        if (lowercase(item) ~ /(PATTERN)/) # line is changed by bibextract.sh
	    save_entry(item)
    }
    else                        # match against text of selected field(s)
    {
        lcitem = lowercase(item)
	match_keyword(lcitem)
        while (RLENGTH > 0)
        {                       # loop over all keyword-pattern matches
            field = substr(lcitem,RSTART,RLENGTH)
#           if (RLENGTH > 0)
#               printf ("%%DEBUG%% %s\n",field)
            if (field ~ /(PATTERN)/) # line is changed by bibextract.sh
            {
		save_entry(item)
                break          # exit loop after printing
            }
            lcitem = substr(lcitem,RSTART+RLENGTH)
	    match_keyword(lcitem)
        }
    }
  }

END {

    for (m = 0; m < num_string; ++m)
    {				# print just those @String{...} entries used
#	print "DEBUG: [" abbrev[m] "]"
	for (k = 0; k < num_entry; ++k)
	{
	    if (index(entry[k],abbrev[m]) > 0)
	   {
#	       print "DEBUG: <" entry[k] ">[" k "]"
	       print string[m],"\n"
	       break
	   }
	}
    }
    for (k = 0; k < num_entry; ++k) # print the matched entries
	print entry[k],"\n"
}


function bracecount(s, k,n)
{
    n = 0
    for (k = 1; k <= length(s); ++k)
    {
        if (substr(s,k,1) == "{")
            n++
        else if (substr(s,k,1) == "}")
            n--
    }
    return (n)
}

# Starting with the current contents of $0, collect lines until we
# reach a zero brace count, and return the complete entry as a string
# value.  In order to prevent infinite loops in the event of unbalance
# braces, we abort with an error message if a line is found beginning
# with an @ character

function collectbraceditem( count,item)
{
    count = bracecount($0)
    item = $0
    while (count != 0)
    {
        if (getline <= 0)
            break
	if ($0 ~ /^[ \t]*@/)
	{
	    print "ERROR: Unbalanced brace detected at line", FNR, \
		" in entry before [" $0 "]" > "/dev/tty"
	    exit(1)
	}
        item = item "\n" $0
        count += bracecount($0)
    }
    return (item)
}


# Return a lower-cased copy of the argument string.

function lowercase(s, t,k,letter)
{
    t = s
    for (k = 1; k <= length(s); ++k)
    {
        letter = substr(t,k,1)
        if (("A" <= letter) && (letter <= "Z"))
        {
            letter = substr("abcdefghijklmnopqrstuvwxyz",
                index("ABCDEFGHIJKLMNOPQRSTUVWXYZ",letter),1)
            t = substr(t,1,k-1) letter substr(t,k+1)
        }
    }
#   printf ("%%DEBUG%% %s\n",t)
    return (t)
}

function match_keyword(lcitem)
{
    if (match(lcitem,/(KEYWORD)[ \t]*=/)) # quick check for `key ='
    {				# expect key = "value" or key = abbrev
	match(lcitem,/(KEYWORD)[ \t]*=[ \t]*"[^"]*"/) || \
	    match(lcitem,/(KEYWORD)[ \t]*=[ \t]*[^,]*,/)
    }
}

function save_entry(item)
{
    sub(/[ \t]*$/,"",item) # strip trailing space
    if ((FILENAME != "-") && (substr(item,length(item),1) == "}"))
    {				# add a record of where we extracted this from
	bibsource = "  bibsource =    \"" \
	    URL_prefix \
	    ((substr(FILENAME,1,1) == "/") ? "" : cwd "/") \
	    FILENAME "\","
	if (substr(item,length(item)-2,2) == ",\n")
	    item = substr(item,1,length(item)-1) bibsource "\n}"
	else if (substr(item,length(item)-1,1) == "\n")
	    item = substr(item,1,length(item)-2) ",\n" bibsource "\n}"
    }
    entry[num_entry++] = item	# save item, preserving input order
}

# Starting with the current contents of $0, collect lines until we
# reach a zero brace count, and then save the string value along
# with a reference count for the abbreviation.

function savestring( s,t)
{
    s = collectbraceditem()
    t = s			# collect the abbreviation name in t
    gsub(/^[^{]*{/,"",t)	# brace balance -> } }
    gsub(/ *=.*$/,"",t)
    abbrev[num_string] = t	# and save the entire @String{...}
    string[num_string] = s	# and its abbreviation name preserving
				# the input order
#   print "DEBUG: abbrev[" num_string "] = <" t "> length = " length(t)
    num_string++
}

### ====================================================================