/*
 *      fhist - file history and comparison tools
 *      Copyright (C) 1991-1994, 1998-2002, 2006, 2008, 2010 Peter Miller
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 3 of the License, or
 *      (at your option) any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program. If not, see
 *      <http://www.gnu.org/licenses/>.
 *
 *
 * A literal pool is maintained.  Each string has a reference count.  The
 * string stays in the literal pool for as long as it has a positive
 * reference count.  To determine if a string is already in the literal pool,
 * linear dynamic hashing is used to guarantee an O(1) search.  Making all equal
 * strings the same item in the literal pool means that string equality is
 * a pointer test, and thus very fast.
 */

#include <common/ac/stddef.h>
#include <common/ac/stdio.h>
#include <common/ac/stdlib.h>
#include <common/ac/string.h>
#include <common/ac/ctype.h>
#include <common/ac/stdarg.h>
#include <libexplain/malloc.h>
#include <libexplain/output.h>
#include <libexplain/realloc.h>

#include <common/error.h>
#include <common/mprintf.h>
#include <common/str.h>


/*
 * maximum conversion width for numbers
 */
#define MAX_WIDTH 509

static string_ty **hash_table;
static str_hash_ty hash_modulus;
static str_hash_ty hash_mask;
static str_hash_ty hash_load;

#define MAX_HASH_LEN 20


#define is_power_of_two(x) ((x) == ((x) & -(x)))


/*
 * NAME
 *      hash_generate - hash string to number
 *
 * SYNOPSIS
 *      str_hash_ty hash_generate(char *s, size_t n);
 *
 * DESCRIPTION
 *      The hash_generate function is used to make a number from a string.
 *
 * RETURNS
 *      str_hash_ty - the magic number
 *
 * CAVEAT
 *      Only the last MAX_HASH_LEN characters are used.
 *      It is important that str_hash_ty be unsigned (int or long).
 */

static str_hash_ty
hash_generate(const char *s, size_t n)
{
    str_hash_ty     retval;

    if (n > MAX_HASH_LEN)
    {
        s += n - MAX_HASH_LEN;
        n = MAX_HASH_LEN;
    }

    retval = 0;
    while (n > 0)
    {
        retval = (retval + (retval << 1)) ^ *s++;
        --n;
    }
    return retval;
}


/*
 * NAME
 *      str_initialize - start up string table
 *
 * SYNOPSIS
 *      void str_initialize(void);
 *
 * DESCRIPTION
 *      The str_initialize function is used to create the hash table and
 *      initialize it to empty.
 *
 * RETURNS
 *      void
 *
 * CAVEAT
 *      This function must be called before any other defined in this file.
 */

void
str_initialize(void)
{
    str_hash_ty     j;

    if (hash_table)
        return;
    hash_modulus = 1 << 8;      /* MUST be a power of 2 */
    hash_mask = hash_modulus - 1;
    hash_load = 0;
    hash_table =
        (string_ty **)explain_malloc_or_die(hash_modulus * sizeof(string_ty *));
    for (j = 0; j < hash_modulus; ++j)
        hash_table[j] = 0;

    assert(is_power_of_two(hash_modulus));
}


/*
 * NAME
 *      split - reduce table loading
 *
 * SYNOPSIS
 *      void split(void);
 *
 * DESCRIPTION
 *      The split function is used to reduce the load factor on the hash table.
 *
 * RETURNS
 *      void
 *
 * CAVEAT
 *      A load factor of about 80% is suggested.
 */

static void
split(void)
{
    string_ty       **new_hash_table;
    str_hash_ty     new_hash_modulus;
    str_hash_ty     new_hash_mask;
    str_hash_ty     idx;

    /*
     * double the modulus
     *
     * This is subtle.  If we only increase the modulus by one, the load
     * always hovers around 80%, so we have to do a split for every
     * insert.  I.e. the malloc burden os O(n) for the lifetime of the
     * program.  BUT if we double the modulus, the length of time until
     * the next split also doubles, making the probablity of a split
     * halve, and sigma(2**-n)=1, so the malloc burden becomes O(1) for
     * the lifetime of the program.
     */
    assert(is_power_of_two(hash_modulus));
    new_hash_modulus = hash_modulus * 2;
    new_hash_mask = new_hash_modulus - 1;
    new_hash_table =
        explain_malloc_or_die(new_hash_modulus * sizeof(string_ty *));

    /*
     * now redistribute the list elements
     */
    for (idx = 0; idx < hash_modulus; ++idx)
    {
        string_ty       *p;

        new_hash_table[idx] = 0;
        new_hash_table[idx + hash_modulus] = 0;
        p = hash_table[idx];
        while (p)
        {
            string_ty       *p2;
            str_hash_ty     new_idx;

            p2 = p;
            p = p->str_next;

            assert((p2->str_hash & hash_mask) == idx);
            new_idx = p2->str_hash & new_hash_mask;
            p2->str_next = new_hash_table[new_idx];
            new_hash_table[new_idx] = p2;
        }
    }
    free(hash_table);
    hash_table = new_hash_table;
    hash_modulus = new_hash_modulus;
    hash_mask = new_hash_mask;
}


/*
 * NAME
 *      str_from_c - make string from C string
 *
 * SYNOPSIS
 *      string_ty *str_from_c(char*);
 *
 * DESCRIPTION
 *      The str_from_c function is used to make a string from a null terminated
 *      C string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_from_c(const char *s)
{
    return str_n_from_c(s, strlen(s));
}


/*
 * NAME
 *      str_n_from_c - make string
 *
 * SYNOPSIS
 *      string_ty *str_n_from_c(char *s, size_t n);
 *
 * DESCRIPTION
 *      The str_n_from_c function is used to make a string from an array of
 *      characters.  No null terminator is assumed.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_n_from_c(const char *s, size_t length)
{
    str_hash_ty     hash;
    str_hash_ty     idx;
    string_ty       *p;

    hash = hash_generate(s, length);
    if (!hash_table)
        str_initialize();
    idx = hash & hash_mask;
    for (p = hash_table[idx]; p; p = p->str_next)
    {
        if
        (
            p->str_hash == hash
        &&
            p->str_length == length
        &&
            !memcmp(p->str_text, s, length)
        )
        {
            p->str_references++;
            return p;
        }
    }

    p = (string_ty *)explain_malloc_or_die(sizeof(string_ty) + length);
    p->str_hash = hash;
    p->str_length = length;
    p->str_references = 1;
    p->str_next = hash_table[idx];
    hash_table[idx] = p;
    memcpy(p->str_text, s, length);
    p->str_text[length] = 0;

    hash_load++;
    if (hash_load * 10 > hash_modulus * 8)
        split();
    return p;
}


/*
 * NAME
 *      str_copy - make a copy of a string
 *
 * SYNOPSIS
 *      string_ty *str_copy(string_ty *s);
 *
 * DESCRIPTION
 *      The str_copy function is used to make a copy of a string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_copy(string_ty *s)
{
    s->str_references++;
    return s;
}


/*
 * NAME
 *      str_free - release a string
 *
 * SYNOPSIS
 *      void str_free(string_ty *s);
 *
 * DESCRIPTION
 *      The str_free function is used to indicate that a string hash been
 *      finished with.
 *
 * RETURNS
 *      void
 *
 * CAVEAT
 *      This is the only way to release strings DO NOT use the free function.
 */

void
str_free(string_ty *s)
{
    str_hash_ty     idx;
    string_ty       **spp;

    if (!s)
        return;
    if (s->str_references > 1)
    {
        s->str_references--;
        return;
    }

    /*
     * find the hash bucket it was in,
     * and remove it
     */
    idx = s->str_hash & hash_mask;
    for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
    {
        if (*spp == s)
        {
            *spp = s->str_next;
            free(s);
            --hash_load;
            return;
        }
    }

    /*
     * should never reach here!
     */
    explain_output_error_and_die("attempted to free non-existent string (bug)");
}


/*
 * NAME
 *      str_catenate - join two strings
 *
 * SYNOPSIS
 *      string_ty *str_catenate(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *      The str_catenate function is used to concatenate two strings to form a
 *      new string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_catenate(string_ty *s1, string_ty *s2)
{
    static char     *tmp;
    static size_t   tmplen;
    string_ty       *s;
    size_t          length;

    length = s1->str_length + s2->str_length;
    if (tmplen < length)
    {
        tmplen = length;
        tmp = explain_realloc_or_die(tmp, tmplen);
    }
    memcpy(tmp, s1->str_text, s1->str_length);
    memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
    s = str_n_from_c(tmp, length);
    return s;
}


/*
 * NAME
 *      str_cat_three - join three strings
 *
 * SYNOPSIS
 *      string_ty *str_cat_three(string_ty *, string_ty *, string_ty *);
 *
 * DESCRIPTION
 *      The str_cat_three function is used to concatenate three strings to form
 *      a new string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_cat_three(string_ty *s1, string_ty *s2, string_ty *s3)
{
    static char     *tmp;
    static size_t   tmplen;
    string_ty       *s;
    size_t          length;

    length = s1->str_length + s2->str_length + s3->str_length;
    if (tmplen < length)
    {
        tmplen = length;
        tmp = explain_realloc_or_die(tmp, tmplen);
    }
    memcpy(tmp, s1->str_text, s1->str_length);
    memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
    memcpy(tmp + s1->str_length + s2->str_length, s3->str_text, s3->str_length);
    s = str_n_from_c(tmp, length);
    return s;
}


/*
 * NAME
 *      str_equal - test equality of strings
 *
 * SYNOPSIS
 *      int str_equal(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *      The str_equal function is used to test if two strings are equal.
 *
 * RETURNS
 *      int; zero if the strings are not equal, nonzero if the strings are
 *      equal.
 *
 * CAVEAT
 *      This function is implemented as a macro in strings.h
 */

#ifndef str_equal

int
str_equal(string_ty *s1, string_ty *s2)
{
    return (s1 == s2);
}

#endif


/*
 * NAME
 *      str_upcase - upcase a string
 *
 * SYNOPSIS
 *      string_ty *str_upcase(string_ty *);
 *
 * DESCRIPTION
 *      The str_upcase function is used to form a string which is an upper case
 *      form of the supplied string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_upcase(string_ty *s)
{
    static char     *tmp;
    static size_t   tmplen;
    string_ty       *retval;
    char            *cp1;
    char            *cp2;

    if (tmplen < s->str_length)
    {
        tmplen = s->str_length;
        tmp = explain_realloc_or_die(tmp, tmplen);
    }
    for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
    {
        int             c;

        c = *cp1;
        if (c >= 'a' && c <= 'z')
            c += 'A' - 'a';
        *cp2 = c;
    }
    retval = str_n_from_c(tmp, s->str_length);
    return retval;
}


/*
 * NAME
 *      str_downcase - lowercase string
 *
 * SYNOPSIS
 *      string_ty *str_downcase(string_ty *);
 *
 * DESCRIPTION
 *      The str_downcase function is used to form a string which is a lowercase
 *      form of the supplied string.
 *
 * RETURNS
 *      string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *      finished with.
 *
 * CAVEAT
 *      The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_downcase(string_ty *s)
{
    static char     *tmp;
    static size_t   tmplen;
    string_ty       *retval;
    char            *cp1;
    char            *cp2;

    if (tmplen < s->str_length)
    {
        tmplen = s->str_length;
        tmp = explain_realloc_or_die(tmp, tmplen);
    }
    for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
    {
        int             c;

        c = *cp1;
        if (c >= 'A' && c <= 'Z')
            c += 'a' - 'A';
        *cp2 = c;
    }
    retval = str_n_from_c(tmp, s->str_length);
    return retval;
}


/*
 * NAME
 *      str_bool - get boolean value
 *
 * SYNOPSIS
 *      int str_bool(string_ty *s);
 *
 * DESCRIPTION
 *      The str_bool function is used to determine the boolean value of the
 *      given string.  A "false" result is if the string is empty or
 *      0 or blank, and "true" otherwise.
 *
 * RETURNS
 *      int: zero to indicate a "false" result, nonzero to indicate a "true"
 *      result.
 */

int
str_bool(string_ty *s)
{
    char            *cp;

    cp = s->str_text;
    while (*cp)
    {
        if (!isspace((unsigned char)*cp) && *cp != '0')
            return 1;
        ++cp;
    }
    return 0;
}


/*
 * NAME
 *      str_field - extract a field from a string
 *
 * SYNOPSIS
 *      string_ty *str_field(string_ty *, char separator, int field_number);
 *
 * DESCRIPTION
 *      The str_field functipon is used to erxtract a field from a string.
 *      Fields of the string are separated by ``separator'' characters.
 *      Fields are numbered from 0.
 *
 * RETURNS
 *      Asking for a field off the end of the string will result in a null
 *      pointer return.  The null string is considered to have one empty field.
 */

string_ty *
str_field(string_ty *s, int sep, int fldnum)
{
    char            *cp;
    char            *ep;

    cp = s->str_text;
    while (fldnum > 0)
    {
        ep = strchr(cp, sep);
        if (!ep)
            return 0;
        cp = ep + 1;
        --fldnum;
    }
    ep = strchr(cp, sep);
    if (ep)
        return str_n_from_c(cp, ep - cp);
    return str_from_c(cp);
}


void
slow_to_fast(char **in, string_ty **out, size_t length)
{
    size_t          j;

    if (out[0])
        return;
    for (j = 0; j < length; ++j)
        out[j] = str_from_c(in[j]);
}


string_ty *
str_format(const char *fmt, ...)
{
    va_list         ap;
    string_ty       *result;

    va_start(ap, fmt);
    result = vmprintf_str(fmt, ap);
    va_end(ap);
    return result;
}


string_ty *
str_vformat(const char *fmt, va_list ap)
{
    return vmprintf_str(fmt, ap);
}
