#! /usr/bin/env perl
#
# Static Hashtable Generator
#
# (c) 2000-2002 by Harri Porten <porten@kde.org> and
#                  David Faure <faure@kde.org>
# Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
# Copyright (C) 2007-2023 Apple Inc. All rights reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
 
use strict;
use warnings;
use bigint;
use Getopt::Long qw(:config pass_through);

# The table description to read is taken from the front of @ARGV.
my $file = shift @ARGV or die("Must provide source file as final argument.");

# Three-argument open: the path is always treated as a plain file to read,
# even if it starts with '>' or ends with '|'. The handle stays the bareword
# IN because the parsing loop reads from <IN>.
open(IN, '<', $file) or die "Cannot open $file: $!";

# State collected for the table currently being parsed (parallel arrays,
# one element per entry line).
my @keys = ();      # property names
my @attrs = ();     # attribute string of each key ("None" when omitted)
my @values = ();    # hash ref describing the value of each key

# Scratch arrays filled by calcCompactHashSize() and read by output().
my @table = ();     # slot -> index into @keys
my @links = ();     # slot -> next slot of the collision chain

# Emitted verbatim into the generated HashTable initializer.
my $hasSetter = "false";

my $includeBuiltin = 0;     # set once a value of "JSBuiltin" is seen
my $inside = 0;             # true between @begin and @end
my $name;                   # name of the table being parsed
my $perfectHashSize;        # set by calcPerfectHashSize()
my $compactSize;            # set by calcCompactHashSize(): number of index slots
my $compactHashSizeMask;    # set by calcCompactHashSize(): primary size - 1
my $banner = 0;             # true once the "Automatically generated" line is printed

# Evaluated under "use bigint", so both masks are exact integers.
my $mask64 = 2**64 - 1;
my $mask32 = 2**32 - 1;

# Forward declarations so the prototypes are known where these subs are
# called before their definitions appear.
sub calcPerfectHashSize($);
sub calcCompactHashSize($);
sub output();
sub jsc_ucfirst($);
sub hashValue($$);
# Parse the table description line by line. Every "@begin <name>" ... "@end"
# section is collected into @keys/@attrs/@values and emitted by output() when
# its "@end" is reached. Entry lines have the form
#     key value [attributes [parameter [intrinsic]]]
# where attributes is a '|'-separated list.
while (<IN>) {
    chomp;
    s/^\s+//;
    next if /^\#|^$/; # Comment or blank line. Do nothing.
    if (/^\@begin/ && !$inside) {
        if (/^\@begin\s*([:_\w]+)\s*\d*\s*$/) {
            $inside = 1;
            $name = $1;
        } else {
            print STDERR "WARNING: \@begin without table name, skipping $_\n";
        }
    } elsif (/^\@end\s*$/ && $inside) {
        output();

        # Reset every piece of per-table state, including $hasSetter, so that
        # one table's properties do not influence the tables that follow it.
        @keys = ();
        @attrs = ();
        @values = ();
        $includeBuiltin = 0;
        $hasSetter = "false";

        $inside = 0;
    } elsif (/^(\S+)\s+(\S+)\s*([\w\|]*)\s*(\w*)\s*(\w*)\s*$/ && $inside) {
        # Key and value must be separated by whitespace; otherwise a line
        # holding a single token would be split in two by backtracking
        # instead of being rejected as invalid data below.
        my $key = $1;
        my $val = $2;
        my $att = $3;
        my $param = $4;
        my $intrinsic = $5;

        push(@keys, $key);
        push(@attrs, length($att) > 0 ? $att : "None");

        if ($val eq "JSBuiltin") {
            $includeBuiltin = 1;
        }

        # The attribute list decides which kind of value record is stored;
        # the first matching kind wins.
        if ($att =~ m/Function/) {
            push(@values, { "type" => "PropertyAttribute::Function", "function" => $val, "params" => (length($param) ? $param : ""), "intrinsic" => (length($intrinsic) ? $intrinsic : "NoIntrinsic") });
            #printf STDERR "WARNING: Number of arguments missing for $key/$val\n" if (length($param) == 0);
        } elsif ($att =~ m/CellProperty/) {
            my $property = $val;
            push(@values, { "type" => "PropertyAttribute::CellProperty", "property" => $property });
        } elsif ($att =~ m/ClassStructure/) {
            my $property = $val;
            push(@values, { "type" => "PropertyAttribute::ClassStructure", "property" => $property });
        } elsif ($att =~ m/PropertyCallback/) {
            my $cback = $val;
            push(@values, { "type" => "PropertyAttribute::PropertyCallback", "cback" => $cback });
        } elsif (length($att)) {
            # Plain property: the getter is the value itself; the setter name
            # is derived from it unless the property is read-only.
            my $get = $val;
            my $put = "0";
            if (!($att =~ m/ReadOnly/)) {
                $put = "set" . jsc_ucfirst($val);
            }
            $hasSetter = "true";
            push(@values, { "type" => "PropertyAttribute::Property", "get" => $get, "put" => $put });
        } else {
            # No attributes at all: a lexer table entry.
            push(@values, { "type" => "Lexer", "value" => $val });
        }
    } elsif ($inside) {
        die "invalid data {" . $_ . "}";
    }
}

close(IN);

die "missing closing \@end" if ($inside);

# Build the capitalised form of a getter name for use in a setter name.
# If the name contains "js", its first occurrence becomes "JS" and nothing
# else is changed; otherwise the first character is upper-cased.
sub jsc_ucfirst($)
{
    my ($identifier) = @_;

    # s/// is true only when it replaced something, so a successful
    # substitution already produced the final result.
    return $identifier if $identifier =~ s/js/JS/;

    return ucfirst($identifier);
}


# Return the smallest power of two that is not less than the argument
# (1 for any argument of 1 or below).
sub ceilingToPowerOf2
{
    my ($minimum) = @_;

    my $candidate = 1;
    $candidate <<= 1 until $candidate >= $minimum;

    return $candidate;
}

# Find the smallest power-of-two table size, starting from the number of keys
# rounded up, at which no two keys hash to the same slot, and store it in
# $perfectHashSize. Dies once the candidate size exceeds 2**15.
#
# $useWYHash is passed through to hashValue() to select the hash function.
sub calcPerfectHashSize($)
{
    my ($useWYHash) = @_;

    $perfectHashSize = ceilingToPowerOf2(scalar @keys);
    while (1) {
        if ($perfectHashSize > 2**15) {
            die "The hash size is far too big. This should not be reached.";
        }

        # Mark each slot as it is claimed; stop at the first slot claimed twice.
        my @occupied = ();
        my $foundCollision = 0;
        foreach my $key (@keys) {
            my $slot = hashValue($key, $useWYHash) % $perfectHashSize;
            if ($occupied[$slot]) {
                $foundCollision = 1;
                last;
            }
            $occupied[$slot] = 1;
        }

        return unless $foundCollision;

        # Collision: retry with a table twice as large.
        $perfectHashSize += $perfectHashSize;
    }
}

# Shift $bits left by $count positions and keep only the low 32 bits of the
# result.
sub leftShift($$) {
    my ($bits, $count) = @_;

    my $shifted = $bits << $count;
    return $shifted & 0xFFFFFFFF;
}

# Lay the keys out in the compact index used by the generated table.
#
# The primary area has ceilingToPowerOf2(2 * number of keys) slots. A key
# whose primary slot is taken is appended after the primary area and reached
# through the @links chain starting at its primary slot.
#
# Results are left in file-level variables:
#   @table               slot -> index into @keys
#   @links               slot -> next slot of the chain
#   $compactSize         total number of slots (primary + appended)
#   $compactHashSizeMask primary size - 1
#
# $useWYHash is passed through to hashValue() to select the hash function.
# Dies if a collision is seen while $compactSize exceeds 1000, or if a chain
# is followed for more than 100 links.
sub calcCompactHashSize($)
{
    my ($useWYHash) = @_;
    my $compactHashSize = ceilingToPowerOf2(2 * @keys);
    $compactHashSizeMask = $compactHashSize - 1;
    $compactSize = $compactHashSize;
    my $i = 0;
    foreach my $key (@keys) {
        my $depth = 0;
        my $h = hashValue($key, $useWYHash) % $compactHashSize;
        while (defined($table[$h])) {
            if ($compactSize > 1000) {
                die "The hash size is far too big. This should not be reached.";
            }
            if ($depth > 100) {
                die "The depth is far too big. This should not be reached.";
            }
            if (defined($links[$h])) {
                # Slot taken and already chained: follow the chain.
                $h = $links[$h];
                $depth++;
            } else {
                # End of the chain: append a fresh slot and link to it.
                $links[$h] = $compactSize;
                $h = $compactSize;
                $compactSize++;
            }
        }
        $table[$h] = $i;
        $i++;
    }
}

# Final mixing step of the hash: forces "avalanching" of the lower 32 bits of
# the argument and returns the mixed 32-bit value.
sub avalancheBits($) {
    my ($hash) = @_;

    # Only the low 32 bits take part.
    $hash = $hash & $mask32;

    # First round: xor with (hash << 3), then add (hash >> 5).
    $hash = $hash ^ leftShift($hash, 3);
    $hash = ($hash + ($hash >> 5)) & $mask32;

    # Second round: xor with (hash << 2), then add (hash >> 15).
    $hash = $hash ^ (leftShift($hash, 2) & $mask32);
    $hash = ($hash + ($hash >> 15)) & $mask32;

    # Last step: xor with (hash << 10).
    return $hash ^ (leftShift($hash, 10) & $mask32);
}

# Reduce a hash to its low 24 bits and make sure the result is never zero.
sub maskTop8BitsAndAvoidZero($) {
    my ($hash) = @_;

    # Keep the low 32 bits, then drop the top 8 of those: they are saved for
    # StringImpl to use as flags.
    my $low24 = ($hash & $mask32) & 0xffffff;

    # A hash code of 0 signals "hash not computed yet", so it must never be
    # returned. Substituting the high bit stays close to a hash code of 0,
    # because it is likely to yield exactly 0 once hash lookup masks out the
    # high bits.
    return $low24 == 0 ? (0x80000000 >> 8) : $low24;
}

# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
#
# Takes a list of single-character strings and returns the hash after
# avalancheBits() and maskTop8BitsAndAvoidZero() have been applied.
#
# The algorithm consumes 16-bit chunks; every input character is treated as
# one such chunk, so characters are processed two at a time with an optional
# trailing single character.
sub superFastHash {
    my @chars = @_;

    my $hash = 0x9e3779b9;
    my $charCount = scalar @chars;
    my $rem = $charCount & 1;
    my $pairCount = $charCount >> 1;

    my $s = 0;

    # Main loop
    for (; $pairCount > 0; $pairCount--) {
        # Mask right after the addition: a carry into bit 32 would otherwise
        # survive until the ">> 11" below and leak into the low 32 bits.
        $hash   = ($hash + ord($chars[$s])) & $mask32;
        my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
        $hash   = (leftShift($hash, 16) & $mask32) ^ $tmp;
        $s += 2;
        $hash += $hash >> 11;
        $hash &= $mask32;
    }

    # Handle end case
    if ($rem != 0) {
        # Same masking as above, this time protecting the ">> 17".
        $hash = ($hash + ord($chars[$s])) & $mask32;
        $hash ^= (leftShift($hash, 11) & $mask32);
        $hash += $hash >> 17;
    }

    # avalancheBits() starts by masking to 32 bits, so an overflow from the
    # final addition above is harmless.
    $hash = avalancheBits($hash);
    return maskTop8BitsAndAvoidZero($hash);
}

# Add two values and keep the low 64 bits of the sum.
sub uint64_add($$) {
    my ($lhs, $rhs) = @_;
    return ($lhs + $rhs) & $mask64;
}

# Multiply two values and keep the low 64 bits of the product.
sub uint64_multi($$) {
    my ($lhs, $rhs) = @_;
    return ($lhs * $rhs) & $mask64;
}

# Multiply $A by $B using 32-bit halves and return the list (low, high):
# the low 64 bits and the high 64 bits of the product.
sub wymum($$) {
    my ($A, $B) = @_;

    # Split each operand into its upper and lower 32 bits.
    my ($highA, $lowA) = ($A >> 32, $A & $mask32);
    my ($highB, $lowB) = ($B >> 32, $B & $mask32);

    # The four partial products.
    my $highProduct = uint64_multi($highA, $highB);
    my $crossAB     = uint64_multi($highA, $lowB);
    my $crossBA     = uint64_multi($highB, $lowA);
    my $lowProduct  = uint64_multi($lowA, $lowB);

    # Fold the cross products into the low word. A wrapped 64-bit sum is
    # smaller than its previous value, which is how each carry is detected.
    my $partial = uint64_add($lowProduct, ($crossAB << 32));
    my $carry = int($partial < $lowProduct);

    my $lo = uint64_add($partial, ($crossBA << 32));
    $carry += int($lo < $partial);

    # High word: high product + upper halves of the cross products + carries.
    my $hi = uint64_add($highProduct, uint64_add(($crossAB >> 32), uint64_add(($crossBA >> 32), $carry)));

    return ($lo, $hi);
}

# Multiply the two arguments with wymum() and xor the two halves it returns.
sub wymix($$) {
    my ($A, $B) = @_;
    my ($lo, $hi) = wymum($A, $B);
    return $lo ^ $hi;
}

# Spread the four bytes of a 32-bit value over a 64-bit value so that each
# byte ends up in the low half of its own 16-bit lane:
# 0xAABBCCDD -> 0x00AA_00BB_00CC_00DD.
sub convert32BitTo64Bit($) {
    my ($packed) = @_;

    # Written in decimal; the hex forms are shown alongside.
    my $halfwordMask = 281470681808895;   # 0x0000_ffff_0000_ffff
    my $byteMask     = 71777214294589695; # 0x00ff_00ff_00ff_00ff

    # Step 1: move the upper 16 bits into bits 32..47.
    my $halfwords = ($packed | ($packed << 16)) & $halfwordMask;

    # Step 2: within each 32-bit half, move the upper byte up by 8 bits.
    return ($halfwords | ($halfwords << 8)) & $byteMask;
}

# Spread the two bytes of a 16-bit value over a 32-bit value so that each
# byte ends up in the low half of its own 16-bit lane: 0xAABB -> 0x00AA_00BB.
sub convert16BitTo32Bit($) {
    my ($packed) = @_;

    my $spread = $packed | ($packed << 8);
    return $spread & 0x00ff_00ff;
}

# Compute a wyhash-style hash of a list of single-character strings and
# return it after maskTop8BitsAndAvoidZero() has been applied.
#
# Indices ($charIndex and the arguments of the reader helpers) count
# characters, while $byteCount counts two bytes per character.
# NOTE(review): this presumably mirrors hashing the string as 16-bit code
# units whose values fit in 8 bits — confirm against the C++ implementation.
sub wyhash {
    # https://github.com/wangyi-fudan/wyhash
    my @chars = @_;
    my $charCount = scalar @chars;
    my $byteCount = $charCount << 1;
    my $charIndex = 0;
    my $seed = 0;
    my @secret = ( 11562461410679940143, 16646288086500911323, 10285213230658275043, 6384245875588680899 );
    # NOTE(review): looks like the byte offset ((len >> 3) << 2) turned into a
    # character offset by the final ">> 1" — confirm.
    my $move1 = (($byteCount >> 3) << 2) >> 1;

    $seed ^= wymix($seed ^ $secret[0], $secret[1]);
    my $a = 0;
    my $b = 0;

    # The helpers below are closures over @chars. Assigning them to localized
    # globs makes them callable by name (c2i(...), wyr8(...), ...) for the
    # duration of this call.

    # Code point of the character at index $i.
    local *c2i = sub {
        my ($i) = @_;
        return ord($chars[$i]);
    };

    # Pack four consecutive characters (first one in the lowest byte) into a
    # 32-bit value and spread it with convert32BitTo64Bit().
    local *wyr8 = sub {
        my ($i) = @_;
        my $v = c2i($i) | (c2i($i + 1) << 8) | (c2i($i + 2) << 16) | (c2i($i + 3) << 24);
        return convert32BitTo64Bit($v);
    };

    # Pack two consecutive characters (first one in the lowest byte) into a
    # 16-bit value and spread it with convert16BitTo32Bit().
    local *wyr4 = sub {
        my ($i) = @_;
        my $v = c2i($i) | (c2i($i + 1) << 8);
        return convert16BitTo32Bit($v);
    };

    # Code point of a single character, shifted left by 16 bits.
    local *wyr2 = sub {
        my ($i) = @_;
        return c2i($i) << 16;
    };

    if ($byteCount <= 16) {
        # Short input: $a and $b are built directly from a few reads.
        if ($byteCount >= 4) {
            $a = (wyr4($charIndex) << 32) | wyr4($charIndex + $move1);
            # Continue from the last two characters.
            $charIndex = $charIndex + $charCount - 2;
            $b = (wyr4($charIndex) << 32) | wyr4($charIndex - $move1);
        } elsif ($byteCount > 0) {
            $a = wyr2($charIndex);
            $b = 0;
        } else {
            $a = $b = 0;
        }
    } else {
        # Longer input: fold blocks into the seed first. $i is the number of
        # bytes not yet consumed.
        my $i = $byteCount;
        if ($i > 48) {
            # Three independent lanes, 48 bytes (24 characters) per round.
            my $see1 = $seed;
            my $see2 = $seed;
            do {
                $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
                $see1 = wymix(wyr8($charIndex + 8) ^ $secret[2], wyr8($charIndex + 12) ^ $see1);
                $see2 = wymix(wyr8($charIndex + 16) ^ $secret[3], wyr8($charIndex + 20) ^ $see2);
                $charIndex += 24;
                $i -= 48;
            } while ($i > 48);
            $seed ^= $see1 ^ $see2;
        }
        # Then 16 bytes (8 characters) per round while more than 16 remain.
        while ($i > 16) {
            $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
            $i -= 16;
            $charIndex += 8;
        }
        # $a and $b come from the eight characters that end $move2 characters
        # after $charIndex; $move2 is the remaining byte count halved.
        my $move2 = $i >> 1;
        $a = wyr8($charIndex + $move2 - 8);
        $b = wyr8($charIndex + $move2 - 4);
    }
    $a ^= $secret[1];
    $b ^= $seed;

    # Final mixing; only the low 32 bits of the result are kept.
    ($a, $b) = wymum($a, $b);
    my $hash = wymix($a ^ $secret[0] ^ $byteCount, $b ^ $secret[1]) & $mask32;

    return maskTop8BitsAndAvoidZero($hash);
}

# Hash $string with the function selected by $useWYHash.
#
# With $useWYHash true, strings longer than 48 characters are hashed with
# wyhash(); everything else (and every string when $useWYHash is false) is
# hashed with superFastHash().
sub hashValue($$) {
    my ($string, $useWYHash) = @_;

    # An empty pattern splits into single characters and keeps every one of
    # them, whereas a pattern of optional spaces would also swallow any space
    # characters in the string.
    my @chars = split(//, $string);

    if ($useWYHash && scalar(@chars) > 48) {
        return wyhash(@chars);
    }

    return superFastHash(@chars);
}

# Print the generated C++ for the table currently held in @keys, @attrs and
# @values: the includes, then inside "namespace JSC" the CompactHashIndex
# array, the HashTableValue array and the HashTable definition.
#
# The table is generated twice, once per hash function. When both texts are
# identical it is printed once; otherwise both are printed, selected by
# "#if ENABLE(WYHASH_STRING_HASHER)". The "Automatically generated" banner is
# printed only the first time output() runs.
sub output() {
    if (!$banner) {
        $banner = 1;
        print "// Automatically generated from $file using $0. DO NOT EDIT!\n";
    }

    # The array identifiers must be plain C++ identifiers, so ':' in the
    # table name is replaced with '_'.
    my $nameEntries = "${name}Values";
    $nameEntries =~ s/:/_/g;
    my $nameIndex = "${name}Index";
    $nameIndex =~ s/:/_/g;

    print "\n";
    print "#include \"JSCBuiltins.h\"\n" if ($includeBuiltin);
    print "#include \"Lookup.h\"\n";
    print "\n";
    print "namespace JSC {\n";
    print "\n";

    # Build and return the text of all three definitions for one hash
    # function.
    #   $useWYHash       passed to the calc*HashSize() subs to pick the hash
    #   $setToOldValues  when true, $compactSize and $compactHashSizeMask are
    #                    put back to the values they had before this call
    # @table and @links are always emptied again before returning.
    local *generateHashTableHelper = sub {
        my ($useWYHash, $setToOldValues) = @_;
        my $oldCompactSize = $compactSize;
        my $oldCompactHashSizeMask = $compactHashSizeMask;
        calcPerfectHashSize($useWYHash);
        calcCompactHashSize($useWYHash);

        my $hashTableString = "";

        # Index array: one { key index, next slot } pair per slot, with -1
        # standing for "none".
        $hashTableString .= "static constinit const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n";
        for (my $i = 0; $i < $compactSize; $i++) {
            my $T = -1;
            if (defined($table[$i])) { $T = $table[$i]; }
            my $L = -1;
            if (defined($links[$i])) { $L = $links[$i]; }
            $hashTableString .= "    { $T, $L },\n";
        }

        $hashTableString .= "};\n";
        $hashTableString .= "\n";

        # Value array: one initializer per key, in declaration order.
        my $packedSize = scalar @keys;
        $hashTableString .= "static constinit const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n";

        my $i = 0;
        foreach my $key (@keys) {
            my $typeTag = "";
            my $firstValue = "";
            my $secondValue = "";
            my $hasSecondValue = 1;
            my $intrinsic = "NoIntrinsic";

            if ($values[$i]{"type"} eq "PropertyAttribute::Function") {
                $typeTag = "NativeFunction";
                $firstValue = $values[$i]{"function"};
                $secondValue = $values[$i]{"params"};
                $intrinsic = $values[$i]{"intrinsic"};
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") {
                $typeTag = "GetterSetter";
                $firstValue = $values[$i]{"get"};
                $secondValue = $values[$i]{"put"};
            } elsif ($values[$i]{"type"} eq "Lexer") {
                $typeTag = "Lexer";
                $firstValue = $values[$i]{"value"};
                $hasSecondValue = 0;
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") {
                $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure";
                # The value must look like Class::member; say which entry is
                # malformed instead of dying without a message.
                $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/
                    or die "Expected Class::member as value of $key but found {" . $values[$i]{"property"} . "}";
                $firstValue = "OBJECT_OFFSETOF($1, $2)";
                $hasSecondValue = 0;
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") {
                $typeTag = "LazyProperty";
                $firstValue = $values[$i]{"cback"};
                $hasSecondValue = 0;
            }

            # "A|B" becomes
            # "static_cast<unsigned>(PropertyAttribute::A|PropertyAttribute::B)".
            my $attributes = "PropertyAttribute::" . $attrs[$i];
            $attributes =~ s/\|/\|PropertyAttribute::/g;
            $attributes = "static_cast<unsigned>(" . $attributes . ")";
            if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin")  {
                # Builtin function: the Function attribute is swapped for
                # Builtin, and the code generator name is built from the table
                # name (minus a trailing "Table") and the capitalised key.
                $typeTag = "BuiltinGenerator";
                my $tableHead = $name;
                $tableHead =~ s/Table$//;
                $hashTableString .= "   { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n";
            }
            else {
                $hashTableString .= "   { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n";
            }
            $i++;
        }
        $hashTableString .= "};\n";
        $hashTableString .= "\n";
        $hashTableString .= "static constinit const struct HashTable $name =\n";
        $hashTableString .= "    \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n";
        $hashTableString .= "\n";

        # Leave the scratch arrays empty for the next layout computation.
        @table = ();
        @links = ();
        if ($setToOldValues) {
            $compactSize = $oldCompactSize;
            $compactHashSizeMask = $oldCompactHashSizeMask;
        }
        return $hashTableString;
    };

    my $hashTableForWYHash = generateHashTableHelper(1, 1);
    my $hashTableForSFHash = generateHashTableHelper(0, 0);
    my $hashTableToWrite = $hashTableForWYHash;
    if ($hashTableForWYHash ne $hashTableForSFHash) {
        $hashTableToWrite = "#if ENABLE(WYHASH_STRING_HASHER)\n" . $hashTableForWYHash . "#else\n" . $hashTableForSFHash . "#endif\n";
    }
    print $hashTableToWrite;

    print "} // namespace JSC\n";
}
