#!/bin/bash
#
# A script to create an apt sources.list file automatically by downloading
# the list of Debian mirrors and choosing the fastest using netselect.
#
# Author: Avery Pennarun <apenwarr@gmail.com>
# Enhancements:
#    - Filippo Giunchedi <filippo@esaurito.net>
#    - Javier Fernandez-Sanguino <jfs@debian.org>
#
# License: public domain.  Please feel free to improve this script.  It
# doesn't really belong in the netselect package, particularly since the
# netselect package doesn't depend on wget and this script does.  If you
# feel like merging this into another package, or creating a new package,
# please do.  Then tell me, so I can remove it from the netselect package.
#
# TO DO:
#   - test the generated files automatically.  Some mirrors in the list
#       are broken.  We should at least verify that the Packages and Sources
#       files exist and aren't ancient.
#   - maybe generate redundant entries, in case one server is missing files?

# Defaults.
# RATIONALE for the WANT_SOURCES and WANT_NONFREE environment variables:
# both can be set system wide, which is useful to avoid having to specify
# them on the command line every time.

want_sources=${WANT_SOURCES:-0}
want_nonfree=${WANT_NONFREE:-0}
want_security=1
# Mirror list to parse.  Stays empty until -i is given or a temporary file
# is created for the download.
infile=""
# "1" while $infile is a temporary file owned by this script (it is removed
# on exit); the -i option sets it to "0".
tmpinfile="1"
# File to generate
outfile="sources.list"
# Default distribution
distro="stable"
# Default protocol
protocol="http"
# Number of fastest hosts that will be validated
test_hosts="10"
# URL where the mirror list is retrieved from
url="http://www.debian.org/mirror/mirrors_full"
# Test file retrieved from the sites to validate them
test_file="README"
# Country used to filter the list
country=""
# Debug mode?
DEBUG=""
if [ -x /usr/bin/dpkg-architecture ] ; then
    arch=$(/usr/bin/dpkg-architecture -qDEB_HOST_ARCH)
else
    arch=$(dpkg --print-architecture)
fi

# Option specification handed to getopt.  Only options that the parsing
# loop actually handles are listed, so that anything else is rejected by
# getopt itself with a proper error message.
options="-o a:dsi:o:O:t:c:nfh -l arch:,debug,sources,infile:,outfile:,tests:,country:,nonfree,ftp,help"

# Remove the temporary mirror list, if one was created, when the script ends
# or is signalled.  Nothing is removed while $infile is still empty.
trap '[ "$tmpinfile" = "1" ] && [ -n "$infile" ] && rm -f "$infile"' TERM INT EXIT

# misc functions
# Print a diagnostic message on stderr.
# Arguments: the words of the message; they are handed to echo unchanged.
log()
{
	>&2 echo "$@"
}

# Tests a list of hosts and prints, space separated on stdout, the
# (filtered) list of those that pass test_host.
# Arguments: the hosts (URLs) to test.
# When /usr/bin/curl is not installed no host can be tested, so the list is
# printed unfiltered.
tests_hosts()
{
    local host_list="$*"
    local host_filtered=""
    local host
    [ -n "$DEBUG" ] && log "DEBUG: Filtering host list is $host_list"
    # Do not continue if curl is not installed
    if [ ! -x /usr/bin/curl ]; then
        # Unquoted on purpose: the list is printed as single-space
        # separated words.
        echo $host_list
        return 0
    fi
    for host in $host_list; do
        # Test using curl
        [ -n "$DEBUG" ] && log "DEBUG: Testing host $host with curl"
        if test_host "$host"; then
            [ -n "$DEBUG" ] && log "DEBUG: Host $host is ALIVE"
            host_filtered="${host_filtered:+$host_filtered }$host"
        fi
    done
    [ -n "$DEBUG" ] && log "DEBUG: Filtered host list is $host_filtered"
    echo $host_filtered
}

# Prints the first host of the list that passes test_host.
# Arguments: the hosts (URLs) to check, best candidate first.
# Outputs:   the first valid host on stdout, nothing if there is none.
# Returns:   0 when a valid host was found, 255 otherwise.
validate_hosts()
{
    local host
    # $* is left unquoted on purpose: an argument holding several
    # whitespace separated hosts is still split into single hosts.
    for host in $*; do
        [ -n "$DEBUG" ] && log "DEBUG: Testing host $host with curl"
        if test_host "$host"; then
            [ -n "$DEBUG" ] && log "DEBUG: Host $host is valid"
            echo "$host"
            # We return as soon as we have a valid host since netselect
            # returns them in order (fastest to slowest)
            return 0
        fi
        [ -n "$DEBUG" ] && log "DEBUG: Host $host is INVALID"
    done
    # Exit statuses are limited to 0-255; 255 is what "return -1" yields on
    # shells that accept it at all.
    return 255
}

# Test a single host using curl.
# Arguments: $1 - URL of the mirror to test.
# Globals:   test_file (read) - file fetched from the mirror
#            DEBUG (read)     - enables debug messages when non-empty
# Returns:   0   when the host passed every test (or when no temporary
#                file could be created for the content tests),
#            255 when /usr/bin/curl is not installed or no host was given,
#            otherwise the status of the test that failed.
test_host()
{
    local host="$1"
    local rv=0
    local temp
    [ ! -x /usr/bin/curl  ] && return 255
    [ -z "${host}" ]        && return 255

    # First test: does it actually serve anything?
    # -q has to be the first argument for curl to honour it.
    curl -q -m 2 -s "$host"  >/dev/null 2>&1
    rv=$?
    # The status is saved first: testing $? directly would overwrite it
    # with the (successful) status of the test itself.
    [ $rv -ne 0 ] && return $rv

    # Second test: do we have the test file we are looking for?
    [ -n "$DEBUG" ] && log "DEBUG: Checking if the file '$test_file' is available in site"
    temp=$(mktemp)
    [ ! -e "$temp" ] && return 0 # Return without error if we cannot create
                                 # a temporary file
    curl -q -m 2 -s "$host/$test_file"  >"$temp" 2>&1
    rv=$?
    if [ $rv -ne 0 ] ; then
        rm -f "$temp"
        return $rv
    fi

    # Third test: does the file have any reference to 'Debian'
    [ -n "$DEBUG" ] && log "DEBUG: Checking the contents of the '$test_file' file (downloaded to '$temp')"
    grep -qi debian "$temp"
    rv=$?
    if [ $rv -ne 0 ] ; then
        rm -f "$temp"
        return $rv
    fi

    # Fourth test: does it have the content we expected
    head -n 1 "$temp" | grep -qi "See http://www.debian.org/"
    rv=$?

    # Whatever the outcome, the temporary file is no longer needed
    rm -f "$temp"
    return $rv
}

# Extracts the candidate mirrors from the mirror list and ranks them with
# netselect.
# Arguments: $1 - label searched for in the list (e.g. "Packages over HTTP")
#            $2 - protocol the mirror URL has to start with
# Globals:   infile, country, arch, netselect_options, DEBUG (read)
#            SEARCH, PROTO (written)
# Outputs:   the hosts reported by netselect, on one line, on stdout
# Returns:   200 when no host could be extracted from the list, otherwise
#            the exit status of netselect.
# NOTE(review): $country, $arch, $SEARCH and $PROTO are pasted into the perl
# program text, so quotes or regex metacharacters in them change the program.
run_netselect()
{
	local hosts out rv corruptedhosts
	SEARCH="$1"
	PROTO="$2"
	hosts=$(perl -n -e '
		use warnings;
		use strict;
		$/="<br><br>";
		my $country_name  = ".*";
		my $country_iso  = "..";
		my $country = "'"$country"'";
		my $my_country = 1;
		if ( $country ne "" ) {
			$my_country = 0;
			if ( $country =~ /^\w{2}$/ ) {
				$country_iso  = uc($country);
				$country_name = "";
			}  else {
				$country_iso  = "";
				$country_name = ucfirst(lc($country));
			} ;
		}
		while(<>){
			if ( /<h3><a name="$country_iso">/ ||
			     /<h3><a name=".*?">$country_name<\/a>/ ) {
				$my_country = 1 ;
			}
			if ( $_ =~ /<h3>/ && $_ !~ /<h3><a name="$country_iso">/ &&
			     $_ !~ /<h3><a name=".*?">$country_name<\/a>/ ) {
				$my_country = 0 ;
			}
			next if $_ !~ /Site:/;
			if( ( /Includes architectures:.+'"$arch"'.+/i ||
					$_ !~ /Includes architectures:/ ) &&
				m@'"$SEARCH"':.*<a.*?href="('"$PROTO"'://.*?)">@i && $my_country == 1
				){
				print("$1\n");
			}}' < "$infile")
	[ -z "$hosts" ] && return 200
	[ -n "$DEBUG" ] && log "DEBUG: Running netselect: 'netselect $netselect_options $hosts'"
	# Both expansions are unquoted on purpose: each option and each host has
	# to reach netselect as a separate argument.
	out=$(netselect $netselect_options  $hosts)
	rv=$?
	# Throw away hosts with negative scores to workaround a netselect bug.
	# (netselect 0.3.ds1-25 seems to have problems pinging round-robin DNS)
	if corruptedhosts=$(echo "$out" | grep -E '^ *-'); then
		log "Detected corrupt scores from bug in netselect: $corruptedhosts"
		out=$(echo "$out" | sed -e '/^ *-/d')
	fi

	# Remove the score number leaving just the host.  $out is unquoted on
	# purpose so that all result lines are joined into a single one.
	echo $out | sed -e 's/[0-9]\+\s\+/ /g'
	return $rv
}

# Explains, on stderr, the most likely reasons why netselect did not come up
# with a mirror.
netselect_generic_error()
{
	log "netselect was unable to find a mirror, this probably means that"
	log "you are behind a firewall and it is blocking ICMP and/or"
	log "UDP traceroute. Or the servers tested are actively blocking"
	log "ICMP and/or UDP traceroute probes."
}

# Reports, on stderr, that netselect itself could not operate, which is
# attributed to missing permissions.
netselect_permission_error()
{
	local line
	for line in \
		"netselect was unable to operate successfully, please check the errors," \
		"most likely you don't have enough permission."
	do
		log "$line"
	done
}

# Reports, on stderr, that no host could be extracted from the mirror list
# and hints at the likely reason.
# Globals: tmpinfile, infile, url, country (read)
netselect_parse_error ()
{
	log "netselect-apt was unable to obtain a list of valid hosts from"
	# Name the source of the list: the file given by the user, or the download
	if [ "$tmpinfile" = "0" ] ;then
		log "the file provided ($infile)."
	else
		log "the file downloaded from the url '$url'."
	fi
	if [ -n "$country" ] ; then
		log "You told the program to only look for hosts in country '$country' "
		log "and there might be no mirrors to choose from that country. Remove the "
		log "option -c and try again."
	else
		log "This might happen because of any of the following reasons: "
		log "   - there was an error in the file "
		log "   - the file is not in the format netselect-apt expected "
		log "   - there is a bug in netselect-apt "
		log "Please manually check the file. If you believe its contents are correct, file "
		log "a bug (hint: use 'reportbug') against netselect-apt and provide the file as "
		log "well as the output generated by the program (hint: use 'script')."
	fi
}

# Reports, on stderr, that netselect was aborted before it produced a result.
netselect_aborted_error ()
{
	local message="netselect aborted before it was able to find a mirror."
	log "$message"
}



# Prints the command line help on stderr.  The current defaults are shown
# in parentheses.
# Globals: arch, test_hosts (read)
usage()
{
	log "Usage: netselect-apt [OPTIONS] [ debian_release ]"
	log "       debian_release is one of stable, testing, unstable, experimental"
	log "       or a codename etch, lenny, squeeze, wheezy, jessie, stretch, sid"
	log "Options:"
	log "   -a, --arch ARCH        Use mirrors containing arch ($arch)"
	log "   -s, --sources          Include deb-src lines in generated file (no)"
	log "   -i, --infile INFILE    Use INFILE as the input file (temp file)"
	log "   -o, --outfile OUTFILE  Use OUTFILE as the output file (sources.list)"
	log "   -n, --nonfree          Use also non-free packages in OUTFILE (no)"
	log "   -f, --ftp              Use FTP as the protocol for OUTFILE (http)"
	log "   -t, --tests #          Number of hosts to test ($test_hosts)"
	log "   -c, --country COUNTRY  Restrict search to servers in that country"
	log "   -d, --debug            Enable debugging"
	log "   -h, --help             Show this help message"
}

# Sanity tests
# Does netselect exist?  "command -v" is a shell builtin, so the lookup
# does not depend on the external "which" program being installed.
NETSELECT=$(command -v netselect)
if [ -z "$NETSELECT" ] ; then
    log "Sorry, I cannot find the 'netselect' binary in my PATH"
    exit 1
fi
# Are we root?  If we are not, is netselect setuid?
if [ "$(id -u)" -gt 0 ] && [ ! -u "$NETSELECT" ] ; then
    log "Sorry, you need to be root to run $0 since the netselect"
    log "binary we will use ($NETSELECT) is not setuid."
    exit 1
fi


# commandline parsing
# TODO: only test_hosts is sanity checked so far, the other values are
# taken as given.
# $options is unquoted on purpose: it holds several getopt arguments.
temp=$(getopt $options -n 'netselect-apt' -- "$@")
if [ $? != 0 ]; then echo "Terminating..." >&2; exit 2; fi
eval set -- "$temp"
# Options given with -O.  They are collected separately because the default
# netselect options can only be put together once -t has been parsed.
extra_netselect_options=""
while true; do
	case "$1" in
		-a|--arch) arch=$2; shift 2 ;;
		-s|--sources) want_sources=1; shift ;;
		-i|--infile) infile="$2"; tmpinfile="0"; shift 2 ;;
		-o|--outfile) outfile="$2"; shift 2 ;;
		-O) extra_netselect_options="$extra_netselect_options $2"; shift 2 ;;
		-n|--nonfree) want_nonfree=1; shift ;;
		-c|--country) country=$2; shift 2 ;;
		-t|--tests) test_hosts=$2; shift 2 ;;
		-f|--ftp) protocol="ftp"; shift ;;
		-d|--debug) DEBUG=1; shift ;;
		-h|--help) usage; exit 0;;
		--) shift; break;;
		*) echo "Internal Error!" >&2; echo "args: $*" >&2; exit 1;;
	esac
done

# test_hosts is handed to netselect and compared numerically further down,
# so it has to be a positive integer.
case "$test_hosts" in
	''|*[!0-9]*)
		log "Invalid number of hosts to test: '$test_hosts'"
		exit 2
		;;
esac
if [ "$test_hosts" -lt 1 ]; then
	log "Invalid number of hosts to test: '$test_hosts'"
	exit 2
fi

# Default netselect options, followed by the ones passed with -O
netselect_options="-D -I -v -s $test_hosts$extra_netselect_options"
# The '-s number' option can be used to display the top 'number' best
# hosts. This is useful for finding the best host to use in a sources.list file
# that is shared between several hosts on different networks in the same
# geographical location. For example, it would be better to choose a host that
# is #2 on all hosts than #1 on one host but is #50 on all the rest.

# The distribution is taken from the first remaining argument rather than
# from an option, for backward compatibility.  Without an argument the
# current value of $distro is kept.
if [ -n "$1" ]; then
	case "$1" in
		# don't forget to update the usage with new codenames
		stable|testing|unstable|experimental)
			distro="$1"
			;;
		etch|lenny|squeeze|wheezy|jessie|stretch|sid)
			distro="$1"
			;;
		*)
			log "Invalid distribution: $1"
			exit 1
			;;
	esac
fi

# Security updates are only enabled for "stable"
[ "$distro" = "stable" ] || want_security=0

# netselect starting
log "Using distribution $distro."

# wget is needed whenever the mirror list has to be downloaded, that is
# whenever $infile does not name an existing file.  This also covers the
# case where -i was not given at all ($infile is still empty then).
if [ ! -f "$infile" ] && [ ! -x /usr/bin/wget ]; then
	log "Sorry, this script requires the 'wget' package in order to run."
	log "You can also download the mirrors list yourself and pass it"
	log "with -i option, consult the manpage for further details:"
	log "        $url"
	exit 2
fi

# Get hold of the mirror list: reuse $infile when it already exists,
# download it from $url otherwise.
if [ -f "$infile" ]; then
	log "$infile has been found."
	log "I'll use that, rather than downloading it again."
	log
else
	# Without -i the list goes into a temporary file
	if [ "$tmpinfile" = "1" ]; then
		infile=$(mktemp -t netselect-apt.XXXXXX) ||
			{ log "unable to create tempfile"; exit 2; }
	fi

	log "Retrieving the list of mirrors from www.debian.org..."
	log

	/usr/bin/wget -O "$infile" "$url" || {
		log "$0: wget failed to retrieve $url."
		log "Please try to correct the problem by reading the wget "
		log " messages printed above."
		exit 2
	}
fi

log "Choosing a main Debian mirror using netselect."
if [ -n "$country" ]; then
    log "(will filter only for mirrors in country $country)"
fi
# Convert Search string to uppercase as it is used in the website
search_string="Packages over ${protocol^^}"
hosts_netselect=$(run_netselect "$search_string" $protocol)
netselect_rv=$?

# Anything but a clean run of run_netselect is fatal
case "$netselect_rv" in
	0) ;;
	6) netselect_permission_error; exit 2 ;;
	130) netselect_aborted_error; exit 2 ;;
	200) netselect_parse_error; exit 2 ;;
	*) netselect_generic_error; exit 2 ;;
esac

if [ -z "$hosts_netselect" ]; then
	netselect_generic_error
	exit 2
fi

if [ "$test_hosts" -gt 1 ]; then
	log "The fastest $test_hosts servers seem to be:"
	echo "$hosts_netselect" | sed -e 's/\s\+/\n\t/g'
	log
else
	log "The fastest server seems to be:"
	log "          $hosts_netselect"
	log
fi

# $hosts_netselect is unquoted on purpose: every host is one argument
main=$(validate_hosts $hosts_netselect)
if [ -z "$main" ]; then
	# run_netselect was executed in a subshell, so nothing it set is
	# visible here; the protocol is reported from $protocol.
	log "No valid servers were found that could be reachable"
	log "through $protocol. Netselect found these hosts to be "
	log "closest to you, but we could not connect to any "
	log "of them using that protocol."
	exit 2
fi

if [ "$test_hosts" -gt 1 ]; then
	log "Of the hosts tested we choose the fastest valid for $protocol:"
	log "        $main"
	log
else
	log "and is a valid server for $protocol."
fi

log "Writing $outfile."

# Keep a copy of an already existing file, named after the current time
if [ -f "$outfile" ]; then
	destfile="$outfile.$(date +%s)"
	log "$outfile exists, moving to $destfile"
	mv -- "$outfile" "$destfile"
fi

sections="main contrib"
if [ "$want_nonfree" -eq 1 ]; then sections="$sections non-free"; fi

# Entries that were not asked for are still written, but commented out.
{
	echo "# Debian packages for $distro"
	echo "deb $main $distro $sections"
	echo "# Uncomment the deb-src line if you want 'apt-get source'"
	echo "# to work with most packages."
	if [ "$want_sources" -eq 0 ]; then
		printf '# '
	fi
	echo "deb-src $main $distro $sections"

	echo
	echo "# Security updates for stable"
	if [ "$want_security" -eq 0 ]; then
		printf '# '
	fi
	echo "deb http://security.debian.org/ stable/updates $sections"

} > "$outfile"

echo "Done."
