#!/usr/bin/bash

# Name the script was invoked with; shown in the usage text.
BIN=$0
# Parsing mode flags, switched on by -f and -d respectively.
# Both are initialised here so that a same-named variable inherited from the
# caller's environment cannot silently change the parsing mode.
FILTER_IP=false
FILTER_DNS=false
# Default <host>:<port> handed to nscrub-cli; overridden by -c.
NSCRUB_ENDPOINT="127.0.0.1:8880"

# Print the usage text (description, options, examples) to stdout and then
# terminate the script. $BIN is expanded into the usage and example lines.
help() {
	local usage_text=(
		"Build a blacklist file reading a sources list from a file."
		"Usage:"
		"$BIN <sources file>"
		"Options:"
		"-f               Filter IP addresses (accept hostnames otherwise)"
		"-d               Filter DNS IP addresses"
		"-l <target>      Load IP blacklist from cloud.ntop.org to the target"
		"-c <host>:<port> Connect to the nscrub instance at <host>:<port> (default: 127.0.0.1:8880)"
		"-h               Print help"
		"Examples:"
		"$BIN -f blacklist-ip.sources "
		"$BIN blacklist-hostnames.sources "
		"$BIN -l web-server-1 "
		"$BIN -c 192.168.1.1:8880 -l web-server-1 "
	)

	# One entry per output line, each terminated by a newline.
	printf '%s\n' "${usage_text[@]}"
	exit
}

#######################################
# Build "<name>.list" in the current directory from a sources file.
#
# Every non-comment, non-blank line of the sources file is treated as a URL
# and downloaded with wget. Entries are extracted from each download
# according to the active mode, optionally filtered through
# "<name>.whitelist", checked against "<name>.warn", and finally merged,
# trimmed and de-duplicated into "<name>.list". <name> is the sources file
# name without directory and without its last extension.
#
# Globals:
#   FILTER_IP  (read) "true" -> keep only dotted-quad IPv4 addresses
#   FILTER_DNS (read) "true" -> keep the 2nd space-separated column
#                     (only consulted when FILTER_IP is not "true")
# Arguments:
#   $1 - path to the sources file; the usage text is shown when empty
# Outputs:
#   Progress ("[>] <url> [<count>]"), whitelist diff and warn-list matches
#   on stdout; diagnostics on stderr.
# Returns:
#   1 when the sources file is unreadable or no temp folder can be created.
#######################################
function buildList() {
	if [ -z "$1" ]; then
		help
	fi

	local sources=$1
	local filename stem whitelist warnlist out_file
	local tmpfolder tmpfile line items num_warn num wget_rc
	local -a lists

	filename=$(basename -- "$sources")
	stem="${filename%.*}"
	whitelist="${stem}.whitelist"
	warnlist="${stem}.warn"
	out_file="${stem}.list"

	if [ ! -r "$sources" ]; then
		echo "Cannot read sources file: $sources" >&2
		return 1
	fi

	tmpfolder=$(mktemp -d /tmp/XXXXXX) || {
		echo "Cannot create a temporary folder" >&2
		return 1
	}

	# 'read' keeps its default IFS so surrounding whitespace is stripped from
	# each URL; the '|| [ -n ... ]' part picks up a last line that has no
	# trailing newline.
	while read -r line || [ -n "$line" ]; do
		# Skip blank lines and comments
		case "$line" in
			'' | '#'*) continue ;;
		esac

		echo -n "[>] $line "
		tmpfile=$(mktemp "${tmpfolder}/XXXXXX")

		# Best effort: a failed download is reported (after the item count)
		# but whatever was fetched is still processed.
		wget_rc=0
		wget -q -t 1 --timeout 5 "${line}" -O "${tmpfile}" || wget_rc=$?

		if [ "$FILTER_IP" = true ]; then
			# Parsing:
			# 1. remove comments or empty lines
			# 2. get the first column if many (by tab, comma, semicolon)
			# 3. filter IPs
			# 4. filter out broadcast
			sed -e 's/#.*$//' -e '/^$/d' "${tmpfile}" \
				| cut -f1 | cut -d',' -f1 | cut -d';' -f1 \
				| grep -oE "\b([0-9]{1,3}\.){3}[0-9]{1,3}\b" \
				| grep -v "255\.255\.255\.255" > "${tmpfile}.list"
		elif [ "$FILTER_DNS" = true ]; then
			# Drop comment lines and keep the second space-separated column
			grep -v "^#" "${tmpfile}" | cut -d ' ' -f 2 > "${tmpfile}.list"
		else
			# Same as the IP mode, without the IP extraction
			sed -e 's/#.*$//' -e '/^$/d' "${tmpfile}" \
				| cut -f1 | cut -d',' -f1 | cut -d';' -f1 > "${tmpfile}.list"
		fi

		if [ -f "${whitelist}" ]; then
			# Remove IP/hosts in the whitelist (substring match).
			# Blank lines are stripped from the pattern file: an empty
			# pattern matches every entry and would wipe the whole list.
			grep -v -F -f <(grep -v '^$' "${whitelist}") "${tmpfile}.list" \
				> "${tmpfile}.list.filtered"

			# Show what the whitelist removed
			diff "${tmpfile}.list" "${tmpfile}.list.filtered"

			mv "${tmpfile}.list.filtered" "${tmpfile}.list"
		fi

		items=$(wc -l < "${tmpfile}.list")
		echo "[${items}]"

		if [ "$wget_rc" -ne 0 ]; then
			echo "Warning: download of ${line} failed (wget exit code ${wget_rc})" >&2
		fi

		if [ -f "${warnlist}" ]; then
			# Warn if IPs are in the list (substring match); blank pattern
			# lines are ignored for the same reason as above.
			grep -F -f <(grep -v '^$' "${warnlist}") "${tmpfile}.list" \
				> "${tmpfile}.list.warn"

			num_warn=$(wc -l < "${tmpfile}.list.warn")
			if [[ ${num_warn} -gt 0 ]]; then
				echo "Warning: this list contains ${num_warn} of the IPs listed in ${warnlist}"
				cat "${tmpfile}.list.warn"
			fi
		fi

		rm -f -- "${tmpfile}"
	done < "$sources"

	# Aggregation:
	# 1. trim
	# 2. drop entries left empty by the trim
	# 3. sort and get uniq occurrences
	lists=("${tmpfolder}"/*.list)
	if [ -e "${lists[0]}" ]; then
		cat -- "${lists[@]}" \
			| sed -e 's/^[ \t]*//;s/[ \t]*$//' -e '/^$/d' \
			| sort -u > "${out_file}"
	else
		# No usable source line: still produce an (empty) output file
		: > "${out_file}"
	fi
	rm -rf -- "${tmpfolder}"

	num=$(wc -l < "${out_file}")
	echo "[!] Generated ${out_file} with ${num} items."
}

#######################################
# Download the IP blacklist from cloud.ntop.org and load it into an nscrub
# target as the 'ntop-bl' list, after purging the target's attackers.
#
# Globals:
#   NSCRUB_ENDPOINT (read) <host>:<port> passed to nscrub-cli -c
# Arguments:
#   $1 - nscrub target name; the usage text is shown when empty
# Outputs:
#   Current date, wget progress and status messages.
# Returns:
#   1 when the blacklist cannot be downloaded (the target is left untouched).
#######################################
function loadList() {
	if [ -z "$1" ]; then
		help
	fi

	local target=$1
	# Fixed path: the same path is embedded in the 'load' command sent below.
	local list_file=/tmp/blacklist-ip.list

	date

	rm -f -- "${list_file}"

	# Do not purge the target unless a non-empty replacement list is in
	# hand, otherwise a failed download would leave it without a blacklist.
	if ! /usr/bin/wget https://cloud.ntop.org/blacklist-ip.list -O "${list_file}" \
		|| [ ! -s "${list_file}" ]; then
		echo "[!] unable to download the IP blacklist, target ${target} left untouched" >&2
		return 1
	fi

	echo "Loading IPs for ${target}"

	echo "target ${target} attackers purge all" | /usr/bin/nscrub-cli -c "${NSCRUB_ENDPOINT}"
	echo "target ${target} attackers load ntop-bl ${list_file} black" | /usr/bin/nscrub-cli -c "${NSCRUB_ENDPOINT}"

	echo "[!] blacklist 'ntop-bl' reloaded for target ${target}"
}

# Default action when neither -h nor -l is given: build a list from the
# sources file passed as first positional argument.
ACTION=build

# The leading ':' selects getopts' silent error mode: only then is the ':'
# arm reachable and OPTARG set to the offending option letter, so the
# messages below actually name the option.
while getopts ":hflc:d" opt; do
  case ${opt} in
    h)
      ACTION=help
      ;;
    c)
      NSCRUB_ENDPOINT=${OPTARG}
      ;;
    f)
      FILTER_IP=true
      ;;
    d)
      FILTER_DNS=true
      ;;
    l)
      # -l is a plain flag: the target is read from the first positional
      # argument left after option parsing.
      ACTION=load
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))

# Dispatch. The argument is quoted so a path or target containing spaces
# reaches the function as a single word.
case $ACTION in
help)
help
;;
load)
loadList "$1"
;;
*)
buildList "$1"
;;
esac

