#!/bin/sh

# getlinks - Given a URL, returns all of its internal and external
#   links.

# Called without any argument: print the usage summary on stderr and
# stop with a non-zero status.
[ "$#" -gt 0 ] || {
  {
    echo "Syntaxe: $0 [-d|-i|-x] URL"
    echo "  -d=seulement les domaines"
    echo "  -i=uniquement les rfrences internes"
    echo "  -x=rfrences externes seules"
  } >&2
  exit 1
}

# ---------------------------------------------------------------------
# Option handling.
#
# Chooses the last stage of the link pipeline and stores its name in
# $lastcmd.  Every stage reads one URL per line on stdin:
#   (no option)  every link, sorted and de-duplicated
#   -d           only the third "/"-separated field of each link
#                (the host), sorted and de-duplicated
#   -i           only links that start with the base of the requested
#                URL, printed relative to that base
#   -x           only links that do not start with that base
#
# Each stage is a shell function instead of a command string with the
# URL pasted into it, so characters of the user-supplied URL are never
# re-parsed as shell syntax, and the base is compared as a literal
# prefix rather than as a regular expression (a "." in the host only
# matches a ".").
# ---------------------------------------------------------------------

# url_base URL
#   Prints "scheme://host/" for URL, keeping whatever scheme the URL
#   itself carries (http, https, ...).  A URL without "://" is assumed
#   to be plain http.
url_base() {
  case "$1" in
    *://*) printf '%s/\n' "$(printf '%s\n' "$1" | cut -d/ -f1-3)" ;;
    *)     printf 'http://%s/\n' "$(printf '%s\n' "$1" | cut -d/ -f1)" ;;
  esac
}

# Default stage: sorted, de-duplicated list of all links.
filter_all() {
  sort | uniq
}

# -d stage: host part of every link.
filter_domains() {
  cut -d/ -f3 | sort | uniq
}

# -i stage: keep links beginning with $basedomain and strip that prefix.
# Only the leading occurrence is removed.
filter_internal() {
  awk -v base="$basedomain" \
    'index($0, base) == 1 { print substr($0, length(base) + 1) }' |
    sort | uniq
}

# -x stage: keep links that do not begin with $basedomain.
filter_external() {
  awk -v base="$basedomain" 'index($0, base) != 1' | sort | uniq
}

if [ $# -gt 1 ] ; then
  case "$1" in
    -d) lastcmd=filter_domains
        ;;
    -i) basedomain=$(url_base "$2")
        lastcmd=filter_internal
        ;;
    -x) basedomain=$(url_base "$2")
        lastcmd=filter_external
        ;;
     *) echo "$0: option non reconnue: $1" >&2; exit 1
        ;;
  esac
  # Drop the option so that "$1" is the URL from here on.
  shift
else
  lastcmd=filter_all
fi

# Link extraction pipeline, applied to the output of `lynx -dump URL`:
#   sed   keep everything from the line that is exactly "References"
#         to the end of the output
#   grep  keep lines containing one or more digits followed by "."
#   awk   keep the second whitespace-separated field of those lines
#   cut   drop everything from the first "?" onwards (query strings)
#   eval  run the final stage chosen during option handling
# $lastcmd is quoted so that the command text reaches eval unchanged
# rather than being field-split and glob-expanded first.
lynx -dump "$1" \
  | sed -n '/^References$/,$p' \
  | grep -E '[[:digit:]]+\.' \
  | awk '{print $2}' \
  | cut -d\? -f1 \
  | eval "$lastcmd"

exit 0
