#! /bin/bash -
#
# H. A. Trujillo	Fall 2016
# Last Change:      5 Aug 2021
# Version:		3.1.1

#
# Purpose:
#    to extract a list of courses a professor has taught 
#    from the Registrar's pages, for tenure candidates, etc.
#
# Usage:
#	    % ClassesTaught [-h]
#
#    Will ask for faculty surname (eg, Smith or Smith B), and 
#    semester range to parse (eg, F18, Sp19, Su20).  
#
#    Output is left as a text file (/tmp/classes.txt).
#

# Print the usage text when invoked with -h.
# The text is the header comment of this very file (lines 7-19) with the
# first '#' on each line removed, so that range must track the header.
# "$0" is quoted so a script path containing blanks still works.
if [ "$1" == "-h" ]; then 	# Print help text
    sed -n '7,19s/#//p' "$0"
    exit
fi

# Ask for the search parameters.  -r stops read from treating a stray
# backslash as an escape or a line continuation.
read -r -p "  Faculty Surname:             " name
echo       "  Starting Semester"
read -r -p "        eg: F18, Sp19, Su20    " begin
read -r -p "  Ending Semester:             " end


### Sanity-check the input

#	ensure semesters are capitalized (bash 3.0 style)
#	Only the first character is upper-cased; the rest is kept as typed.
#	The tr classes are quoted so the shell cannot glob-expand them against
#	single-letter file names in the current directory, and the character
#	itself is quoted so it is never word-split or expanded.
begin="$( printf '%s\n' "${begin:0:1}" | tr '[:lower:]' '[:upper:]' )${begin:1}"
end="$( printf '%s\n' "${end:0:1}" | tr '[:lower:]' '[:upper:]' )${end:1}"

#	Check semesters for bad format
#	The first three tests give a specific hint for the common slips.
#	The last one rejects anything else that is not F, Sp or Su followed
#	by a two-digit year (including an empty answer), because the download
#	loop can only step through those three seasons.
#	The pattern lives in a variable so it behaves the same on bash 3 and 4.
#	Failures go to stderr and exit non-zero.
semester_re='^(F|Sp|Su)[[:digit:]][[:digit:]]$'
for var in "$begin" "$end" ; do
    if [[ "$var" =~ " " ]] ; then
	echo "	Oops -- no spaces allowed in semesters" >&2
	exit 1
    fi
    if [[ "$var" =~ S[[:digit:]] ]] ; then
	echo "	Oops -- use “Sp” or “Su”, not “S”" >&2
	exit 1
    fi
    if [[ "$var" =~ F[[:alpha:]] ]] ; then
	echo "	Oops -- use “F”, not “${var:0:2}”" >&2
	exit 1
    fi
    if ! [[ "$var" =~ $semester_re ]] ; then
	echo "	Oops -- semesters look like F18, Sp19 or Su20" >&2
	exit 1
    fi
done

#	ensure name is capitalized appropriately  (ie, "Smith" or "Smith B")
#	The name is later spliced into a sed expression, so this test is also
#	what keeps odd characters out of it.  "$name" is quoted so that the
#	pattern sees exactly what was typed: unquoted, runs of blanks would be
#	collapsed and glob characters expanded before grep ever looked at them.
if ! printf '%s\n' "$name" | grep -Eq '^[[:upper:]][[:lower:]]*( [[:upper:]])?$' ; then
    echo "	Oops -- bad caps in name" >&2
    exit 1
fi

#	ensure end date is after start date
#	Build a sortable key for each semester: the two-digit year first, then
#	the semester itself with F rewritten as X.  Within one year the keys
#	then compare as Sp < Su < X, which is the order the download loop
#	walks through them.
dne=${end: -2}${end/F/X}		# ie F19 -> 19X19
nigeb=${begin: -2}${begin/F/X}

#	Both sides are quoted so an empty key cannot turn the test into a
#	syntax error.
if [ "$dne" \< "$nigeb" ] ; then
    echo "	Oops -- ending semester before start" >&2
    exit 1
fi


### Download web pages 

# Split the starting semester into its two-digit year and season prefix.
yr=${begin: -2}			# eg: 20	(the last two characters)
len=$(( ${#begin} - 2 ))	# whatever precedes the year is the season
sm=${begin:0:len}		# eg: F, Su
#sm=${begin:0: -2}	# eg: F, Su		<-- ok for Bash4, but not Bash3

# Download -- fetch the course-listing page of every semester in the range.
#
# Globals read:  sm   season prefix of the first semester (F, Sp or Su)
#                yr   two-digit year of the first semester
#                end  last semester to fetch, eg Su20
# Output:        the pages, concatenated, on stdout;
#                a one-line progress report on stderr.
# Returns:       1 (before fetching anything) if yr is not two digits.
#
# The walk is F -> Sp (next year) -> Su -> F.  It works on local copies,
# so the caller's sm and yr are left alone, and it gives up after 25
# pages so that a range which never reaches $end cannot loop forever.
function Download() {
    local term="$sm" year="$yr" semester="" count=0

    case $year in
	[0-9][0-9]) ;;
	*)  echo "	Oops -- bad year in starting semester" >&2
	    return 1
	    ;;
    esac

    printf "\n\tdownloading:  " >&2
    while [ "$semester" != "$end" ] ; do
	semester=${term}${year}
	printf '%s  ' "$semester" >&2
	curl -s -L  "http://rosters.wilkes.edu/scheds/courses${semester}.html"

	count=$(( count + 1 ))
	if [ "$count" -eq 25 ]; then break ; fi	# runaway loop check

	case $term in
	    F)  term=Sp
		# 10# forces base ten: a bare 08 or 09 is rejected as bad
		# octal.  %02d puts back the leading zero that arithmetic
		# drops, so 08 becomes 09 rather than 9.
		year=$( printf '%02d' $(( (10#$year + 1) % 100 )) )
		;;
	    Sp) term=Su
		;;
	    Su) term=F
		;;
	esac
    done
}

### Create awk rules-file
# The awk program is everything in this file below the END_OF_WRAPPER
# marker line.  It is streamed, with the placeholder in its first pattern
# replaced by the surname, through a named pipe that awk later reads with
# -f.  sed runs in the background because it blocks until awk opens the
# pipe for reading.
awkfil=/tmp/ClassesTaught.$$.awk
#	mkfifo refuses to reuse an existing path; stop rather than let sed
#	write into whatever is already there.
if ! mkfifo "$awkfil" ; then
    echo "	Oops -- cannot create $awkfil" >&2
    exit 1
fi
trap 'rm -f "$awkfil"' EXIT		# remove the pipe even if interrupted
sed -e '1,/^#### END_OF_WRAPPER/d' \
    -e  "s/NAME/$name/" "$0"  > "$awkfil" &


### Convert curl download into a tab-separated, human-readable table
### and let awk chew on it.
#
#   Download  writes the fetched pages to stdout
#   iconv     converts them to ASCII; -c drops characters that cannot be
#             converted
#   sed       breaks the line at every <tr>, so each table row starts on
#             a line of its own
#   php       strips every HTML tag except <td> from each line
#   sed       deletes </td>, turns each opening <td ...> into a tab, turns
#             &nbsp;&nbsp; into one blank, and removes one leading tab
#   awk       applies the rules read from $awkfil
#
# Several of the sed expressions contain a literal tab or newline; keep
# them intact when editing.

Download | \
    iconv -s -c -t ASCII | \
    sed -e 's,<tr>,\
,g' | \
    php -R 'echo strip_tags($argn, "<td>")."\n";' | \
    sed -e 's,</td>,,g'    \
        -e 's,<td[^>]*>,	,g' \
        -e 's,&nbsp;&nbsp;, ,g'     \
        -e  's,^	,,'  | \
    awk -f $awkfil


# Clean up and go away
# The bare exit passes on rm's status as the script's exit status.
rm "$awkfil"
exit



#### END_OF_WRAPPER_SCRPT ####

# A row that mentions the instructor: write course and section, then
# enrolment, instructor and title, to the output file.
/\tNAME[ \t]/ {
    # In the OL/F2F layout an online section has no time or classroom
    # cells, so its enrolment and instructor sit two columns further left.
    back = (ModeFlag == 1 && $0 ~ "Online") ? 2 : 0
    ecol = enrol - back
    icol = instr - back
    print "  " $course, $sect "\t" $ecol "\t" $icol "\t" $title > outfile
    next
}

# Page banner, one per downloaded semester: print the semester heading
# (the first two words of the banner line) and reset the column numbers
# and ModeFlag to the plain layout.  Banner lines for any other term are
# skipped without output.
/Schedule of Courses/ {
    if ( $0 ~ /(Spring|Summer|Fall) 20.. Sched/ ) {
	# A divider precedes the very first heading and every Fall heading.
	if ( didhdr == "" || $0 ~ /Fall 20.. Sched/ ) {
	    printf("\n%s", divider) > outfile
	}
	split($0, sem, " ")
	# Text taken from the page is passed as an argument, never used as
	# the format itself, so a stray % cannot upset printf.
	printf("\n%s %s\n", sem[1], sem[2]) > outfile
	course=1 ; sect=2 ; enrol=12 ; instr=10 ; title=4
	didhdr = 1
	ModeFlag=0
    }
    next
}

# A line starting with "Modes" marks the OL/F2F/... format (F20 - ?).
# Course, section and title each sit one column further right than in the
# plain layout; instructor and enrolment keep their column numbers.
/^Modes/ {
    ModeFlag = 1
    course = 2
    sect   = 3
    title  = 5
    instr  = 10
    enrol  = 12
}

# Start-up: tab-separated input, fixed output path, column heading.
BEGIN {
    FS = "\t"
    divider = "----------------------------------------------------------------"
    outfile = "/tmp/classes.txt"

    # No newline here: the first semester heading begins with its own "\n".
    printf  "  course\tenrlmt\tinstructor\ttitle" > outfile
}

# Wrap-up: close the listing and tell the user where it is and how to
# print it.
END{
    # close() is written as a function call, the form POSIX specifies;
    # the bare "close outfile" is not accepted by every awk.
    close(outfile)
    print "\n\n        --> output left in " outfile
    print ""
#    print "            To print:   print2 -f Courier7 "
    print "            To print:   enscript -2r -DDuplex:true -f Courier7 < " outfile
}

#	 17Nov18  Deals gracefully with summer classes
# 		  Deals gracefully with non-fall starting semester
#	  9Jan20  Now a self-contained script that downloads the pages by itself.
# 3.0.2  10jan20  - curl downloads into a pipe rather than a file, since 800k/semester
#		  - better input sanity-checking
# 3.0.5	 21jan20  - name grepping a bit smarter, to minimize bad matches
#		  - now also can take first initial, to minimize bad matches
# 3.1	  3feb21  Adapted for new F2F/Hybrid/... format of masterlist
