#!/usr/bin/python

"""
Geocache Grabber

This is used to download the information for one or several geocaches and upload
as much of that information into the GPSr. Main motivation for writing this
script is the fact that sending the information using geocaching.com doesn't
work on linux.

This program is under the GNU Affero General Public License Version 3 (GPL 3)
http://www.fsf.org/licensing/licenses/agpl-3.0.html

Please only use this script in ways that do not violate geocaching.com's terms
of use:-
http://www.geocaching.com/about/termsofuse.aspx
"""

# Module metadata.
# NOTE(review): the module docstring names the GNU Affero General Public
# License v3, while __license__ below says "GPL3" -- confirm which is intended.
__author__      =   "Fuad Tabba (altabba_org fuad_at)"
__version__     =   "$Revision: 0.9 $"
__date__        =   "$Date: 2008/03/19 $"
__copyright__   =   "Copyright (c) 2008 Fuad Tabba"
__license__     =   "GPL3"


###########################################################
# Define global constants as parameters the user should set
#

# geocaching.com username (posted as 'myUsername' by loginGeocaching)
username = ''

# geocaching.com password (posted as 'myPassword' by loginGeocaching).
# NOTE: stored here in plain text.
password = ''

# The maximum number of characters kept for the geocache's name field
nameSize = 10

# The maximum number of characters kept for the geocache's hint (comment)
# field
hintSize = 30

# The command used to run gpsbabel
gpsbabel = 'gpsbabel'

# The file associated with the GPSr (passed to gpsbabel with -F)
outfile = '/dev/ttyUSB0'

# The gpsbabel output format for the GPSr (passed to gpsbabel with -o)
gpsr = 'garmin'

# gpsbabel is flaky, so sending is attempted up to this many times before
# giving up with an error
babelRetry = 3


#########################################################################
# Define global constants that probably should NOT be modified by the user

# Login page for geocaching.com
loginurl = 'http://www.geocaching.com/login/default.aspx'

# URL used to find a cache based on the waypoint number
cachefind = 'http://www.geocaching.com/seek/cache_details.aspx?wp='

# Useragent: which browser/platform should this script masquerade as.
# This is only the *value* of the User-Agent header; the header name is
# supplied where the request headers are built, so it must not be repeated
# here (otherwise the server receives "User-Agent: User-Agent=Mozilla/...").
useragent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.12)'

# Template for the gpx file to be generated (one line, a single waypoint)
#
# The upper-case placeholders are substituted by sendGpsData:
# LATI and LONGI are replaced with the latitude and longitude respectively
# SIZENUM is replaced with a number representing the size of the cache
#   (stored in the waypoint's <ele> element)
# NAME is replaced with the name of the cache
# HINT is replaced with the cache hint (stored in the <cmt> element)
gpx = (r'<?xml version="1.0" encoding="UTF-8"?>'
'<gpx version="1.0" creator="fuads python script" '
'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
'xmlns="http://www.topografix.com/GPX/1/0" '
'xsi:schemaLocation="http://www.topografix.com/GPX/1/0 '
'http://www.topografix.com/GPX/1/0/gpx.xsd"><wpt lat="LATI" lon="LONGI">'
'<ele>SIZENUM</ele><name>NAME</name><cmt>HINT</cmt><sym>Geocache</sym>'
'</wpt></gpx>')

# Number of decimal places in the latitude or longitude when converting
# manually entered degree/minute coordinates to decimal degrees
coround = 6


#########################################################################
# Define global arguments and their default values
# (parseArgs overwrites verbose, quiet, dryrun and promptPuzzle)

# Suppresses warnings; errors are still printed (-q)
quiet = False

# Prints more information (-v)
verbose = False

# Does not send the data to the gps, just prints the gps data (-n)
dryrun = False

# If it's a puzzle cache, ask the user to enter the cache coordinates (-p)
promptPuzzle = False

# If it's interactive mode, ask user to input all fields
# NOTE(review): no command line option sets this flag and nothing in this
# file reads it -- it looks like a placeholder for a future feature.
interactive = False


################################################
# Import the necessary modules for this program

# To encode the POSTDATA used for login
from urllib import urlencode

# To run the gpsbabel command
from os import system

# General web processing
import urllib2

# Setup the cookie jar to store the site's cookies
import cookielib

# Regular expressions for pattern matching
import re

# To input and output stuff, and read the command line arguments
import sys

# To parse the command line input
import getopt


###########################################
# Define the functions used in this program

def usage():
    """Write the help text to stderr and terminate the program.

    The text covers the command line synopsis, the supported options, the
    expected waypoint format and notes on how the cache data is stored on
    the GPSr. Always exits with status 1, so this function never returns."""

    helpLines = (
        'Geocache Grabber',
        'Fuad Tabba (altabba_org fuad_at)',
        '',
        'Usage: %s [OPTION]... [WAYPOINT]...' % sys.argv[0],
        'Grabs the cache information from geocaching.com and sends it to the GPSr.',
        '',
        'Options:',
        '  -q        (Quiet)   Suppresses warnings',
        '  -v        (Verbose) Prints more information',
        '  -n        (Dry Run) Prints the GPX data but does not sent it',
        '  -p        (Puzzle)  Prompts for cache coordinates for unknown caches',
        '',
        'Waypoints:',
        '  Must specify at least one waypoint in the form GC????? (e.g. GCVWMR)',
        '  Can handle multiple waypoints as well.',
        '',
        'Notes:',
        '  - The name of the cache is the waypoint number (without GC) plus',
        '  as many letters of the title that can fit.',
        '  - The hint (or as much as can fit) is stored as a comment.',
        '  - The size is stored under elevation. Unknown size is 0.',
        '    Micro being 1 up to large being 4.',
    )

    # One write per line, each terminated by a newline
    for helpLine in helpLines:
        sys.stderr.write(helpLine + '\n')

    sys.exit(1)


def parseArgs(args):
    """Parse the command line arguments.

    args is the argument list without the program name (sys.argv[1:]).

    Sets the module level flags verbose (-v), quiet (-q), dryrun (-n) and
    promptPuzzle (-p) according to the options found.

    Returns the list of waypoints (the non-option arguments) to be
    processed. If an option is not recognised, or no waypoint is given,
    the usage text is shown and the program exits."""

    global verbose, quiet, dryrun, promptPuzzle

    try:
        opts, waypoints = getopt.getopt(args, 'vqnp')
    except getopt.GetoptError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the syntax is valid on every Python version.
        # The message goes to stderr like every other diagnostic here.
        sys.stderr.write(str(sys.exc_info()[1]) + '\n\n')
        usage()
        return None

    for o, a in opts:
        if o == '-v':
            verbose = True
        elif o == '-q':
            quiet = True
        elif o == '-n':
            dryrun = True
        elif o == '-p':
            promptPuzzle = True
        else:
            # getopt only yields the options listed above; purely defensive
            printError('unhandled option')

    if not waypoints:
        usage()

    return waypoints


def printError(msg):
    """Report a fatal error on stderr and exit with status 1.

    The message is prefixed with 'Error: '. Quiet mode does not silence
    it. This function never returns."""

    errorLine = 'Error: ' + msg + '\n'
    sys.stderr.write(errorLine)
    raise SystemExit(1)


def printWarning(msg):
    """Write a 'Warning: ' prefixed message to stderr.

    Execution continues afterwards. Nothing is written when the quiet
    flag is set."""

    if quiet:
        return

    warningLine = "Warning: " + msg + '\n'
    sys.stderr.write(warningLine)


def printVerbose(msg):
    """Print msg on standard output, but only when the verbose flag is set."""

    if not verbose:
        return

    print(msg)


def installCookieJar():
    """Make every later urllib2 request share one cookie jar.

    Builds an opener backed by an in-memory LWPCookieJar and installs it
    as urllib2's default opener, so cookies received while logging in are
    sent with the following requests and the session stays logged on."""

    # Adapted http://www.voidspace.org.uk/python/articles/cookielib.shtml
    jar = cookielib.LWPCookieJar()
    cookieHandler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(cookieHandler))


def loginGeocaching():
    """Logs the user in to Geocaching.com.

    Fetches the login page to obtain its __VIEWSTATE form value, then
    posts that value together with the configured username and password.
    The session cookies end up in the cookie jar installed by
    installCookieJar, which keeps the user logged in for later requests.

    Exits through printError when the site cannot be reached, when the
    login form cannot be found in the page, or when the response does not
    confirm the login."""

    # NOTE(review): loginurl uses plain http, so the credentials are
    # presumably sent unencrypted -- confirm whether https is available.
    try:
        # Get the session STATE before requesting the login page
        page = urllib2.urlopen(loginurl)
        m = re.match(r'.+?id="__VIEWSTATE"\s+value="(.+?)"',
            page.read(), re.S)

        # Without this check a changed page layout would surface as an
        # AttributeError traceback instead of a readable error
        if m is None:
            printError('cannot find the login form on the site.')
            return

        viewstate = m.group(1)

        formvalues = {  '__VIEWSTATE' : viewstate,
                        'myUsername' : username,
                        'myPassword' : password,
                        'cookie' : 'on',
                        'Button1' : 'Login'}
        headers = { 'User-Agent' : useragent }
        formdata = urlencode(formvalues)

        # Login to the site
        request = urllib2.Request(loginurl, formdata, headers)
        page = urllib2.urlopen(request)

        # Check that logon was successful
        m = re.match(r'.+?you are logged', page.read(), re.S)

        if m is None:
            printError('cannot logon to the site. '
            'Probably wrong username or password.')

    except urllib2.URLError:
        printError('cannot access the website.')


def htmlClean(text):
    """Removes everything that is offensive to xml from the text.

    Each html tag and each html character entity (named such as &amp;,
    or numeric such as &#39; / &#x27;) is replaced by a single space.
    Any of the characters < & > ' " ( ) still left afterwards are deleted.

    Returns the cleaned text."""

    # Replace all html tags with spaces
    text = re.sub('<.*?>', ' ', text)

    # Replace all html character entities with spaces. The '+' matters:
    # without it only one-character entities match and '&amp;' would be
    # left behind as 'amp;' once the '&' is stripped below.
    text = re.sub('&#?[a-zA-Z0-9]+;', ' ', text)

    # Remove any other offensive characters if they're still there
    return re.sub('[<&>\'"()]', '', text)


def truncateText(text, limit, fromEnd):
    """Cleans the text and shortens it to at most limit characters.

    The text is first passed through htmlClean. If the result is longer
    than limit it is compacted: converted to title case, stripped of
    leading/trailing '.', '!' and '?', the words 'A ' and 'The ' dropped,
    'And ' shortened to 'N', and all spaces removed.

    Whatever is left is then cut to limit characters. If fromEnd is True
    the LAST limit characters are kept, otherwise the FIRST limit
    characters are kept. A limit of zero or less yields an empty string."""

    truncated = htmlClean(text)

    # truncated[-0:] would return the whole string rather than nothing,
    # so handle a non-positive limit explicitly
    if limit <= 0:
        return ''

    # If it's too big start with removing spaces and changing words.
    # The order of the replacements matters and is kept as listed.
    if len(truncated) > limit:
        truncated = truncated.title().strip('.!?')

        for old, new in (('A ', ''), ('And ', 'N'), ('The ', ''), (' ', '')):
            truncated = truncated.replace(old, new)

    if fromEnd:
        return truncated[-limit:]

    return truncated[:limit]


def processCacheName(page, wp):
    """Builds the waypoint name to store on the GPSr.

    The cache title is taken from the page's <title> element (the text
    between 'called ' and ' created by'). It is prefixed with the waypoint
    code minus its first two characters (the leading GC), then cleaned and
    cut to nameSize characters by truncateText.

    Exits through printError when the title cannot be found."""

    titleMatch = re.match(
        r'.+?<title>.+?called (.+?) created by.+?</title>', page, re.S)

    if titleMatch is None:
        printError('cannot proccess the cache name.')
        return None

    # Add the waypoint number without the leading GC
    label = ' '.join((wp[2:], titleMatch.group(1)))

    return truncateText(label, nameSize, False)


def processCacheSize(page):
    """Returns the cache size as a one digit string.

    The size is read from the file name of the container icon referenced
    in the page: micro == '1', small == '2', regular == '3', large == '4'.
    Any other icon name gives '0'.

    Exits through printError when no container icon is found."""

    iconMatch = re.match(
        r'.+?\.\./images/icons/container/(.+?)\.gif', page, re.S)

    if iconMatch is None:
        printError('cannot proccess the cache size.')
        return None

    knownSizes = {
        'micro': '1',
        'small': '2',
        'regular': '3',
        'large': '4',
    }

    return knownSizes.get(iconMatch.group(1), '0')


def processCacheType(page, wp):
    """Returns the cache type named in the page title.

    The type is the text between 'A ' and ' in' inside the <title>
    element. A warning is issued for anything that is not a
    'Traditional Cache', except for an 'Unknown Cache' when the user asked
    to be prompted for puzzle coordinates.

    Exits through printError when the type cannot be found."""

    typeMatch = re.match(r'.+?<title>.+?A (.+?) in', page, re.S)

    if typeMatch is None:
        printError('cannot proccess the cache type.')
        return None

    kind = typeMatch.group(1)

    # Puzzles stay silent when the prompt is set; traditional caches always
    expected = kind == 'Traditional Cache' or \
        (kind == 'Unknown Cache' and promptPuzzle)

    if not expected:
        printWarning(wp + ' is a ' + kind)

    return kind


def processHint(page, wp):
    """Returns the decoded hint, shortened to fit the comment field.

    The hint is the content of the <span id="Hints" class="displayMe">
    element. It is rot13 decoded and then cleaned and cut to hintSize
    characters by truncateText (keeping the end of the text).

    Returns 'No hint.' when the page has no such element. A warning
    showing both versions is issued whenever the final hint differs from
    the decoded one."""

    hintMatch = re.match(
        r'.+?<span id="Hints" class="displayMe">(.+?)</span>', page, re.S)

    if hintMatch is None:
        return 'No hint.'

    # Find the cache hint and decode it
    decoded = hintMatch.group(1).encode('rot13')
    shortened = truncateText(decoded, hintSize, True)

    # Tell the user when the stored hint is not the full decoded hint
    if shortened != decoded:
        details = (' hint got truncated.\n'
            'Original hint: %s\n'
            'New hint: %s\n') % (decoded, shortened)
        printWarning(wp + details)

    return shortened


def processLatLon(page):
    """Returns a (latitude, longitude) tuple of strings.

    Both values are copied as they appear in the query string of the
    page's 'lnkConversions' link.

    Exits through printError when that link cannot be found."""

    linkMatch = re.match(
        r'.+?<a id="lnkConversions" href="\.\./wpt/\?lat=(.+?)&amp;'
        r'lon=(.+?)&amp;detail=1" target="_blank">',
        page, re.S)

    if linkMatch is None:
        printError('cannot proccess latitude and longitutde.')
        return None

    return linkMatch.group(1, 2)


def sendGpsData(name, size, hint, coords):
    """Generates the gpx data and sends it to the GPSr.

    name, size and hint are strings; coords is a (latitude, longitude)
    pair of strings. They are substituted into the gpx template and the
    result is piped to gpsbabel through the shell.

    The command is printed in verbose mode and also on a dry run, where
    printing it is the whole point. Unless it is a dry run the command is
    attempted up to babelRetry times; if none of the attempts succeeds
    the program exits through printError."""

    values = {
        'LATI': coords[0],
        'LONGI': coords[1],
        'SIZENUM': size,
        'NAME': name,
        'HINT': hint,
    }

    # Fill in all placeholders in a single pass, so that a value which
    # happens to contain a placeholder word (e.g. a cache name containing
    # HINT) is not substituted a second time
    compiledgpx = re.sub('LATI|LONGI|SIZENUM|NAME|HINT',
        lambda found: values[found.group(0)], gpx)

    # The data is passed to the shell inside single quotes. A single quote
    # in the data is written as '\'' (close the quote, escaped quote,
    # reopen the quote) so it cannot end the quoting early.
    quotedgpx = compiledgpx.replace("'", "'\\''")

    babelcommand = "echo '%s' | %s -i gpx -f - -o %s -F %s" % \
        (quotedgpx, gpsbabel, gpsr, outfile)

    if verbose or dryrun:
        sys.stdout.write(babelcommand + '\n\n')

    if not dryrun:
        for i in range(babelRetry):
            if system(babelcommand) == 0:
                break
        else:
            # Only reached when no attempt returned success
            printError('could not write to gps device.')


def processPuzzle(wp):
    """Asks the user for the solved coordinates of a puzzle cache.

    A prompt naming the waypoint is written to stderr and one line is
    read from stdin. The line is parsed as degrees/minutes by parseDm and
    converted to decimal degrees with coround decimal places.

    Returns a (latitude, longitude) tuple of strings."""

    prompt = wp + ' is an unknown cache. ' + \
        'Please enter the solution coordinates: '
    sys.stderr.write(prompt)

    latDm, lonDm = parseDm(sys.stdin.readline())

    return (dmToDecimal(latDm, coround), dmToDecimal(lonDm, coround))


def processCache(wp):
    """Parses the cache information and sends it to the GPSr.

    wp is the waypoint code of the cache (e.g. GCVWMR). The cache page is
    downloaded, its name, size, hint and coordinates are extracted, and
    the result is handed to sendGpsData. For an 'Unknown Cache' the
    coordinates are requested from the user when promptPuzzle is set.

    Exits through printError when the page cannot be downloaded or when a
    required field cannot be extracted from it."""

    printVerbose('\nProcesssing ' + wp + '\n')

    #search waypoint url
    cacheurl = cachefind + wp

    # Download the cache webpage. A network failure is reported the same
    # way as during login instead of ending in a traceback.
    try:
        page = urllib2.urlopen(cacheurl)
        try:
            cache = page.read()
        finally:
            page.close()
    except urllib2.URLError:
        printError('cannot access the website.')
        return

    # Find the cache name
    name = processCacheName(cache, wp)

    # Find the cache size
    size = processCacheSize(cache)

    # Find the cache hint and decode it
    hint = processHint(cache, wp)

    # Find the latitude and longitude
    coords = processLatLon(cache)

    # Process type. If the type is a puzzle, get the coords from the user
    if processCacheType(cache, wp) == 'Unknown Cache' and promptPuzzle:
        coords = processPuzzle(wp)

    printVerbose('Cache: ' + name)
    printVerbose('Size: ' + size)
    printVerbose('Coords: ' + str(coords))
    printVerbose('Hint: ' + hint)
    printVerbose('')

    sendGpsData(name, size, hint, coords)


def dmToDecimal(angle, rnd):
    """Converts angle from DegreeMinute to decimal

    angle is a (degrees, minutes) pair; a negative degrees value marks
    the southern or western hemisphere and minutes is not negative.
    rnd is the number of decimal places in the result.

    Returns the converted value as a string"""

    # Convert explicitly so integer input does not truncate the division
    degrees = float(angle[0])
    fraction = float(angle[1]) / 60.0

    # The minutes extend the angle away from zero, so they are subtracted
    # when the degrees are negative. The sign is read from repr() because
    # "-0" parses to negative zero, which still compares as >= 0; without
    # this, e.g. "W 0 07.5" would end up in the eastern hemisphere.
    if repr(degrees).startswith('-'):
        return "%0.*f" % (rnd, degrees - fraction)

    return "%0.*f" % (rnd, degrees + fraction)


def parseDm(coordText):
    """Parses a latitude/longitude pair written in degrees and minutes.

    Accepted input looks like:

    S 36 51.918 E 174 46.725 or
    N 6 52.861  W174   43.327

    Case and spacing do not matter, and a non-digit separator (such as a
    degree symbol) may sit between degrees and minutes. The minutes must
    contain a decimal point. S and W make the degrees negative.

    Returns ((latDeg, latMin), (lonDeg, lonMin)) as floats. Exits through
    printError when the text does not match."""

    # Hemisphere letters become signs: S/W negative, N/E/+ just a space
    normalized = coordText.upper()
    for letter, sign in (('N', ' '), ('S', '-'), ('E', ' '), ('W', '-'),
            ('+', ' ')):
        normalized = normalized.replace(letter, sign)

    parts = re.match(
        r'\s*(-?\s*\d+)\D+(\d+\.\d+)\s*(-?\s*\d+)\D+(\d+\.\d+)',
        normalized)

    if parts is None:
        printError('Could not parse the coordinates entered manually.')
        return None

    # A sign may be separated from its digits by spaces; drop them
    latDeg, latMin, lonDeg, lonMin = \
        [float(piece.replace(' ', '')) for piece in parts.groups()]

    return ((latDeg, latMin), (lonDeg, lonMin))


def main():
    """Entry point: parse the arguments, log in, then process each cache.

    Every waypoint given on the command line is handled in order by
    processCache. A summary line is printed in verbose mode once all of
    them are done."""

    # Parse the command line arguments and get the list of caches to process
    caches = parseArgs(sys.argv[1:])

    # Setup the cookie jar and log in to geocaching.com
    printVerbose('Logging in...')
    installCookieJar()
    loginGeocaching()

    for waypoint in caches:
        processCache(waypoint)

    # Reaching this line means every listed cache has been processed
    total = str(len(caches))
    printVerbose('All ' + total + ' caches uploaded successfully...')


# Run the program only when executed as a script, not when imported
if __name__ == "__main__":
    main()

