Thursday, September 2, 2010

Sydney CityRail (131500) Timetable on the Command Line (Python version) v0.2

    1 #!/usr/bin/python
    2 
    3 import sys
    4 import getopt
    5 import time
    6 import re
    7 
    8 from BeautifulSoup import BeautifulSoup, Comment
    9 import urllib
   10 import urllib2
   11 
   12 
   13 def strip_comments(soup):
   14  comments = soup.findAll(text=lambda text:isinstance(text, Comment))
   15  [comment.extract() for comment in comments]
   16  return soup
   17 
   18 def show_timetable(html, from_sta):
   19 
   20  ## PARSE
   21  soup = BeautifulSoup(BeautifulSoup(html).prettify())
   22  regex = re.compile("\&nbsp",re.I)
   23 
   24  ## FINDING OPTIONS
   25  options = []
   26  div1s = soup.findAll("div",{ "class" : "boxhead tal iewfix2" })
   27 
   28  for d1 in div1s:
   29   option = d1.find("h2").renderContents().encode("ASCII").strip()
   30   options.append(option)
   31 
   32 
   33  ## FINDING TIMETABLE
   34  timetable = []
   35  div2s = soup.findAll("div",{ "class" : "boxbody iewfix" })
   36 
   37  for d2 in div2s:
   38   bs = d2.findAll("b")
   39   for b in bs:
   40    tmp = b.renderContents().encode("ASCII").strip()
   41    tmp = regex.sub("",tmp)
   42 
   43    # fixing "Sydney Terminal" / "Central Station" dilemma
   44    if tmp.find("Sydney Terminal") > 0:
   45     tmp = tmp.replace("Sydney Terminal", "Central Station (Sydney Terminal)
   46     ")
   47    elif tmp.find("Central Station") > 0:
   48     tmp = tmp.replace("Central Station", "Central Station (Sydney Terminal)
   49     ")
   50 
   51    timetable.append(tmp)
   52 
   53 
   54  ## DIPLAY
   55  a_regex = re.compile(from_sta,re.I)
   56 
   57  j=0
   58  for i in xrange(0,len(timetable)):
   59   if a_regex.search(timetable[i]):
   60    print "\n  [[ %s ]]" % (options[j])
   61    j = j + 1
   62 
   63   print timetable[i]
   64 
   65 def usage():
   66  print """
   67  [*] usage: %s  options
   68 
   69  OPTIONS:
   70  -u      print url
   71  -h      show this message
   72 
   73  -a      set from station
   74  -b      set dest. station
   75 
   76  -t      set search time (08:30:AM)
   77    hour, min, sec must be seperated by colon (:)
   78 
   79  -d      set search date (yyyy/mm/dd)
   80    yyyy = 4 digits
   81    mm = 2 digits
   82    dd = 2 ditigs
   83 
   84  EXAMPLE:
   85  %s -a central -b 'town hall' -d 2010/09/02 -t 05:30:PM
   86   """ % (sys.argv[0], sys.argv[0])
   87 
   88 def prepare_param(args):
   89  ## PRESET VARIABLES
   90  print_url = False
   91  from_sta = "redfern"
   92  to_sta = "blacktown"
   93  now_time = time.strftime("%H:%M:%p")
   94  now_date = time.strftime("%Y/%m/%d")
   95 
   96 
   97  ## PARSE COMMAND LINE ARGS
   98  try:
   99   opts, args = getopt.getopt(sys.argv[1:], "a:b:d:t:uh")
  100 
  101  except getopt.GetoptError, err:
  102   print str(err)
  103   usage()
  104   sys.exit(2)
  105 
  106  for opt, arg in opts:
  107   if opt in ("-a"):
  108    from_sta = arg
  109   elif opt in ("-b"):
  110    to_sta = arg
  111   elif opt in ("-h"):
  112    usage()
  113    sys.exit(-1)
  114   elif opt in ("-u"):
  115    print_url = True
  116   elif opt in ("-t"):
  117    now_time = arg
  118   elif opt in ("-d"):
  119    now_date = arg
  120   else:
  121    assert False, "unhandled option"
  122 
  123  ## BREAK TIME INTO HOUR, MIN, AM/PM
  124  try:
  125   now_hour = re.split(":", now_time)[0]
  126   now_min = re.split(":", now_time)[1]
  127   now_ampm = re.split(":", now_time)[2]
  128  except:
  129   print "Incorrect time format"
  130   sys.exit(-1)
  131 
  132  now_date = re.sub("[^0-9]","",now_date)
  133 
  134  return from_sta, to_sta, now_date, now_hour, now_min, now_ampm, print_url
  135 
  136 def fetch_html( from_sta,
  137     to_sta,
  138     now_date,
  139     now_hour,
  140     now_min,
  141     now_ampm):
  142 
  143  url = "http://www.131500.com.au/plan-your-trip/trip-planner?"
  144  values = { "session" : "invalidate",
  145     "itd_cmd" : "invalid",
  146     "itd_includedMeans" : "checkbox",
  147     "itd_inclMOT_5" : "1",
  148     "itd_inclMOT_7" : "1",
  149     "itd_inclMOT_1" : "1",
  150     "itd_inclMOT_9" : "1",
  151     "itd_anyObjFilter_origin" : "2",
  152     "itd_name_origin" : from_sta ,
  153     "itd_anyObjFilter_destination" : "2",
  154     "itd_name_destination" : to_sta,
  155     "itd_itdDate" : now_date,
  156     "itd_itdTripDateTimeDepArr" : "dep",
  157     "itd_itdTimeHour" : now_hour,
  158     "itd_itdTimeMinute" : now_min,
  159     "itd_itdTimeAMPM" : now_ampm,
  160     "x" : "56",
  161     "y" : "11"
  162     }
  163  data = urllib.urlencode(values)
  164  try:
  165   req = urllib2.Request(url, data)
  166  except:
  167   print "Server Error", error
  168   sys.exit(-1)
  169 
  170  response = urllib2.urlopen(req)
  171  html = response.read()
  172 
  173  return html, url, data
  174 
  175 def main(args):
  176  from_sta, to_sta, now_date, now_hour, now_min, now_ampm, print_url =
  177  prepare_param(args)
  178 
  179  print
  180  print "From Station:", from_sta.title()
  181  print "To Station:", to_sta.title()
  182  print "Time:", "%s:%s:%s" % (now_hour, now_min, now_ampm)
  183  print "Date: %s/%s/%s" % (now_date[:4], now_date[4:6],now_date[-2:])
  184 
  185  html, url, data = fetch_html(from_sta, to_sta, now_date, now_hour, now_min,
  186  now_ampm)
  187  if print_url:
  188   print "\n%s%s" % (url, data)
  189 
  190  show_timetable(html, from_sta)
  191 
  192 
  193 if __name__ == "__main__":
  194   main(sys.argv[1:])





0 comments:

Post a Comment