1 #!/usr/bin/python
2
3 import sys
4 import getopt
5 import time
6 import re
7
8 from BeautifulSoup import BeautifulSoup, Comment
9 import urllib
10 import urllib2
11
12
def strip_comments(soup):
    """Remove every comment node from ``soup`` and return the same object.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``findAll``.

    Returns:
        The ``soup`` object that was passed in, modified in place.
    """
    # Select the text nodes that are Comment instances.
    comments = soup.findAll(text=lambda text: isinstance(text, Comment))

    # A plain loop, because extract() is called only for its side effect.
    for comment in comments:
        comment.extract()

    return soup
17
def _unify_central_station(text):
    """Rewrite "Sydney Terminal" / "Central Station" to one combined label."""
    combined = "Central Station (Sydney Terminal)"
    if combined in text:
        # Already in the combined form; replacing again would nest the label.
        return text
    # Use "in" rather than find() > 0 so a name at index 0 is also caught.
    if "Sydney Terminal" in text:
        return text.replace("Sydney Terminal", combined)
    if "Central Station" in text:
        return text.replace("Central Station", combined)
    return text


def show_timetable(html, from_sta):
    """Parse a trip-planner result page and print its timetable.

    Headings are read from the ``<h2>`` of each ``div`` with class
    ``boxhead tal iewfix2``; timetable entries are read from every ``<b>``
    inside each ``div`` with class ``boxbody iewfix``.  Entries are printed
    in order, and each entry matching ``from_sta`` is preceded by the next
    heading.

    Args:
        html: Raw HTML of the result page.
        from_sta: Origin station; used as a case-insensitive regular
            expression against each timetable entry.
    """
    ## PARSE
    soup = BeautifulSoup(BeautifulSoup(html).prettify())
    # NOTE(review): the published source showed the pattern "\ ", which would
    # delete every space and make the station-name checks below unreachable.
    # It looks like a browser-decoded "&nbsp;" entity -- confirm.
    nbsp_regex = re.compile(r"&nbsp;", re.I)

    ## FINDING OPTIONS
    options = []
    for head in soup.findAll("div", {"class": "boxhead tal iewfix2"}):
        option = head.find("h2").renderContents().encode("ASCII").strip()
        options.append(option)

    ## FINDING TIMETABLE
    timetable = []
    for body in soup.findAll("div", {"class": "boxbody iewfix"}):
        for bold in body.findAll("b"):
            text = bold.renderContents().encode("ASCII").strip()
            text = nbsp_regex.sub("", text)
            # fixing "Sydney Terminal" / "Central Station" dilemma
            timetable.append(_unify_central_station(text))

    ## DISPLAY
    station_regex = re.compile(from_sta, re.I)
    option_index = 0
    for entry in timetable:
        if station_regex.search(entry):
            # Guard the lookup: more matching entries than headings used to
            # raise IndexError.
            if option_index < len(options):
                print("\n [[ %s ]]" % options[option_index])
            option_index += 1

        print(entry)
64
def usage():
    """Print the command-line help text to standard output.

    The program name shown in the text is taken from ``sys.argv[0]``.
    """
    # The -t format is hour:minute:AM/PM, matching how the time is parsed.
    print("""
[*] usage: %s options

OPTIONS:
    -u      print url
    -h      show this message

    -a      set from station
    -b      set dest. station

    -t      set search time (08:30:AM)
            hour, min, AM/PM must be separated by colon (:)

    -d      set search date (yyyy/mm/dd)
            yyyy = 4 digits
            mm   = 2 digits
            dd   = 2 digits

EXAMPLE:
    %s -a central -b 'town hall' -d 2010/09/02 -t 05:30:PM
""" % (sys.argv[0], sys.argv[0]))
87
def prepare_param(args):
    """Turn command-line options into trip-planner query parameters.

    Args:
        args: Option arguments without the program name (for example
            ``sys.argv[1:]``).  ``None`` falls back to ``sys.argv[1:]``.

    Returns:
        A tuple ``(from_sta, to_sta, now_date, now_hour, now_min, now_ampm,
        print_url)``.  ``now_date`` has every non-digit character removed;
        the three time fields are the first three colon-separated parts of
        the time string.

    Raises:
        SystemExit: with status 2 for an invalid option, 0 after printing
            help for ``-h``, and -1 when the time has fewer than three
            colon-separated parts.
    """
    if args is None:
        args = sys.argv[1:]

    ## PRESET VARIABLES
    print_url = False
    from_sta = "redfern"
    to_sta = "blacktown"
    # %I is the 12-hour clock that belongs with %p; %H produced values such
    # as "17:05:PM".
    now_time = time.strftime("%I:%M:%p")
    now_date = time.strftime("%Y/%m/%d")

    ## PARSE COMMAND LINE ARGS
    try:
        # Parse the caller's list instead of always re-reading sys.argv.
        opts, _ = getopt.getopt(args, "a:b:d:t:uh")
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    for opt, arg in opts:
        # Compare with ==: ("-a") is a plain string, so "in" was a substring
        # test rather than tuple membership.
        if opt == "-a":
            from_sta = arg
        elif opt == "-b":
            to_sta = arg
        elif opt == "-h":
            usage()
            # Asking for help is not an error.
            sys.exit(0)
        elif opt == "-u":
            print_url = True
        elif opt == "-t":
            now_time = arg
        elif opt == "-d":
            now_date = arg
        else:
            raise AssertionError("unhandled option")

    ## BREAK TIME INTO HOUR, MIN, AM/PM
    parts = now_time.split(":")
    if len(parts) < 3:
        print("Incorrect time format")
        sys.exit(-1)
    now_hour, now_min, now_ampm = parts[:3]

    now_date = re.sub("[^0-9]", "", now_date)

    return from_sta, to_sta, now_date, now_hour, now_min, now_ampm, print_url
135
def fetch_html(from_sta,
               to_sta,
               now_date,
               now_hour,
               now_min,
               now_ampm):
    """Request a trip plan from 131500.com.au and return the response body.

    Args:
        from_sta: Origin station name.
        to_sta: Destination station name.
        now_date: Travel date, sent as ``itd_itdDate``.
        now_hour: Hour, sent as ``itd_itdTimeHour``.
        now_min: Minute, sent as ``itd_itdTimeMinute``.
        now_ampm: AM/PM marker, sent as ``itd_itdTimeAMPM``.

    Returns:
        A tuple ``(html, url, data)``: the response body, the base URL and
        the URL-encoded form data.

    Raises:
        SystemExit: with status -1 when the request fails with a URLError.
    """
    url = "http://www.131500.com.au/plan-your-trip/trip-planner?"
    values = {
        "session": "invalidate",
        "itd_cmd": "invalid",
        "itd_includedMeans": "checkbox",
        "itd_inclMOT_5": "1",
        "itd_inclMOT_7": "1",
        "itd_inclMOT_1": "1",
        "itd_inclMOT_9": "1",
        "itd_anyObjFilter_origin": "2",
        "itd_name_origin": from_sta,
        "itd_anyObjFilter_destination": "2",
        "itd_name_destination": to_sta,
        "itd_itdDate": now_date,
        "itd_itdTripDateTimeDepArr": "dep",
        "itd_itdTimeHour": now_hour,
        "itd_itdTimeMinute": now_min,
        "itd_itdTimeAMPM": now_ampm,
        "x": "56",
        "y": "11",
    }
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)

    # The try now guards the network call.  It used to wrap Request(), and
    # its handler printed the undefined name "error".
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError as error:
        print("Server Error %s" % error)
        sys.exit(-1)

    # Close the connection even if reading the body fails.
    try:
        html = response.read()
    finally:
        response.close()

    return html, url, data
174
def main(args):
    """Run one timetable lookup: parse options, fetch the page, print it.

    Args:
        args: Command-line option arguments without the program name.
    """
    # Parenthesised target list so the unpacking can span lines; the original
    # statement was split after "=" with no continuation.
    (from_sta, to_sta, now_date,
     now_hour, now_min, now_ampm, print_url) = prepare_param(args)

    print("")
    print("From Station: %s" % from_sta.title())
    print("To Station: %s" % to_sta.title())
    print("Time: %s:%s:%s" % (now_hour, now_min, now_ampm))
    print("Date: %s/%s/%s" % (now_date[:4], now_date[4:6], now_date[-2:]))

    html, url, data = fetch_html(from_sta, to_sta, now_date,
                                 now_hour, now_min, now_ampm)
    if print_url:
        # Base URL followed by the encoded form fields.
        print("\n%s%s" % (url, data))

    show_timetable(html, from_sta)
191
192
if __name__ == "__main__":
    # Pass only the option arguments; sys.argv[0] is the program name.
    main(sys.argv[1:])
# --- blog page footer, not part of the script ---
# 0 comments:
# Post a Comment