| prev | Draft Version 534 (Wed Nov 9 17:40:59 2005) | next |
stdin, stdout, and zero/nonzero exit codes
www.third-bit.com
Firefox
Apache Web Server is the most widely used, but many others exist
GET or POST
GET is typically used to fetch information
POST is usually used to submit form data, or to upload files
/index.html
HTTP/1.0
Accept: text/html
Accept-Language: en, fr
If-Modified-Since: 16-May-2005
HTTP/1.0
import sys, socket
# Fetch a page by speaking HTTP directly over a TCP socket.
#
# An HTTP/1.0 request is a request line, optional headers, and then an
# empty line.  The empty line is what tells the server the request is
# complete, so the terminator is spelled out explicitly with CRLF pairs.
HttpRequest = 'GET /greeting.html HTTP/1.0\r\n\r\n'

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    s.connect(("www.third-bit.com", 80))
    # sendall keeps writing until the whole request has gone out; a bare
    # send may transmit only part of the buffer.
    s.sendall(HttpRequest)

    # Collect the reply in pieces; an empty read means the other end has
    # closed the connection and there is nothing more to come.
    chunks = []
    while True:
        data = s.recv(1024)
        if not data:
            break
        chunks.append(data)
finally:
    # Release the socket even if connecting, sending or reading fails.
    s.close()

response = ''.join(chunks)
print(response)
HTTP/1.1 404 Not Found Date: Sat, 02 Jul 2005 11:16:44 GMT Server: Apache/2.0.54 (Debian GNU/Linux) DAV/2 SVN/1.1.4 mod_python/3.1.3 Python/2.3.5 PHP/4.3.10-9 mod_ssl/2.0.54 OpenSSL/0.9.7e Content-Length: 394 Connection: close Content-Type: text/html; charset=iso-8859-1 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> <html><head> <title>404 Not Found</title> </head><body> <h1>Not Found</h1> <p>The requested URL /greeting.html was not found on this server.</p> <hr> <address>Apache/2.0.54 (Debian GNU/Linux) DAV/2 SVN/1.1.4 mod_python/3.1.3 Python/2.3.5 PHP/4.3.10-9 mod_ssl/2.0.54 OpenSSL/0.9.7e Server at pyre.third-bit.com Port 80</address> </body></html>
socket.AF_INET means “an Internet socket”
socket.SOCK_STREAM means “use TCP”
urllib
urllib.urlopen(URL) does what your browser would do if you gave it the URL
import urllib
# Fetch a page with urllib and show its first five lines.
instream = urllib.urlopen("http://www.third-bit.com/greeting.html")
try:
    lines = instream.readlines()
finally:
    # Close the connection even if the read fails part-way through.
    instream.close()

for line in lines[:5]:
    # Each line still carries its own newline; strip it so that print's
    # newline does not double-space the output.
    print(line.rstrip('\n'))
readlines wouldn't do the right thing if the thing being read was an image
Use read to grab the bytes in that case
import urllib, re
from sets import Set
# Download the home page in one piece (read rather than readlines,
# because the text is scanned as a whole).
source = urllib.urlopen("http://www.third-bit.com/index.html")
page = source.read()
source.close()

# Every match looks like href="target"; dropping the first six characters
# and the last one leaves just the target.  The Set discards duplicates.
matches = re.findall(r'href=\"[^\"]+\"', page)
links = list(Set([match[6:-1] for match in matches]))
links.sort()

# Show the distinct targets in sorted order, one per line.
for target in links:
    print(target)
#ack #news #rules #search /MT/mt.cgi /blog /cgi-bin/awstats.pl?config=www.third-bit.com http://books.slashdot.org/books/05/06/16/1930227.shtml http://projects.edgewall.com/trac http://subversion.tigris.org http://weblog.infoworld.com/udell http://weblog.infoworld.com/udell/2005/06/23.html http://www.amazon.com/exec/obidos/redirect?path=ASIN/0974514071&link_code=as2&camp=1789&tag=thirdbit-20&creative=9325 http://www.amazon.com/exec/obidos/redirect?tag=thirdbit-20&creative=9325&camp=1789&link_code=as2&path=ASIN/0974514071 http://www.apache.org http://www.artima.com http://www.artima.com/chapters/book.jsp?num=114451 http://www.cs.utoronto.ca http://www.debian.org http://www.jonahgroup.com http://www.list.org http://www.oreillynet.com http://www.oreillynet.com/cs/user/print/a/5862 http://www.pragmaticprogrammer.com/titles/gwd/index.html http://www.provost.utoronto.ca http://www.python.org http://www.python.org/psf http://www.sf.net http://www.techbookreport.com/tbr0172.html http://www.third-bit.com/pyweb/index.html http://www.third-bit.com/swc http://www.third-bit.com/trac/argon http://www.third-bit.com/trac/pyre http://www.third-bit.com/trac/pyre/newticket http://www.third-bit.com/trac/swc http://www.third-bit.com/~gvwilson http://www.utoronto.ca mailinglists.html mailto:gvwilson@cs.utoronto.ca old-presentations.html past.html reading.html style.css
http://www.google.com?q=Python searches for pages related to Python
"?" separates the parameters from the rest of the URL
"&"
http://www.google.ca/search?q=Python&client=firefox
"name=value"
"+" (so “web programming” becomes “web+programming”)
"+"'s in a moment
"?" or "&" in a parameter?
"%" followed by its 2-digit hexadecimal code
http://www.google.ca/search?q=grade+%3D+A%2B
urllib.quote(str) replaces special characters in str with escape sequences
urllib.unquote(str) replaces escape sequences with characters
urllib.urlencode(params) takes a dictionary, or a list of pairs, and constructs the entire query parameter string
import urllib
# Build a query string from each accepted input form: a dictionary, and
# a list of (name, value) pairs.
asDict = {'surname' : 'hopper', 'forename' : 'grace'}
asPairs = [('surname', 'turing'), ('forename', 'alan')]
for params in (asDict, asPairs):
    print(urllib.urlencode(params))
surname=hopper&forename=grace
surname=turing&forename=alan
amazon.py to search by various criteria
import sys, amazon
# Format multiple authors' names nicely.
def prettyName(arg):
    """Turn one name, or a sequence of names, into display text.

    A list or tuple of names is joined as 'A', 'A and B', or
    'A, B and C'; an empty sequence gives ''.  Any other value (for
    example a single name string) is returned unchanged.
    """
    if not isinstance(arg, (list, tuple)):
        return arg
    if not arg:
        return ''
    if len(arg) == 1:
        # Without this case a lone name would come back as ' and A'.
        return arg[0]
    return ', '.join(arg[:-1]) + ' and ' + arg[-1]
if __name__ == '__main__':

    # Get information.  Both the license key and the ASIN are required,
    # so explain the usage instead of failing on a missing argument.
    if len(sys.argv) < 3:
        print('usage: %s license-key asin' % sys.argv[0])
        sys.exit(1)
    key, asin = sys.argv[1], sys.argv[2]
    amazon.setLicense(key)
    items = amazon.searchByASIN(asin)

    # Handle errors.
    if not items:
        # There is nothing to display, so stop here with a nonzero exit
        # code rather than failing on items[0] below.
        print('Nothing found for %s' % asin)
        sys.exit(1)
    if len(items) > 1:
        # Report the surprise, then carry on with the first item.
        print('%d items found for %s' % (len(items), asin))

    # Display information.
    item = items[0]
    productName = item.ProductName
    ourPrice = item.OurPrice
    authors = prettyName(item.Authors.Author)
    print('%s: %s (%s)' % (authors, productName, ourPrice))
| prev | Copyright © 2005, Python Software Foundation. See License for details. | next |