| prev | Draft Version 574 (Wed Nov 9 17:41:31 2005) | next |
<person role="litigant"> <given-name>Charles</given-name> <surname>Babbage</surname> </person>
"<>"
<tagname>…</tagname>
<tagname/>
<X>…<Y>…</Y></X> is legal…
<X>…<Y>…</X></Y> is not
<first> This document is <em>illegal</em> </first> <second> because it does not have a unique root element. </second>
"<" and ">"&name;
html | Root element of entire HTML document. |
|---|---|
body | Body of page (i.e., visible content). |
h1 | Top-level heading. Use h2, h3, etc. for second- and third-level headings. |
p | Paragraph. |
em | Emphasized text; browser or editor will usually display it in italics. |
address | Address of document author (also usually displayed in italics). |
h1 (level-1 heading) is semantic, i (italics) is display
<html> <body> <h1>Software Carpentry</h1> <p>This course will introduce <em>essential software development skills</em>, and show where and how they should be applied.</p> <address>Greg Wilson (gvwilson@third-bit.com)</address> </body> </html> |
<h1>A Centered Heading</h1>
<p>This planet provided as-is.</p>
<p>…</p> is illegal
<p>…</p>, but modern parsers will reject it
| With Attributes | Without Attributes |
|---|---|
<a b="c"> <d e="f"/> </a> |
<a> <a-b>c</a-b> <d><d-e>f</d-e></d> </a> |
head element as well as a body
<!--, and end with --><html> <head> <title>Comments Page</title> <meta name="author" content="aturing"/> </head> <body> <!-- House style puts all titles in italics --> <h1><em>Welcome to the Comments Page</em></h1> <!-- Update this paragraph to describe the forum. --> <p>Welcome to the Comments Forum.</p> </body> </html>
ul for an unordered (bulleted) list, and ol for an ordered (numbered) one
li
table for tables
tr (for “table row”)
td (for “table data”)
<html>
<head>
<title>Lists and Tables</title>
<meta name="svn" content="$Id: xml.swc 54 2005-04-13 13:29:28Z gvwilson $"/>
</head>
<body>
<table cellpadding="3" border="1">
<tr>
<td align="center"><em>Unordered List</em></td>
<td align="center"><em>Ordered List</em></td>
</tr>
<tr>
<td align="left" valign="top">
<ul>
<li>Hydrogen</li>
<li>Lithium</li>
<li>Sodium</li>
<li>Potassium</li>
<li>Rubidium</li>
<li>Cesium</li>
<li>Francium</li>
</ul>
</td>
<td align="left" valign="top">
<ol>
<li>Helium</li>
<li>Neon</li>
<li>Argon</li>
<li>Krypton</li>
<li>Xenon</li>
<li>Radon</li>
</ol>
</td>
</tr>
</table>
</body>
</html>
|
meta elements in document head
img tag
src attribute specifies where to find the image file
alt attribute to specify alternative text
a element to create a link
href attribute specifies what the link is pointing at
<html>
<head>
<title>Links</title>
<meta name="svn" content="$Id: xml.swc 54 2005-04-13 13:29:28Z gvwilson $"/>
</head>
<body>
<h1>A Few of My Favorite Places</h1>
<ul>
<li><a href="http://www.google.com">Google</a></li>
<li><a href="http://www.python.org">Python</a></li>
<li><a href="http://www.nature.com/index.html">Nature Online</a></li>
<li>Examples in this lecture:
<ul>
<li><a href="comments.html">Comments</a></li>
<li><a href="image.html">Images</a></li>
<li><a href="list_table.html">Lists and Tables</a></li>
</ul>
</li>
</ul>
</body>
</html>
|
Top 10 Accessible Web Authoring Practices describes what you should do to make your pages more accessible
minidom
<?xml version="1.0" encoding="utf-8"?> <planet name="Mercury"> <period units="days">87.97</period> </planet>
import xml.dom.minidom
# Read the XML file into a DOM tree, then write the tree back out as
# UTF-8 encoded text.
doc = xml.dom.minidom.parse('mercury.xml')
# A single parenthesized argument prints exactly as the Python 2 print
# statement did, and the line also parses under Python 3.
print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <planet name="Mercury"> <period units="days">87.97</period> </planet>
toxml method can be called on the document, or on any element node
"utf-8" as the character encoding
import xml.dom.minidom
# Parse XML held in a string instead of in a file.
src = '''<planet name="Venus">
<period units="days">224.7</period>
</planet>'''
doc = xml.dom.minidom.parseString(src)
# A single parenthesized argument prints exactly as the Python 2 print
# statement did, and the line also parses under Python 3.
print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <planet name="Venus"> <period units="days">224.7</period> </planet>
import xml.dom.minidom
# Create an empty document whose root element is <planet>.
impl = xml.dom.minidom.getDOMImplementation()
doc = impl.createDocument(None, 'planet', None)
root = doc.documentElement
root.setAttribute('name', 'Mars')

# Build <period>686.98</period> and attach it to the root.
period = doc.createElement('period')
root.appendChild(period)
text = doc.createTextNode('686.98')
period.appendChild(text)

# A single parenthesized argument prints exactly as the Python 2 print
# statement did, and the line also parses under Python 3.
print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <planet name="Mars"><period>686.98</period></planet>
xml.dom.minidom is really just a wrapper around other platform-specific XML libraries
document node
createDocument are
createDocument what type of element the document's root node should be
setAttribute(attributeName, newValue)
import xml.dom.minidom
# Sample document: a <solarsystem> root holding three <planet> elements,
# one per line, each with a single <period> child.
src = '''<solarsystem>
<planet name="Mercury"><period units="days">87.97</period></planet>
<planet name="Venus"><period units="days">224.7</period></planet>
<planet name="Earth"><period units="days">365.26</period></planet>
</solarsystem>
'''
def walkTree(currentNode, indent=0):
    """Print an indented outline of the DOM subtree rooted at currentNode.

    A text node is shown as ``TEXT (n)``, where n is the length of its
    data.  Any other node is shown by name, and its children are then
    walked one level deeper.

    currentNode -- the DOM node to start from
    indent -- nesting depth, printed as that many leading spaces
    """
    spaces = ' ' * indent
    if currentNode.nodeType == currentNode.TEXT_NODE:
        # A single parenthesized argument prints the same on Python 2 and 3.
        print(spaces + 'TEXT' + ' (%d)' % len(currentNode.data))
    else:
        # nodeName equals tagName for elements, and unlike tagName it is
        # also defined on comments and processing instructions.
        print(spaces + currentNode.nodeName)
        for child in currentNode.childNodes:
            walkTree(child, indent + 1)
# Parse the sample text and print an outline starting at the root element.
doc = xml.dom.minidom.parseString(src)
walkTree(doc.documentElement)
solarsystem
 TEXT (1)
 planet
  period
   TEXT (5)
 TEXT (1)
 planet
  period
   TEXT (5)
 TEXT (1)
 planet
  period
   TEXT (6)
 TEXT (1)
nodeType
ELEMENT_NODE, TEXT_NODE, ATTRIBUTE_NODE, DOCUMENT_NODE
childNodes
data
class Visitor(object):
    """Base class for a depth-first walk over a DOM tree.

    Subclasses override before, at, and after to act on each node; all
    three default to doNothing.
    """

    def __init__(self):
        pass

    def visit(self, node):
        """Walk the tree rooted at node, calling the three hooks.

        For each node, before and at are called first; if the node is an
        element, each of its children is visited in order; then after is
        called.  A document node is not reported itself: the walk starts
        at its root element.
        """
        # When given the document, skip to the root.
        if node.nodeType == node.DOCUMENT_NODE:
            self.visit(node.documentElement)
            return

        # Handle other types of nodes.
        self.before(node)
        self.at(node)
        if node.nodeType == node.ELEMENT_NODE:
            for child in node.childNodes:
                self.visit(child)
        self.after(node)

    def doNothing(self, node):
        """Default hook: ignore the node."""
        pass

    # Default hooks; a subclass replaces whichever ones it needs.
    before = doNothing
    at = doNothing
    after = doNothing
# [lecture text] Visitor.visit with the root node of the tree they want to traverse
class Counter(Visitor):
    """Visitor that counts the element nodes it is shown.

    The running total is kept in self.count, which starts at zero.
    """

    def __init__(self):
        Visitor.__init__(self)
        self.count = 0

    def at(self, node):
        """Add one to the count when node is an element."""
        if node.nodeType == node.ELEMENT_NODE:
            self.count += 1
# [lecture text] count to zero before traversing
if __name__ == '__main__':
    # The sample holds five elements: a, b, d, f, and h.
    src = '<a><b>c</b><d>e</d><f>g<h/>i</f></a>'
    tree = xml.dom.minidom.parseString(src)
    c = Counter()
    c.visit(tree)
    assert c.count == 5
<em/> element whose only child is a text node containing that word<em/>
# [lecture text] getElementsByTagName, and iterate over them
def emphasize(doc):
    """Emphasize the first word of every paragraph that starts with text.

    Each <p> element whose first child is a text node is passed to
    emphasizeText.  Paragraphs that are empty, or that start with
    something other than text, are left alone.

    doc -- the DOM document to search; it is modified in place
    """
    for para in doc.getElementsByTagName('p'):
        first = para.firstChild
        # An empty paragraph has no first child at all.
        if first is not None and first.nodeType == first.TEXT_NODE:
            emphasizeText(doc, para, first)
def emphasizeText(doc, para, textNode):
    """Wrap the first word of textNode in an <em> element.

    The text node is replaced, in place, by up to three nodes: any leading
    whitespace, an <em> holding the first word, and the rest of the text.
    Nothing changes if no first word is found.

    doc -- document used to create the new nodes
    para -- parent element of textNode
    textNode -- the text node to split
    """
    # Look for optional spaces, a word, and the rest of the paragraph.
    # DOTALL lets the remainder run across line breaks.  The data is
    # matched as-is because wrapping it in str() fails on non-ASCII text
    # under Python 2.
    m = re.match(r'^(\s*)(\S*)\b(.*)$', textNode.data, re.DOTALL)
    if not m:
        return
    leadingSpace, firstWord, restOfText = m.groups()
    if not firstWord:
        return

    # Create the replacements in document order, placing each one just
    # ahead of the original text node so its position is preserved.
    if leadingSpace:
        para.insertBefore(doc.createTextNode(leadingSpace), textNode)
    emph = doc.createElement('em')
    emph.appendChild(doc.createTextNode(firstWord))
    para.insertBefore(emph, textNode)
    if restOfText:
        para.insertBefore(doc.createTextNode(restOfText), textNode)

    # Get rid of the original text.
    para.removeChild(textNode)
if __name__ == '__main__':
    # Three paragraphs that each begin with text; the second also holds
    # an existing <em> element later in the paragraph.
    src = '''<html><body>
<p>First paragraph.</p>
<p>Second paragraph contains <em>emphasis</em>.</p>
<p>Third paragraph.</p>
</body></html>'''
    doc = xml.dom.minidom.parseString(src)
    emphasize(doc)
    # A single parenthesized argument prints exactly as the Python 2 print
    # statement did, and the line also parses under Python 3.
    print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <html><body> <p><em>First</em> paragraph.</p> <p><em>Second</em> paragraph contains <em>emphasis</em>.</p> <p><em>Third</em> paragraph.</p> </body></html>
| prev | Copyright © 2005, Python Software Foundation. See License for details. | next |