OpenLayers

root/trunk/openlayers/tools/exampleparser.py

Revision 7477, 8.1 kB (checked in by crschmidt, 2 months ago)

Not sure if this is right, but this adds a link to the metadata about
each entry. It seems to fix the issue reported earlier: the links
in the example list were broken.

  • Property svn:executable set to *
Line 
1 #!/usr/bin/env python
2
3 import sys
4 import os
5 import re
6 import urllib2
7 import time
8 from xml.dom.minidom import Document
9
10 try:
11     import xml.etree.ElementTree as ElementTree
12 except ImportError:
13     try:
14         import cElementTree as ElementTree
15     except ImportError:
16         try:
17             import elementtree.ElementTree as ElementTree
18         except ImportError:
19             import lxml.etree as ElementTree
20
21 missing_deps = False
22 try:
23     import simplejson
24     from BeautifulSoup import BeautifulSoup
25 except ImportError, E:
26     missing_deps = E
27
28 feedName = "example-list.xml"
29 feedPath = "http://openlayers.org/dev/examples/"
30
def getListOfOnlineExamples(baseUrl):
    """
    Return the href of every .html example linked from a list item at baseUrl.

    Useful if you want to get a list of examples from a url. Not used by
    default.

    baseUrl -- url of a page whose <li> elements link to the examples.

    List items without a link, and links without an href, are skipped.
    """
    response = urllib2.urlopen(baseUrl)
    try:
        soup = BeautifulSoup(response)
    finally:
        # the page is fully parsed (or parsing failed); release the connection
        response.close()

    examples = []
    for item in soup.findAll('li'):
        anchor = item.find('a')
        if anchor is None:
            # plain list item, nothing to link to
            continue
        href = anchor.get('href')
        if href and href.endswith('.html'):
            examples.append(href)
    return examples
42    
def getListOfExamples(relPath):
    """
    List the .html file names found directly inside relPath, leaving out
    the example-list.html page itself.
    """
    found = []
    for name in os.listdir(relPath):
        if name == "example-list.html":
            continue
        if name.endswith('.html'):
            found.append(name)
    return found
50    
51
52 def getExampleHtml(location):
53     """
54     returns html of a specific example that is available online or locally
55     """
56     print '.',
57     if location.startswith('http'):
58         return urllib2.urlopen(location).read()
59     else:
60         f = open(location)
61         html = f.read()
62         f.close()
63         return html
64        
65    
def extractById(soup, tagId, value=None):
    """
    Return the rendered contents of the tag whose id is tagId, stripped of
    surrounding whitespace and with every tab and newline removed.

    When no such tag is found, or the tag has no contents, the value
    argument is returned unchanged.
    """
    tag = soup.find(id=tagId)
    if not tag or not tag.contents:
        return value
    rendered = str(tag.renderContents()).strip()
    return rendered.replace('\t','').replace('\n','')
77
def getRelatedClasses(html):
    """
    Scan the html for OpenLayers class usage and return, in order of
    appearance, every "OpenLayers.<something>" that is followed by an
    opening parenthesis (ie what parts of OL the javascript uses).
    """
    pattern = re.compile(r'''(?P<class>OpenLayers\..*?)\(''')
    return [match.group('class') for match in pattern.finditer(html)]
85
def parseHtml(html,ids):
    """
    Return a dictionary of the items of interest in html: one key per tag
    id in ids (holding that tag's extracted contents) plus 'classes', the
    list of OpenLayers classes the page refers to.
    """
    soup = BeautifulSoup(html)
    details = dict([(tagId, extractById(soup, tagId)) for tagId in ids])
    #classes should eventually be parsed from docs - not automatically created.
    details['classes'] = getRelatedClasses(html)
    return details
98
def getSvnInfo(path):
    """
    Return the svn url, last commit author and last commit date of path.

    The values are read from the output of `svn info --xml`.  The result is
    a dictionary with the keys 'url', 'author' and 'date'; a value is None
    when svn could not be run, produced no usable XML, or did not report
    that piece of information.
    """
    import subprocess

    info = {'url': None, 'author': None, 'date': None}
    try:
        # An argument list (no shell) passes file names containing spaces
        # or shell metacharacters through intact.
        svn = subprocess.Popen(["svn", "info", path, "--xml"],
                               stdout=subprocess.PIPE)
        output = svn.communicate()[0]
    except OSError:
        # the svn executable could not be started
        return info

    if not output.strip():
        return info
    try:
        tree = ElementTree.fromstring(output)
    except Exception:
        # The parse error class differs between the ElementTree
        # implementations this file can fall back to, so it cannot be
        # named more precisely here.
        return info

    info['url'] = tree.findtext('entry/url')
    info['author'] = tree.findtext('entry/commit/author')
    info['date'] = tree.findtext('entry/commit/date')
    return info
109    
def _appendTextElement(doc, parent, namespace, tagName, text):
    """Create a tagName element holding text and append it to parent."""
    element = doc.createElementNS(namespace, tagName)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def createFeed(examples):
    """
    Build the Atom feed document describing the examples.

    examples -- list of dictionaries, each with the keys "title", "example",
                "shortdesc", "modified" and "author".  The list itself is
                left untouched.

    Entries are written most recently modified first (ordered on the
    "modified" string).  A missing title or short description falls back
    to the example file name.

    Returns the xml.dom.minidom Document holding the feed.
    """
    doc = Document()
    atomuri = "http://www.w3.org/2005/Atom"
    feed = doc.createElementNS(atomuri, "feed")
    feed.setAttribute("xmlns", atomuri)
    _appendTextElement(doc, feed, atomuri, "title", "OpenLayers Examples")

    # link back to the feed itself
    link = doc.createElementNS(atomuri, "link")
    link.setAttribute("rel", "self")
    link.setAttribute("href", feedPath + feedName)
    feed.appendChild(link)

    # %H: the timestamp needs the 24-hour clock, %I would repeat every 12 hours
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    _appendTextElement(doc, feed, atomuri, "id",
                       "%s%s#%s" % (feedPath, feedName, modtime))
    _appendTextElement(doc, feed, atomuri, "updated", modtime)

    # sorted() works on a copy, so the caller's ordering is preserved
    for example in sorted(examples, key=lambda x:x["modified"], reverse=True):
        entry = doc.createElementNS(atomuri, "entry")

        _appendTextElement(doc, entry, atomuri, "title",
                           example["title"] or example["example"])

        link = doc.createElementNS(atomuri, "link")
        link.setAttribute("href", "%s%s" % (feedPath, example["example"]))
        entry.appendChild(link)

        _appendTextElement(doc, entry, atomuri, "summary",
                           example["shortdesc"] or example["example"])
        _appendTextElement(doc, entry, atomuri, "updated", example["modified"])

        author = doc.createElementNS(atomuri, "author")
        _appendTextElement(doc, author, atomuri, "name", example["author"])
        entry.appendChild(author)

        _appendTextElement(doc, entry, atomuri, "id",
                           "%s%s#%s" % (feedPath, example["example"], example["modified"]))

        feed.appendChild(entry)

    doc.appendChild(feed)
    return doc
165    
def wordIndex(examples):
    """
    Create an inverted index based on words in title and shortdesc.  Keys are
    lower cased words.  Values are dictionaries with example index keys and
    count values.

    examples -- sequence of dictionaries that each have a "shortdesc" and a
                "title" key; an empty or None text is skipped.
    """
    index = {}
    unword = re.compile(r"\W+")
    for i, example in enumerate(examples):
        for key in ("shortdesc", "title"):
            text = example[key]
            if not text:
                continue
            for word in unword.split(text):
                if not word:
                    # split() yields '' when the text starts or ends with
                    # non-word characters
                    continue
                counts = index.setdefault(word.lower(), {})
                counts[i] = counts.get(i, 0) + 1
    return index
191    
192 if __name__ == "__main__":
193
194     if missing_deps:
195         print "This script requires simplejson and BeautifulSoup. You don't have them. \n(%s)" % E
196         sys.exit()
197    
198     if len(sys.argv) > 1:
199         outFile = open(sys.argv[1],'w')
200     else:
201         outFile = open('../examples/example-list.js','w')
202    
203     examplesLocation = '../examples'
204     print 'Reading examples from %s and writing out to %s' % (examplesLocation, outFile.name)
205    
206     exampleList = []
207     docIds = ['title','shortdesc']
208    
209     #comment out option to create docs from online resource
210     #examplesLocation = 'http://svn.openlayers.org/sandbox/docs/examples/'
211     #examples = getListOfOnlineExamples(examplesLocation)
212
213     examples = getListOfExamples(examplesLocation)
214
215     modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
216
217     for example in examples:
218         url = os.path.join(examplesLocation,example)
219         html = getExampleHtml(url)
220         tagvalues = parseHtml(html,docIds)
221         tagvalues['example'] = example
222         # add in svn info
223         d = getSvnInfo(url)
224         tagvalues["modified"] = d["date"] or modtime
225         tagvalues["author"] = d["author"] or "anonymous"
226         tagvalues['link'] = example
227
228         exampleList.append(tagvalues)
229        
230     print
231    
232     exampleList.sort(key=lambda x:x['example'].lower())
233    
234     index = wordIndex(exampleList)
235
236     json = simplejson.dumps({"examples": exampleList, "index": index})
237     #give the json a global variable we can use in our js.  This should be replaced or made optional.
238     json = 'var info=' + json
239     outFile.write(json)
240     outFile.close()
241
242     print "writing feed to ../examples/%s " % feedName
243     atom = open('../examples/%s' % feedName, 'w')
244     doc = createFeed(exampleList)
245     atom.write(doc.toxml())
246     atom.close()
247
248
249     print 'complete'
250
251    
Note: See TracBrowser for help on using the browser.