| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
import sys |
|---|
| 4 |
import os |
|---|
| 5 |
import re |
|---|
| 6 |
import urllib2 |
|---|
| 7 |
import time |
|---|
| 8 |
from xml.dom.minidom import Document |
|---|
| 9 |
|
|---|
| 10 |
try: |
|---|
| 11 |
import xml.etree.ElementTree as ElementTree |
|---|
| 12 |
except ImportError: |
|---|
| 13 |
try: |
|---|
| 14 |
import cElementTree as ElementTree |
|---|
| 15 |
except ImportError: |
|---|
| 16 |
try: |
|---|
| 17 |
import elementtree.ElementTree as ElementTree |
|---|
| 18 |
except ImportError: |
|---|
| 19 |
import lxml.etree as ElementTree |
|---|
| 20 |
|
|---|
| 21 |
missing_deps = False |
|---|
| 22 |
try: |
|---|
| 23 |
import simplejson |
|---|
| 24 |
from BeautifulSoup import BeautifulSoup |
|---|
| 25 |
except ImportError, E: |
|---|
| 26 |
missing_deps = E |
|---|
| 27 |
|
|---|
| 28 |
# File name of the Atom feed; it is written into the examples directory and
# appended to feedPath to form the feed's own URL.
feedName = "example-list.xml"
# Public base URL that the feed and every example link are published under.
feedPath = "http://openlayers.org/dev/examples/"
|---|
| 30 |
|
|---|
| 31 |
def getListOfOnlineExamples(baseUrl):
    """
    Return the hrefs of all '.html' links found inside <li> elements of the
    page at baseUrl. Useful if you want to get a list of examples from a url;
    not used by default.

    baseUrl -- URL of an HTML page listing the examples.

    List items without an <a> element, and anchors without an href
    attribute, are skipped.
    """
    response = urllib2.urlopen(baseUrl)
    try:
        soup = BeautifulSoup(response)
    finally:
        # Release the connection even if parsing fails.
        response.close()

    examples = []
    for item in soup.findAll('li'):
        anchor = item.find('a')
        if anchor is None:
            # <li> without a link: nothing to collect.
            continue
        href = anchor.get('href')
        if href and href.endswith('.html'):
            examples.append(href)
    return examples
|---|
| 42 |
|
|---|
| 43 |
def getListOfExamples(relPath):
    """
    List the example pages stored in a directory.

    relPath -- directory to scan (passed straight to os.listdir).

    Returns the names (not full paths) of all entries ending in '.html',
    in os.listdir order, leaving out the generated 'example-list.html'.
    """
    htmlFiles = []
    for entry in os.listdir(relPath):
        # The index page itself is not an example.
        if entry == "example-list.html":
            continue
        if entry.endswith('.html'):
            htmlFiles.append(entry)
    return htmlFiles
|---|
| 50 |
|
|---|
| 51 |
|
|---|
| 52 |
def getExampleHtml(location):
    """
    Return the html of a specific example that is available online or
    locally.

    location -- a URL (anything starting with 'http'), fetched with urllib2,
                or otherwise a path to a local file.

    Prints a '.' progress marker to stdout on every call. The underlying
    file or HTTP response is always closed, even when reading fails.
    """
    # Parenthesised so the line also parses under Python 3; under Python 2
    # this is still the print statement and the trailing comma keeps the
    # progress dots on one line.
    print('.'),
    if location.startswith('http'):
        source = urllib2.urlopen(location)
    else:
        source = open(location)
    # try/finally rather than "with" so the block stays valid on old
    # Python 2 interpreters.
    try:
        return source.read()
    finally:
        source.close()
|---|
| 64 |
|
|---|
| 65 |
|
|---|
| 66 |
def extractById(soup, tagId, value=None):
    """
    Return the rendered contents of the element whose id is tagId.

    soup  -- parsed document offering find(id=...).
    tagId -- id attribute to look up.
    value -- fallback returned unchanged when no such element is found or
             the element has no contents (defaults to None).

    The rendered text has surrounding whitespace stripped and every tab
    and newline character removed.
    """
    tag = soup.find(id=tagId)
    if not tag or not tag.contents:
        return value
    rendered = str(tag.renderContents()).strip()
    return rendered.replace('\t', '').replace('\n', '')
|---|
| 77 |
|
|---|
| 78 |
def getRelatedClasses(html):
    """
    Collect every 'OpenLayers.<something>(' reference in the given text.

    html -- page source to scan.

    Returns a list, in order of appearance and with duplicates kept, of the
    text from 'OpenLayers.' up to (not including) the next '(' on the same
    line - i.e. which parts of OL the page's javascript calls.
    """
    pattern = re.compile(r'''(?P<class>OpenLayers\..*?)\(''')
    return [match.group('class') for match in pattern.finditer(html)]
|---|
| 85 |
|
|---|
| 86 |
def parseHtml(html, ids):
    """
    Build the dictionary of items of interest for one example page.

    html -- page source.
    ids  -- element ids to extract; each becomes a key whose value is what
            extractById returns for it.

    The result also carries a 'classes' key holding the list produced by
    getRelatedClasses for the same source.
    """
    soup = BeautifulSoup(html)
    result = dict([(tagId, extractById(soup, tagId)) for tagId in ids])
    result['classes'] = getRelatedClasses(html)
    return result
|---|
| 98 |
|
|---|
| 99 |
def getSvnInfo(path): |
|---|
| 100 |
h = os.popen("svn info %s --xml" % path) |
|---|
| 101 |
tree = ElementTree.fromstring(h.read()) |
|---|
| 102 |
h.close() |
|---|
| 103 |
d = { |
|---|
| 104 |
'url': tree.findtext('entry/url'), |
|---|
| 105 |
'author': tree.findtext('entry/commit/author'), |
|---|
| 106 |
'date': tree.findtext('entry/commit/date') |
|---|
| 107 |
} |
|---|
| 108 |
return d |
|---|
| 109 |
|
|---|
| 110 |
def createFeed(examples):
    """
    Build an Atom feed document describing the given examples.

    examples -- list of dictionaries, each with the keys 'title',
                'shortdesc', 'example' (file name), 'modified' (timestamp
                string) and 'author'. When 'title' or 'shortdesc' is
                empty, the 'example' file name is used in its place.

    Returns an xml.dom.minidom Document whose root <feed> holds the feed
    title, a rel="self" link, id and updated elements, followed by one
    <entry> per example, most recently modified first. The examples list
    itself is not modified.
    """
    atomuri = "http://www.w3.org/2005/Atom"
    doc = Document()

    def textElement(tagName, text):
        # Create <tagName>text</tagName> in the Atom namespace.
        element = doc.createElementNS(atomuri, tagName)
        element.appendChild(doc.createTextNode(text))
        return element

    feed = doc.createElementNS(atomuri, "feed")
    feed.setAttribute("xmlns", atomuri)
    feed.appendChild(textElement("title", "OpenLayers Examples"))

    # The self link was previously built but never attached to the feed.
    selfLink = doc.createElementNS(atomuri, "link")
    selfLink.setAttribute("rel", "self")
    selfLink.setAttribute("href", feedPath + feedName)
    feed.appendChild(selfLink)

    # %H (24-hour clock); %I would give an ambiguous 12-hour value.
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    feed.appendChild(textElement("id", "%s%s#%s" % (feedPath, feedName, modtime)))
    feed.appendChild(textElement("updated", modtime))

    # sorted() works on a copy, so the caller's list keeps its order.
    for example in sorted(examples, key=lambda x: x["modified"], reverse=True):
        entry = doc.createElementNS(atomuri, "entry")

        entry.appendChild(textElement("title", example["title"] or example["example"]))

        link = doc.createElementNS(atomuri, "link")
        link.setAttribute("href", "%s%s" % (feedPath, example["example"]))
        entry.appendChild(link)

        entry.appendChild(textElement("summary", example["shortdesc"] or example["example"]))
        entry.appendChild(textElement("updated", example["modified"]))

        author = doc.createElementNS(atomuri, "author")
        author.appendChild(textElement("name", example["author"]))
        entry.appendChild(author)

        entryId = "%s%s#%s" % (feedPath, example["example"], example["modified"])
        entry.appendChild(textElement("id", entryId))

        feed.appendChild(entry)

    doc.appendChild(feed)
    return doc
|---|
| 165 |
|
|---|
| 166 |
def wordIndex(examples):
    """
    Create an inverted index based on words in title and shortdesc.

    examples -- sequence of dictionaries, each having 'shortdesc' and
                'title' keys; empty or None values are skipped.

    Returns a dictionary whose keys are lower cased words (text is split on
    runs of non-word characters) and whose values are dictionaries mapping
    the position of an example in examples to the number of times the word
    occurs in that example's title and shortdesc combined.
    """
    index = {}
    unword = re.compile(r"\W+")
    keys = ("shortdesc", "title")
    for position, example in enumerate(examples):
        for key in keys:
            text = example[key]
            if not text:
                continue
            for word in unword.split(text):
                if not word:
                    # split() yields '' at a leading/trailing separator.
                    continue
                counts = index.setdefault(word.lower(), {})
                counts[position] = counts.get(position, 0) + 1
    return index
|---|
| 191 |
|
|---|
| 192 |
if __name__ == "__main__":
    # Script entry point: parse every example page in ../examples, then
    # write a javascript file ("var info=<json>") holding the example
    # metadata plus a word index, and an Atom feed next to the examples.
    #
    # Usage: an optional first argument overrides the javascript output
    # path (default ../examples/example-list.js).

    if missing_deps:
        # missing_deps holds the ImportError caught while importing.
        print("This script requires simplejson and BeautifulSoup. You don't have them. \n(%s)" % missing_deps)
        sys.exit()

    if len(sys.argv) > 1:
        outPath = sys.argv[1]
    else:
        outPath = '../examples/example-list.js'

    examplesLocation = '../examples'
    print('Reading examples from %s and writing out to %s' % (examplesLocation, outPath))

    exampleList = []
    docIds = ['title', 'shortdesc']

    examples = getListOfExamples(examplesLocation)

    # Fallback modification time for examples svn knows nothing about.
    # %H (24-hour clock); %I would give an ambiguous 12-hour value.
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    for example in examples:
        url = os.path.join(examplesLocation, example)
        html = getExampleHtml(url)
        tagvalues = parseHtml(html, docIds)
        tagvalues['example'] = example

        d = getSvnInfo(url)
        tagvalues["modified"] = d["date"] or modtime
        tagvalues["author"] = d["author"] or "anonymous"
        tagvalues['link'] = example

        exampleList.append(tagvalues)

    # Bare print statement: ends the line of progress dots.
    print

    exampleList.sort(key=lambda x: x['example'].lower())

    # The index refers to positions in exampleList, so it must be built
    # after the sort and serialised together with the list.
    index = wordIndex(exampleList)

    json = simplejson.dumps({"examples": exampleList, "index": index})
    json = 'var info=' + json

    # Open the output only once the data is ready, so a failure above does
    # not leave a truncated file behind; always close it.
    outFile = open(outPath, 'w')
    try:
        outFile.write(json)
    finally:
        outFile.close()

    print("writing feed to ../examples/%s " % feedName)
    doc = createFeed(exampleList)
    atom = open('../examples/%s' % feedName, 'w')
    try:
        atom.write(doc.toxml())
    finally:
        atom.close()

    print('complete')
|---|
| 250 |
|
|---|
| 251 |
|
|---|