1
0
Fork 0
mirror of https://github.com/josephmisiti/awesome-machine-learning.git synced 2024-11-13 11:24:23 -05:00

Update the script to write the package list to a text file

The codecs module is also needed so that non-ASCII Unicode characters (code points 128 and above) can be read and written.
This commit is contained in:
Sumit Khanna 2015-02-08 01:34:10 +05:30
parent 473df23964
commit 2689ecbca7

View file

@ -7,10 +7,11 @@
from pyquery import PyQuery as pq
import urllib
import codecs
text_file = codecs.open("Packages.txt",encoding='utf-8',mode="w")
d = pq(url='http://cran.r-project.org/web/views/MachineLearning.html',opener=lambda url, **kw: urllib.urlopen(url).read())
index = 0
for e in d("li").items():
package_name = e("a").html()
package_link = e("a")[0].attrib['href']
@ -18,6 +19,7 @@ for e in d("li").items():
package_link = package_link.replace("..",'http://cran.r-project.org/web')
dd = pq(url=package_link,opener=lambda url, **kw: urllib.urlopen(url).read())
package_description = dd("h2").html()
print "* [%s](%s) - %s" % (package_name,package_link,package_description)
text_file.write(" [%s](%s) - %s \n" % (package_name,package_link,package_description))
# print "* [%s](%s) - %s" % (package_name,package_link,package_description)
index += 1
index += 1