A script for downloading all papers from the ICML conference

script code

# coding=utf-8
from multiprocessing import Pool
import requests
from bs4 import BeautifulSoup
import traceback
import re
import os
import pdb

# Index page of the ICML 2018 proceedings (PMLR volume 80).
prefix = 'http://proceedings.mlr.press/v80/'
# Local directory where the downloaded PDFs are saved.
save_dir = 'icml2018'

def get_pdf(data):
    """Download one paper's PDF into ``save_dir``.

    Args:
        data: ``(href, title)`` tuple — the PDF URL and the paper title,
            which is sanitized and used to build the local file name.

    Errors are logged and swallowed so one bad link does not kill the
    whole pool run.
    """
    href, title = data
    # Strip characters that are illegal/awkward in file names.
    # NOTE(review): '\bx0' inside the class matches backspace, 'x' and '0'
    # literally — possibly meant '\x00'; kept as-is to preserve behavior.
    name = re.sub(r'[\\/:*?"<>|\bx0\u2019\u2014\xb0\u2013]', ' ', title)
    path = save_dir + "/icml18-%s.pdf" % name  # You may change to "path/to/your/folder"
    if os.path.isfile(path):
        print("File already exists, skip %s" % name)
        return
    try:
        resp = requests.get(href)
        # Fail on HTTP errors instead of saving an HTML error page as a .pdf.
        resp.raise_for_status()
        with open(path, 'wb') as f:
            f.write(resp.content)
        print("Finish downloading %s" % title)
    except Exception:
        print('Error when downloading %s' % href)
        print(traceback.format_exc())
        
# Scrape the proceedings index, pair each "Download PDF" link with its
# paper title, and fetch every PDF concurrently.
if __name__ == '__main__':  # guard required for multiprocessing on spawn-based platforms
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    html = requests.get(prefix).content
    soup = BeautifulSoup(html, "lxml")
    # Each paper has a <p class="links"> (containing the PDF anchor) and a
    # matching <p class="title">, in the same document order.
    a_list = soup.findAll("p", {"class": "links"})
    title_list = [p.text for p in soup.findAll("p", {"class": "title"})]
    pdf_list = []
    for links in a_list:
        # contents[3] is the "Download PDF" anchor in the links paragraph.
        anchor = links.contents[3]
        if anchor.text == "Download PDF":
            pdf_list.append(anchor.get("href"))
    # Raise (not assert — asserts are stripped under -O) if the page layout
    # changed and titles no longer line up with PDF links.
    if len(pdf_list) != len(title_list):
        raise RuntimeError("numbers of title and pdf not equal")
    print("Find %d papers" % len(pdf_list))
    # The with-block closes and joins the pool when all downloads finish.
    with Pool(100) as pool:
        pool.map(get_pdf, zip(pdf_list, title_list))
recent article

yield for python

an example of codedef fun(): for i in range(20): x = yield i print('good', x, i)if __name__ == '__main__': a = fun() a.__next__() a.__next__() x1 = a.send(5) x2 = a.send(3) next(a) next(a) print(x1, x2)resul...…

computer science deep learning python blogread
previous article

at for python

an example of codedef func1(a): def b(*args, **kwargs): print('a = ', a) output = a(*args, **kwargs) output = output ** 2 print('func1 =', output) return output return b@func1def func2(b): output = b + 2...…

computer science deep learning at blogread