先贴上代码,以后再更新!

具体可以看Code-Grab-from-Leetcode

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import requests
import json
from bs4 import BeautifulSoup
import os
import re
import sys
import time
reload(sys)
sys.setdefaultencoding('utf-8')
# above is to solve the problem 'UnicodeEncodeError: 'ascii' codec can't encode character...'
# for python 2.7 is based on ascii, when character is out of ascii, it will post an error

s = requests.Session()
headers_base = {
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6',
'Connection':'keep-alive',
'Host': 'leetcode.com',
'Referer':'https://leetcode.com/accounts/login/',
'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}


def login():
url = "https://leetcode.com/accounts/login/"
res = s.get(url=url,headers=headers_base)
#get csrftoken from cookie
print res.cookies['csrftoken']

login_data={}
login_data['csrfmiddlewaretoken']=res.cookies['csrftoken']
login_data['login'] = "username"
login_data['password'] = "password"
login_data['remember'] ='on'
res = s.post(url,headers = headers_base,data=login_data)
print res.status_code
return res.cookies

def get_bs(url):
try:
page = s.get(url)
except (requests.exceptions.ReadTimeout,requests.exceptions.ConnectTimeout):
print('time out')
return 'time out'
bs = BeautifulSoup(page.text, "html.parser")
return bs

#get the ac solution url
def get_ac_url(alg_name_slug):
url = 'https://leetcode.com/problems/'+alg_name_slug+'/submissions/'
bs = get_bs(url)

ac_solutions = bs.find_all("a","text-danger status-accepted")#get all ac solution
return 'https://leetcode.com'+ ac_solutions[0]["href"]#choose the first one

def get_ac_code(url):
bs = get_bs(url)
ac_code = bs.find_all('script')
final_code = txt_wrap_by('submissionCode:','editCodeUrl:',ac_code[11].text)
final_code = re.sub('/\\*.*\\*/','',final_code)#remove annotation to prevent chinese
return final_code.decode('unicode-escape')


#get the algorithm content by name
def get_alg_content(name):
url = 'https://leetcode.com/problems/' + name
try:
page = requests.get(url)
except (requests.exceptions.ReadTimeout,requests.exceptions.ConnectTimeout):
print('time out')
return 'time out'
alg_page = BeautifulSoup(page.text, "html.parser")
alg_contents = alg_page.select('.question-content')

alg_text = ''
#for some special case(such as subscribe needed),
#the alg_contents' length will be 0
if len(alg_contents) > 0:
contents = alg_contents[0].find_all(['p','pre'])

for ctt in contents:
alg_text += ctt.get_text()

page.close
return alg_text

#get the code from the js code by fetch
#content between submissionCode: & editCodeUrl:
def txt_wrap_by(start_str, end, html):
start = html.find(start_str)
if start >= 0:
start += len(start_str)
end = html.find(end, start)
if end >= 0:
return html[start+2:end-5].strip()

def save_all(content):
name = str(content['id'])+'_'+content['name']+'.md'
f = open(name,'wb')
f.write('###'+str(content['id'])+' '+content['title']+'\n')
f.write(content['subject']+'\n')
f.write('###Solution'+'\n')
f.write('```java'+'\n')
f.write(content['code'])
f.close()

if not os.path.exists('leetcode_with_solution'):
os.mkdir('leetcode_with_solution')
#change directory to leetcode for convinient
os.chdir('leetcode_with_solution')

#login and get the login data
login()
#print(mycookie['csrftoken'])

r = s.get(url='https://leetcode.com/api/problems/algorithms/')
#get the json data
data_json = json.loads(r.text)
#get the algorithm list
alg_list = data_json['stat_status_pairs']

for alg_json in alg_list:
if alg_json['status'] == 'ac':
#get the meta info of algorithms
alg_stat = alg_json['stat']
alg_difficulty = alg_json['difficulty']['level']
alg_paid = alg_json['paid_only']

alg_name = alg_stat['question__title']
alg_name_slug = alg_stat['question__title_slug']
alg_id = alg_stat['question_id']
alg_acs = alg_stat['total_acs']
alg_submitted = alg_stat['total_submitted']

print('writing '+str(alg_id)+' '+alg_name+'...')
url = get_ac_url(alg_name_slug)
code = get_ac_code(url)
subject = get_alg_content(alg_name_slug)
content = {'id': alg_id, 'title':alg_name,'name':alg_name_slug,'difficulty':alg_difficulty,'subject':subject,'code':code}

save_all(content)

print('Done!')
#time.sleep(1)