springer_books_download_script/Springer-Libros.py

52 lines
1.6 KiB
Python
Raw Normal View History

2020-04-27 10:17:53 +02:00
#############################################
###
### Download Springer Books
### Corona Virus Time
###
### carlos@cardenas.pe
###
### GPL 3.0 v
###
### 27/04/2020
###
#############################################
import PyPDF2
import urllib3
import wget
2020-05-04 00:59:24 +02:00
def download(part_page_url):
    """Fetch a Springer book page and download the PDF it links to.

    The page at ``"https" + part_page_url`` is requested, the book title
    and the PDF location are scraped out of the returned HTML with plain
    string splitting, and the PDF is saved through ``wget`` under
    ``<title>.pdf``.

    Args:
        part_page_url: The page URL without its leading ``"https"``.

    Raises:
        IndexError: If the HTML lacks the markers the scraping relies on
            (``h1``, ``content/``).
    """
    http = urllib3.PoolManager()
    # NOTE(review): the caller in this file passes fragments produced by
    # splitting the PDF text on 'ht', so the argument appears to start with
    # 'tp://...'; prefixing "https" would then yield 'httpstp://...'.
    # Confirm the intended prefix against the real input.
    page_url = "https" + part_page_url

    res = http.request('GET', page_url)
    # Decode once and reuse the text for both extractions.
    html = res.data.decode('utf-8')

    # Title: text between the '>' and '<' that follow the first 'h1',
    # truncated at the first '/' so it cannot introduce a path separator
    # into the output file name.
    heading = html.split('h1')[1].split('>')[1].split('<')[0]
    title = heading.split('/')[0] + ".pdf"

    # PDF path: within the text preceding 'Download book PDF', take what
    # follows the first 'content/' and cut it at the first 'title' and then
    # at the first '.pdf'.
    before_link_label = html.split('Download book PDF')[0]
    pdf_path = before_link_label.split('content/')[1].split('title')[0].split('.pdf')[0]
    dl_url = "https://link.springer.com/content/" + pdf_path + ".pdf"

    wget.download(dl_url, title)
2020-04-27 10:17:53 +02:00
# Walk every page of the catalogue PDF, pull out the link fragments and
# hand each one to download().
# NOTE: PdfFileReader / numPages / getPage / extractText are the legacy
# PyPDF2 names; they are kept because that is the API this file targets.
with open('Spring.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfFileReader(pdf_file)
    for page_index in range(reader.numPages):
        # Extract the text once per page instead of once per comparison.
        page_text = reader.getPage(page_index).extractText()
        is_first_page = page_index == 0
        if is_first_page:
            # On the first page only the text after the 'OpenURL' marker
            # is scanned (raises IndexError if the marker is absent).
            page_text = page_text.split('OpenURL')[1]

        for chunk in page_text.split('ht'):
            # A candidate link is the first line of each chunk after 'ht'.
            link_fragment = chunk.split('\n')[0]
            if not link_fragment:
                continue
            # Later pages accept only fragments of exactly 64 characters;
            # presumably the length of a well-formed link -- TODO confirm.
            if not is_first_page and len(link_fragment) != 64:
                continue
            print(link_fragment)
            # The original called an undefined `foo`; `download` is the
            # only function this file defines for handling a link.
            download(link_fragment)