Причина в том, что веб-страница медленно загружается. Я добавил ожидание через 20 секунд после открытия ссылки на веб-страницу
login_attempt.submit()
browser.get("https://www.shareinvestor.com/prices/price_download.html#/?type=price_download_all_stocks_bursa")
print browser.current_url
time.sleep(20)
dl = browser.find_element_by_xpath("//*[@href='/prices/price_download_zip_file.zip?type=history_all&market=bursa']").click()
Не возвращается ошибка.
Дополнительно, /zip
неверен тип MIME. Изменить на profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'application/zip')
Окончательная коррекция:
#!/usr/bin/python
print "hello from python 2"
import urllib2
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from pyvirtualdisplay import Display
import requests, zipfile, os
display = Display(visible=0, size=(800, 600))
display.start()
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2)
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', "/home/vinvin/shKLSE/")
# application/zip not /zip
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'application/zip')
for retry in range(5):
try:
browser = webdriver.Firefox(profile)
print "firefox"
break
except:
time.sleep(3)
time.sleep(1)
browser.get("https://www.shareinvestor.com/my")
time.sleep(10)
login_main = browser.find_element_by_xpath("//*[@href='/user/login.html']").click()
print browser.current_url
username = browser.find_element_by_id("sic_login_header_username")
password = browser.find_element_by_id("sic_login_header_password")
print "find id done"
username.send_keys("bkcollection")
password.send_keys("123456")
print "log in done"
login_attempt = browser.find_element_by_xpath("//*[@type='submit']")
login_attempt.submit()
browser.get("https://www.shareinvestor.com/prices/price_download.html#/?type=price_download_all_stocks_bursa")
print browser.current_url
time.sleep(20)
dl = browser.find_element_by_xpath("//*[@href='/prices/price_download_zip_file.zip?type=history_all&market=bursa']").click()
time.sleep(30)
browser.close()
browser.quit()
display.stop()
zip_ref = zipfile.ZipFile('/home/vinvin/shKLSE/file.zip', 'r')
zip_ref.extractall('/home/vinvin/shKLSE')
zip_ref.close()
# remove with correct path
os.remove('/home/vinvin/shKLSE/file.zip')