import time
import csv
import pandas as pd
import os
import wikipedia

from urllib.parse import unquote

# Scrape a short summary for every Wikipedia URL listed in the "url" column of
# wiki_to_scrape.csv and write one row per URL (index, url, title, summary) to
# wiki_summaries_3.csv. Each row is flushed to disk as soon as it is written so
# that progress survives an interrupted run.

# Pause between attempts after a failure that may be transient (for example a
# network error), and the number of attempts made before a row is given up on.
RETRY_DELAY_SECONDS = 2 * 60
MAX_ATTEMPTS = 5

# Failures that depend only on the input and will recur on every retry, so
# sleeping and trying again cannot help.
PERMANENT_ERRORS = (
    IndexError,  # wikipedia.search() returned an empty result list
    wikipedia.exceptions.PageError,
    wikipedia.exceptions.DisambiguationError,
    wikipedia.exceptions.RedirectError,  # raised because redirect=False
)

wiki_file = pd.read_csv(r'wiki_to_scrape.csv')

# Iterate positionally so the row number does not depend on the frame's index labels.
urls = wiki_file['url'].tolist()

# Not populated by this script; retained in case code outside this view reads it.
summaries = []

# The context manager guarantees the output file is closed even if the run is
# interrupted or an unexpected error propagates.
with open("wiki_summaries_3.csv", "w", newline='', encoding="utf-8") as fp:
    csvwriter = csv.writer(fp, delimiter=",", quoting=csv.QUOTE_NONNUMERIC, quotechar="\"")
    csvwriter.writerow(["index", "url", "title", "summary"])

    for num, url in enumerate(urls):
        print("line", num, "starts search")

        # A missing cell is read by pandas as NaN (a float); it has no page name.
        if not isinstance(url, str):
            print(num, 'an error occured', 'url is not a string:', repr(url))
            continue

        page = unquote(url.replace('https://en.wikipedia.org/wiki/', ''))

        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                # Use the top search hit as the canonical article title.
                title = wikipedia.search(page)[0]
                if '.jpg' not in title:
                    summary = wikipedia.summary(
                        title, auto_suggest=False, redirect=False
                    ).replace('\n', '')
                else:
                    # Image hits have no article summary; use the bare file
                    # name (without "File:" and the extension) instead.
                    summary = title.replace('File:', '').split('.jpg')[0]
            except PERMANENT_ERRORS as err:
                print(num, 'an error occured', repr(err))
                print(num, 'skipped')
                break
            except Exception as err:
                # Exception (not a bare except) so that Ctrl-C and SystemExit
                # still terminate the script.
                print(num, 'an error occured', repr(err))
                if attempt == MAX_ATTEMPTS:
                    print(num, 'skipped after', MAX_ATTEMPTS, 'attempts')
                    break
                time.sleep(RETRY_DELAY_SECONDS)
            else:
                csvwriter.writerow([num, url, title, summary])

                # Push the row through Python's buffer and the OS cache.
                fp.flush()
                os.fsync(fp.fileno())

                print(num, 'is complete')
                break