import json

# Quick script to get the top Instagram posts per country, ranked by engagement, views (impressions) and reach

# InstagramPostAnalytics and FacebookPostAnalytics can be exported from MongoDB.
# sites.json was exported from the SQL db using the getsites.sql select statement.

def initData():
    """Load the Instagram post analytics and the site list from the working directory.

    Reads "InstagramPostAnalytics.json" (one JSON document per line) and
    "sites.json" (a JSON object whose "data" key holds the list of sites).

    Returns:
        A tuple (instagramData, siteData, sitesByID) where
        - instagramData holds the posts sorted by date, newest first, with
          only the newest entry kept for each foreignPostID,
        - siteData holds the sites whose SocialChannel is "instagram",
        - sitesByID maps SiteID to the very same site dicts as siteData.
    """
    print("Loading data...")

    instagramData = []
    with open("InstagramPostAnalytics.json", "r", encoding='utf-8') as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline) in the export
            if line.strip():
                instagramData.append(json.loads(line))

    with open("sites.json", "r", encoding='utf-8') as file:
        siteData = json.load(file)["data"]

    # Newest first, so the first occurrence of a foreignPostID is the latest one.
    # sort() followed by reverse() is kept on purpose: it fixes the relative
    # order of posts that share the same date.
    instagramData.sort(key=lambda post: post["date"]["$date"])
    instagramData.reverse()

    # Only keep one post per foreignPostID (the one with the latest date).
    # NOTE(review): assumes foreignPostID is hashable (e.g. a string) - confirm.
    seenPostIDs = set()
    latestPosts = []
    for post in instagramData:
        postID = post["foreignPostID"]
        if postID not in seenPostIDs:
            seenPostIDs.add(postID)
            latestPosts.append(post)
    instagramData = latestPosts

    # Only keep sites where SocialChannel is instagram
    siteData = [site for site in siteData if site["SocialChannel"] == "instagram"]

    # Rename plays to views if views is not present. The previous condition
    # tested "views" on both sides and therefore never matched.
    for post in instagramData:
        if "views" not in post and "plays" in post:
            post["views"] = post.pop("plays")

    sitesByID = {site["SiteID"]: site for site in siteData}

    return instagramData, siteData, sitesByID

# Sites by country
def getSitesByCountry(siteData):
    """Group the sites by the country prefix of their organisation name.

    Each site's "Organisation" value is rewritten in place with every
    "Logiscool " occurrence removed; the country is the text before the
    first "-" of that cleaned name, stripped of surrounding whitespace.

    Returns a dict mapping country name to the list of its site dicts,
    in the order the sites appear in siteData.
    """
    grouped = {}
    for entry in siteData:
        # Remove Logiscool from the name of the organisation
        cleaned = entry["Organisation"].replace("Logiscool ", "")
        entry["Organisation"] = cleaned
        countryName = cleaned.split("-")[0].strip()
        grouped.setdefault(countryName, []).append(entry)
    return grouped

# Get the top posts per country, ranked by the given metric (engagement, views or reach)
def getTopPostsPerCountry(instagramData, siteData, data, count=5):
    """Return the highest-ranked posts of every country for one metric.

    Args:
        instagramData: list of post dicts; posts without the `data` key are
            ignored.
        siteData: list of site dicts; they are grouped into countries by
            getSitesByCountry(), which also rewrites their "Organisation".
        data: key of the metric to rank by (e.g. "engagement").
        count: maximum number of posts kept per country.

    Returns:
        A dict mapping each country to a list of at most `count` posts sorted
        by the metric in descending order, or to None when none of the
        country's sites has a post carrying the metric.
    """
    # Index the candidate posts by site once, instead of rescanning every
    # post for every site of every country.
    postsBySite = {}
    for post in instagramData:
        if data in post:
            postsBySite.setdefault(post["siteID"], []).append(post)

    returnData = {}
    for country, sites in getSitesByCountry(siteData).items():
        countryPosts = []
        for site in sites:
            countryPosts.extend(postsBySite.get(site["SiteID"], []))

        if countryPosts:
            # The sort is stable, so posts with equal values keep their input order
            countryPosts.sort(key=lambda post: post[data], reverse=True)
            returnData[country] = countryPosts[:count]
        else:
            returnData[country] = None

    return returnData

# print sites by country in a pretty way
def printSitesByCountry(siteData):
    """Print every country on its own line, followed by the page name of each
    of its sites on a tab-prefixed line."""
    for countryName, members in getSitesByCountry(siteData).items():
        print(countryName)
        for member in members:
            print("\t", member["PageName"])

if __name__ == "__main__":
    # Imported locally: only the script entry point writes CSV files.
    import csv

    instagramData, siteData, sitesByID = initData()

    neededData = ['engagement', 'views', 'reach']

    # Column order shared by every report. The engagement column name itself
    # contains commas, so rows are emitted through csv.writer to get quoting.
    header = [
        "country",
        "site name",
        "Organisation parent",
        "Organisation",
        "PostID (foreign)",
        "Engagement (click, reactions, comments, share)",
        "reach",
        "views",
    ]

    for data in neededData:
        topPosts = getTopPostsPerCountry(instagramData, siteData, data, 15)

        print("Top posts by {}...".format(data))

        # Mode "w" wipes any previous top_posts_by_{}.csv; the with-block
        # closes the file even if a row fails. newline='' is what the csv
        # module expects of the file it writes to.
        with open("top_posts_by_{}.csv".format(data), "w", encoding='utf-8', newline='') as file:
            writer = csv.writer(file, lineterminator="\n")
            writer.writerow(header)

            for country, posts in topPosts.items():
                # Countries without any post for this metric are skipped
                if posts is None:
                    continue

                for post in posts:
                    site = sitesByID[post["siteID"]]
                    # Only a missing (or None) value counts as "No data";
                    # a real 0 is written as 0.
                    views = post.get("views")
                    writer.writerow([
                        country,
                        site["PageName"],
                        site["OrganisationParent"],
                        site["Organisation"],
                        post["foreignPostID"],
                        post["engagement"],
                        post["reach"],
                        "No data" if views is None else views,
                    ])