-
Notifications
You must be signed in to change notification settings - Fork 0
/
watson.py
167 lines (145 loc) · 6.45 KB
/
watson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import json
import time

import nltk

from config import discovery
# Deletes existing collection and creates new collection for current video being analyzed
def setUpCollection(videoId, app):
with app.app_context():
environments = discovery.list_environments()
# Gets environment ID
environment_id = environments["environments"][1]["environment_id"]
configs = discovery.list_configurations(environment_id)
config_id = configs["configurations"][0]["configuration_id"]
# Delete existing collection
collections = discovery.list_collections(environment_id)
try:
collection_id = collections["collections"][1]["collection_id"]
delete_collection = discovery.delete_collection(environment_id, collection_id)
except Exception:
print('Only had one collection')
# Create new collection
new_collection = discovery.create_collection(environment_id=environment_id, name=videoId)
collection_id = new_collection["collection_id"]
# Record environment_id, collection_id
file = open("id.txt","w")
file.write(environment_id)
file.write('\n')
file.write(collection_id)
file.close()
return environment_id, collection_id
def uploadDocsToWatson(comments, environment_id, collection_id):
for comment in comments:
# add the file to discovery
while True:
try:
discovery.add_document(environment_id, collection_id, file=json.dumps(comment),
file_content_type='application/json', filename=comment['Cid'])
break
except Exception:
print('Overflow on Watson write')
def checkUploadCount(environment_id, collection_id):
# Gets collection info
collection = discovery.get_collection(environment_id=environment_id, collection_id=collection_id)
# return the number of available documents
return collection['document_counts']
def performAnalysis():
file = open('id.txt','r')
e_id = file.readline().strip()
c_id = file.readline().strip()
file.close()
my_query = discovery.query(environment_id=e_id, collection_id=c_id, query='', count=9999)
# Finds entities and how many times they are mentioned
entitiesDict = {}
sentiments = {'-1.0to-0.75':0,'-0.75to-0.50':0,'-0.50to-0.25':0,'-0.25to0.00':0,'0.00to0.25':0,'0.25to0.50':0,'0.50to0.75':0, '0.75to1.00':0}
commentSummary1Dict = {}
commentSummary2Dict = {}
commentSummary3Dict = {}
totalComments = 0
#Loop and fill dictionaries
for comment in my_query["results"]:
totalComments += 1
#for sentiment graph
sentiment = comment["enriched_text"]["sentiment"]["document"]["score"]
if sentiment >= -1 and sentiment < -0.75:
sentiments['-1.0to-0.75'] += 1
elif sentiment >= -0.75 and sentiment < -0.5:
sentiments['-0.75to-0.50'] += 1
elif sentiment >= -0.5 and sentiment < -0.25:
sentiments['-0.50to-0.25'] += 1
elif sentiment >= -0.25 and sentiment < 0:
sentiments['-0.25to0.00'] += 1
elif sentiment >= 0 and sentiment < 0.25:
sentiments['0.00to0.25'] += 1
elif sentiment >= 0.25 and sentiment < 0.5:
sentiments['0.25to0.50'] += 1
elif sentiment >= 0.5 and sentiment < 0.75:
sentiments['0.50to0.75'] += 1
elif sentiment >= 0.75 and sentiment < 1:
sentiments['0.75to1.00'] += 1
# for top mentioned entities
entities = comment["enriched_text"]["entities"]
if entities != []:
for entity in entities:
entityName = entity["text"]
if entityName not in entitiesDict:
entitiesDict[entityName] = {'score':[comment["enriched_text"]["sentiment"]["document"]["score"]],'count':1}
else:
entitiesDict[entityName]["score"].append(comment["enriched_text"]["sentiment"]["document"]["score"])
entitiesDict[entityName]["count"] += 1
# comment summary method 1 - not great
entitiesList = ""
for entity in comment["enriched_text"]["entities"]:
entitiesList += entity["text"] + ", "
if sentiment > 0:
entitiesList += str(1)
elif sentiment < 0:
entitiesList += str(-1)
else:
entitiesList += str(0)
if entitiesList not in commentSummary1Dict:
# list that tracks similar comments
comments = []
comments.append(comment["text"])
commentSummary1Dict[entitiesList] = [comments,1]
else:
commentSummary1Dict[entitiesList][0].append(comment["text"])
commentSummary1Dict[entitiesList][1] += 1
# comment summary method 2 - a bit better
theComment = comment["text"].lower()
try:
tokens = nltk.word_tokenize(theComment)
tagged = nltk.pos_tag(tokens)
except Exception as error:
tagged = []
wordList = []
for word in tagged:
if word[1][0] == "N" and len(word[0]) > 1 and word[0] not in wordList: #make sure we don't double count same word multiple times in one comment
noun = word[0]
wordList.append(noun)
if sentiment > 0:
noun += str(1)
elif sentiment < 0:
noun += str(-1)
else:
noun += str(0)
if noun not in commentSummary2Dict:
comments = []
comments.append(theComment)
commentSummary2Dict[noun] = [comments,1]
else:
commentSummary2Dict[noun][0].append(theComment)
commentSummary2Dict[noun][1] += 1
# turn sentiments into percentages
for key in sentiments:
sentiments[key] = sentiments[key]/totalComments * 100
for key in entitiesDict:
lengthOfList = len(entitiesDict[key])
entitiesDict[key]["score"] = sum(entitiesDict[key]["score"])/lengthOfList
newDict = {}
newDict["entitiesResults"] = entitiesDict
newDict["sentimentsResults"] = sentiments
testList = []
for k in commentSummary2Dict:
testList.append((commentSummary2Dict[k][0],k,commentSummary2Dict[k][1]))
sortedList = sorted(testList, key=lambda tup: tup[2])
newDict["commentResults"] = sortedList
return newDict