import spacy
from spacy_ebooks.corpus import Corpus
import plotly
import pandas as pd
from spacytextblob.spacytextblob import SpacyTextBlob
# Build the NLP pipeline: start from the small English model, then register
# the "spacytextblob" component, which supplies the `._.polarity` and
# `._.subjectivity` extension attributes read further down.
nlp = spacy.load(name="en_core_web_sm")
nlp.add_pipe(factory_name="spacytextblob")
<spacytextblob.spacytextblob.SpacyTextBlob at 0x7feb9ad6f760>
# Open the ebook corpus located in the resources directory two levels up
# from this notebook.
c = Corpus("../../resources/")
# Display the corpus' books (each entry shows title, genre, author and
# subjects metadata).
c.books
[Book({'title': 'Moon Pool, The', 'genre': 'Science Fiction', 'shortname': 'Merritt, A.', 'author': 'A. Merritt', 'subjects': ['Science fiction', 'Oceania -- Fiction', 'Geographical myths -- Fiction', 'Botanists -- Fiction']}), Book({'title': 'Short Fiction', 'genre': ['Fiction', 'Shorts'], 'shortname': 'Flaubert, Gustave', 'author': 'Gustave Flaubert', 'subjects': 'French literature'})]
# Score every paragraph of every book in the corpus and collect the results
# in one long-format DataFrame: one row per paragraph, tagged with the title
# of the book it came from.
cols = ["paragraph", "title", "polarity", "subjectivity"]

# Collect one frame per book and concatenate once at the end.
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside the
# loop re-copies every accumulated row on each iteration.
book_frames = []
for book in c:
    # Feed the paragraph texts through the pipeline as a stream instead of
    # calling `nlp(...)` once per paragraph.
    docs = nlp.pipe(p.text() for p in book.paragraphs())
    book_frames.append(
        pd.DataFrame(
            ((d.text, book.title(), d._.polarity, d._.subjectivity) for d in docs),
            columns=cols,
        )
    )

# Index labels are kept per book (0-based within each book), exactly as the
# former append-based loop produced them, so labels repeat across books.
# An empty corpus still yields an empty frame with the expected columns.
# NOTE(review): without the empty object-dtype seed frame the score columns
# should come out as floats rather than objects - confirm if dtype matters.
df = pd.concat(book_frames) if book_frames else pd.DataFrame(columns=cols)
df
# NOTE(review): per-word sentiment assessments are presumably available via a
# spacytextblob extension such as `d._.assessments` - confirm the attribute
# name for the installed version before relying on it.
| | paragraph | title | polarity | subjectivity |
---|---|---|---|---|
0 | I | Moon Pool, The | 0.0 | 0.0 |
1 | The Thing on the Moon Path | Moon Pool, The | 0.0 | 0.0 |
2 | For two months I had been on the d’Entrecastea... | Moon Pool, The | 0.117273 | 0.270909 |
3 | It was one of Papua’s yellow mornings when she... | Moon Pool, The | -0.121429 | 0.735714 |
4 | It is on such mornings that Papua whispers to ... | Moon Pool, The | 0.092635 | 0.357239 |
... | ... | ... | ... | ... |
812 | He tried to encourage the leper, but he only w... | Short Fiction | 0.0 | 1.0 |
813 | “Oh! I am about to die! Come closer to me and ... | Short Fiction | 0.6 | 0.5 |
814 | So Julian stretched himself out upon the leper... | Short Fiction | -0.05 | 0.0 |
815 | Then the leper clasped him close and presently... | Short Fiction | 0.333333 | 0.408333 |
816 | And this is the story of Saint Julian the Hosp... | Short Fiction | 0.0 | 0.0 |
3310 rows × 4 columns
import plotly.express as px

# Scatter every paragraph by sentiment: polarity on the x axis, subjectivity
# on the y axis. Books are told apart by both colour and marker symbol, and
# the marginal histograms show how each score is distributed.
fig = px.scatter(
    data_frame=df,
    # axes
    x="polarity",
    y="subjectivity",
    marginal_x="histogram",
    marginal_y="histogram",
    # per-book encoding
    color="title",
    symbol="title",
    # semi-transparent markers keep overlapping paragraphs visible
    opacity=0.5,
    # show the paragraph text when hovering over a point
    hover_data=["paragraph"],
    # optional fixed figure size (disabled): height=400, width=500
)
fig.show()
# The ten paragraphs with the highest polarity score, most positive first.
df.sort_values("polarity", ascending=False).iloc[:10]
| | paragraph | title | polarity | subjectivity |
---|---|---|---|---|
824 | “Best we should know how they came hence,” he ... | Moon Pool, The | 1.0 | 0.3 |
1740 | “Larry!” she repeated it excellently. “And you?” | Moon Pool, The | 1.0 | 1.0 |
575 | Lo, a triumphal throng from Rome, the Eternal ... | Short Fiction | 1.0 | 1.0 |
750 | “Chert!” whispered Marakinoff. “Incredible!” | Moon Pool, The | 1.0 | 0.9 |
1038 | “Fly!” she repeated incredulously. “Like a zit... | Moon Pool, The | 1.0 | 0.9 |
2312 | “Good God!” breathed Larry. “Look!” | Moon Pool, The | 1.0 | 0.6 |
299 | “His power over men is indeed great. In spite ... | Short Fiction | 1.0 | 0.75 |
1978 | “He goes with you,” said Lakla, and threw abou... | Moon Pool, The | 1.0 | 1.0 |
719 | “Well, we can’t fall out, anyway,” he laughed.... | Moon Pool, The | 1.0 | 0.2 |
486 | “Good!” said Olaf Huldricksson. “It is good!” | Moon Pool, The | 0.875 | 0.6 |
# Paragraphs scored as strongly positive (polarity above 0.8) yet fairly
# objective (subjectivity below 0.4).
df[
    df["polarity"].gt(0.8)
    & df["subjectivity"].lt(0.4)
]
| | paragraph | title | polarity | subjectivity |
---|---|---|---|---|
719 | “Well, we can’t fall out, anyway,” he laughed.... | Moon Pool, The | 1.0 | 0.2 |
824 | “Best we should know how they came hence,” he ... | Moon Pool, The | 1.0 | 0.3 |
864 | “But he shall meet Valdor and Tahola before he... | Moon Pool, The | 0.875 | 0.2 |
321 | The tetrarch stammered in attempting to reply ... | Short Fiction | 0.875 | 0.2 |