Sentiment Analysis
Collection
Sentiment analysis models specialised on social media • 8 items • Updated • 6
How to use cardiffnlp/xlm-twitter-politics-sentiment with Transformers:
# Use a pipeline as a high-level helper
# (builds a "text-classification" pipeline backed by this model checkpoint).
from transformers import pipeline
pipe = pipeline("text-classification", model="cardiffnlp/xlm-twitter-politics-sentiment")

# Load model directly
# (alternative to the pipeline above: fetch the tokenizer and model separately).
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/xlm-twitter-politics-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/xlm-twitter-politics-sentiment")

This is an "extension" of the multilingual twitter-xlm-roberta-base-sentiment model (model, original paper) with a focus on sentiment in politicians' tweets. The original sentiment fine-tuning was done on 8 languages (Ar, En, Fr, De, Hi, It, Sp, Pt), but further training was done using tweets from Members of Parliament from the UK (English), Spain (Spanish) and Greece (Greek).
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
# Hub identifier of the checkpoint used for both the tokenizer and the model.
MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)


def preprocess(text):
    """Replace user mentions and links with placeholder tokens.

    Each space-separated token that starts with ``@`` (and is longer than one
    character) becomes ``@user``; each token that starts with ``http`` becomes
    ``http``. All other tokens are kept unchanged.

    NOTE(review): this helper was missing from the example (the call below
    raised NameError). It mirrors the preprocessing published with the base
    twitter-xlm-roberta-base-sentiment model -- confirm it matches what this
    checkpoint was trained with.

    Args:
        text: Raw tweet text.

    Returns:
        The text with mentions and links normalised, tokens re-joined by
        single spaces.
    """
    new_text = []
    for t in text.split(" "):
        t = "@user" if t.startswith("@") and len(t) > 1 else t
        t = "http" if t.startswith("http") else t
        new_text.append(t)
    return " ".join(new_text)


# PT
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# output[0] is the first model output (the classification logits); the second
# [0] selects the single input of the batch. detach() drops the autograd graph
# so the tensor can be converted to a NumPy array.
scores = output[0][0].detach().numpy()
# Turn the raw logits into probabilities that sum to 1.
scores = softmax(scores)
# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)
# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)
# Print labels and scores
# np.argsort sorts ascending, so scores are printed from lowest to highest.
# NOTE: the first printed column is the rank position, not the class id;
# the class id for each row is `class_idx`.
ranking = np.argsort(scores)
for i, class_idx in enumerate(ranking):
    s = scores[class_idx]
    print(i, s)
Output:
0 0.0048229103
1 0.03117284
2 0.9640044