Yes, you can calculate a standard SimScore report first and then adjust it based on token weighting. Here’s a general approach, starting with the unweighted centroid and each opinion’s similarity to it:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
# Example function to convert text to vector
def text_to_vector(text, dim=300):
    """Return a placeholder embedding for *text*.

    This is a stand-in for a real vectorization method: the result is a
    freshly drawn random vector that does not depend on the content of
    ``text``, so repeated calls with the same text give different vectors.

    Args:
        text: The opinion text to embed (currently unused).
        dim: Length of the returned vector; defaults to 300.

    Returns:
        A 1-D NumPy array of ``dim`` floats drawn uniformly from [0, 1).
    """
    # Placeholder vectorization method
    return np.random.rand(dim)
# Sample opinions
opinions = [
    "Leadership is crucial.",
    "Collaboration is essential.",
    "Innovation drives success.",
]

# Convert opinions to vectors
vectors = [text_to_vector(opinion) for opinion in opinions]

# Calculate the standard centroid (unweighted): the element-wise mean of
# all opinion vectors.
centroid_standard = np.mean(vectors, axis=0)

# Calculate similarities to the standard centroid in one batched call.
# cosine_similarity returns a (1, n_opinions) matrix; row 0 holds the
# centroid-vs-opinion scores, converted here to a plain list of floats.
similarities_standard = cosine_similarity([centroid_standard], vectors)[0].tolist()

print("Standard Centroid:", centroid_standard)
print("Standard Similarities:", similarities_standard)
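After calculating the standard centroid and similarities, adjust them by incorporating token weights: recompute the centroid as a token-weighted average of the opinion vectors and, optionally, scale each standard similarity by its opinion’s token weight.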
# Sample token weights for the opinions
token_weights = [10, 20, 30]  # Corresponding to the three opinions

# Every opinion needs exactly one weight, and the weights must not sum to
# zero because the weighted centroid divides by that total.
if len(token_weights) != len(vectors):
    raise ValueError("token_weights must have one entry per opinion vector")
total_tokens = sum(token_weights)
if total_tokens == 0:
    raise ValueError("token_weights must not sum to zero")

# Recalculate the weighted centroid: sum of weight * vector, divided by
# the total weight.
weighted_sum = np.sum(
    [weight * vec for weight, vec in zip(token_weights, vectors)],
    axis=0,
)
centroid_weighted = weighted_sum / total_tokens

# Recalculate similarities to the weighted centroid in one batched call;
# row 0 of the (1, n_opinions) result holds the per-opinion scores.
similarities_weighted = cosine_similarity([centroid_weighted], vectors)[0].tolist()

# Optionally adjust original similarities directly by token weights.
# NOTE: each score is multiplied by its raw weight, so the results are no
# longer confined to the usual cosine-similarity range.
adjusted_similarities = [
    similarity * weight
    for similarity, weight in zip(similarities_standard, token_weights)
]

print("Weighted Centroid:", centroid_weighted)
print("Weighted Similarities:", similarities_weighted)
print("Adjusted Standard Similarities:", adjusted_similarities)
# Function to update or create a SimScore graph (simplified)
def update_simscore_graph(opinions, similarities):
    """Build a SimScore graph mapping each opinion to its similarity.

    Args:
        opinions: Sequence of opinion strings, used as the graph keys.
        similarities: Sequence of scores, where ``similarities[i]`` belongs
            to ``opinions[i]``. Entries beyond ``len(opinions)`` are ignored.

    Returns:
        A dict of the form ``{opinion: {"similarity": score}}``. If the same
        opinion text appears more than once, the last occurrence wins.

    Raises:
        IndexError: If ``similarities`` has fewer entries than ``opinions``.
    """
    return {
        opinion: {"similarity": similarities[i]}
        for i, opinion in enumerate(opinions)
    }
# Build the SimScore graph from the weight-adjusted similarities and show it
simscore_graph = update_simscore_graph(
    opinions=opinions,
    similarities=adjusted_similarities,
)
print(f"Updated SimScore Graph: {simscore_graph}")