python
Dashboard
My Repos
Compilers
Python Online
Node JS Online
Golang Online
codepy
Login
My Repos
Sign Out
Online Python Interpreter
Stop
Run
# Word-frequency script: reads shakespeare.txt, counts every non-stop-word,
# then runs several rounds in which each occurrence of the 20 most common
# words is kept with a fixed probability, recording the kept count per word.
import collections
import math
import random
from typing import List

import nltk
import numpy as np
from nltk.corpus import stopwords

# Characters deleted from every token before counting (single C-level pass
# instead of one .replace() call per character).
_PUNCTUATION = str.maketrans("", "", ".,\"“?;!-[]:()")

# Inclusive upper bound of the uniform integer draw. A draw of 0 counts as a
# hit, so each occurrence is kept with probability 1 / (_DRAW_UPPER_BOUND + 1).
# NOTE(review): the original computed this bound as int(math.pow(8, k)) but
# reset k = 1 on every iteration, so the bound was always 8. If a growing
# bound was intended, that needs to be designed explicitly - confirm.
_DRAW_UPPER_BOUND = 8

# Number of most frequent words sampled, and number of sampling rounds.
_TOP_N = 20
_ROUNDS = 4


def _clean_tokens(text: str) -> List[str]:
    """Return the lowercased whitespace-separated tokens of *text*.

    Punctuation listed in ``_PUNCTUATION`` is removed from each token.
    Tokens that are empty after stripping (i.e. were punctuation only) are
    dropped so that the empty string is never counted as a word.
    """
    stripped = (token.translate(_PUNCTUATION) for token in text.lower().split())
    return [token for token in stripped if token]


def _sampled_count(occurrences: int) -> int:
    """Return how many of *occurrences* independent draws come up 0.

    One ``random.randint(0, _DRAW_UPPER_BOUND)`` draw is made per occurrence.
    """
    return sum(
        1
        for _ in range(occurrences)
        if random.randint(0, _DRAW_UPPER_BOUND) == 0
    )


# Read the whole corpus and close the file handle as soon as we are done.
with open('shakespeare.txt', encoding="utf8") as file:
    WORDS = _clean_tokens(file.read())

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Number of tokens kept after cleaning (stop words included).
total_count = len(WORDS)

# Frequency of every non-stop-word, in first-seen order.
d = collections.Counter(word for word in WORDS if word not in stop_words)
wordcount = dict(d)

# Total number of counted (non-stop-word) occurrences.
total_words = sum(wordcount.values())

# The most frequent words; `d` does not change afterwards, so this is
# computed once and reused by every round.
max_repeated_word = d.most_common(_TOP_N)
counter_value = []

for times in range(1, _ROUNDS + 1):
    # A word's frequency equals the number of times it appears in WORDS, so
    # drawing once per counted occurrence is the same as scanning WORDS and
    # drawing on every match - without the full rescan per word.
    counter_value.extend(
        _sampled_count(count1) for _word1, count1 in max_repeated_word
    )
    print(f"Round:{times}")
Share this code with others
Public
Clear
My Repos
Repo
Lang
Login
Register
Login
Create a free account. No Credit card info required.
I agree with the Codepy
Terms of Service
Sign Up