当前位置：网站首页>Word cloud graph, word frequency graph, specially statistics the word cloud word frequency of some keywords

Word clouds and word-frequency charts: counting the frequency of selected keywords

2022-07-18 21:24:00 【Listen to my call, rookie evolution】

# 1. Read the text and segment it into words with jieba.cut()
import jieba
import random

# Read through a context manager so the file handle is closed as soon as the
# text is loaded. The encoding is spelled out to match the gbk read of the
# same file further down in this script, instead of relying on the platform
# default.
with open('1.txt', 'r', encoding='gbk') as report_file:
    report = report_file.read()
words = jieba.cut(report)

# 2. Keep only the segmented tokens that are tracked keywords.
# The keywords are stored once in a set, so each token costs one hash lookup
# instead of a scan over a list that is rebuilt on every iteration.
tracked_keywords = {
    ' autonomous ', ' Rule of virtue ', ' The rule of law ',
    ' Three treatments ', ' data ', ' Rural revitalization ', ' a farmer ',
    ' rural ', ' Agriculture ', ' Governance ', ' rural ',
    ' urban and rural ', ' Numbers ', ' Governance system ',
}
# A token is kept when it is at least 2 characters long and is a tracked keyword.
report_words = [
    word for word in words
    if len(word) >= 2 and word in tracked_keywords
]

# Add one extra entry per raw-text occurrence of each of these phrases, on top
# of whatever the segmenter produced for them above.
for phrase in (' Rural revitalization ', ' Governance ', ' Governance system '):
    report_words.extend([phrase] * report.count(phrase))
random.shuffle(report_words)
# print(report_words)  # uncomment to inspect the collected keywords

# # 3. Print the occurrence counts of the most frequent words
# from collections import Counter
# result = Counter(report_words).most_common(50) # Take the 50 most common words
# #print(result)

# 4. Render the word cloud
from wordcloud import WordCloud  # third-party word cloud renderer

# generate() is given a single string, so join the keyword list with spaces.
content = ' '.join(report_words)
wc = WordCloud(
    background_color='pink',
    font_path=r"C:\\Windows\\Fonts\\msyh.ttc",
)
wc.generate(content)
image_produce = wc.to_image()

# Optional outputs, currently disabled:
# wc.to_file("new_wordcloud.jpg")
# image_produce.show()
# wc.to_file(' Clouds of words .png')  # export as a PNG (relative path)


import jieba
import pandas as pd
import numpy as np
import PIL.Image as image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Load the source text; the file is decoded as gbk.
with open("1.txt", "r", encoding='gbk') as f:
    texts = f.read()

print(texts)

# Remove words that should not show up in the cloud, one after another.
for unwanted in (' the people ', ' socialist ', ' social ', ' Country '):
    texts = texts.replace(unwanted, '')

# Word cloud: segment the text and keep tokens longer than one character.
words = jieba.lcut(texts)
content = [token for token in words if len(token) > 1]

contents = " ".join(content)
wordcloud = WordCloud(
    background_color='pink',
    font_path=r"C:\\Windows\\Fonts\\msyh.ttc",
)
wordcloud.generate(contents)
image_produce = wordcloud.to_image()
# wordcloud.to_file("new_wordcloud.jpg")
image_produce.show()
# NOTE(review): no plt.show() follows in this section, so outside an
# inline-plotting environment this figure may never be displayed - confirm.
plt.imshow(wordcloud)

# Print the top 20 keywords returned by jieba's keyword extraction
import jieba.analyse
content_str = contents
# topK sets how many keywords are returned; withWeight=False yields plain
# keyword strings, which is why they can be joined directly.
top_keywords = jieba.analyse.extract_tags(content_str, topK=20, withWeight=False)
print(" ".join(top_keywords))

import jieba
# 2.txt is the text file whose keyword frequencies are counted for the word
# cloud. It is read inside a context manager so the handle is closed right
# after the text is loaded instead of being left open.
with open('2.txt', 'r', encoding='gbk') as source_file:
    content = source_file.read()

# content=content.replace(' the people ','').replace(' socialist ','').replace(' social ','').replace(' Country ','').replace(' China ','').replace(' In our country ','')

# words = jieba.lcut(content)
# counts = {}
# for word in words:
#     if len(word) == 1:  # skip single-character segmentation results
#         continue
#     else:
#         counts[word] = counts.get(word, 0) + 1  # tally occurrences per word
# hist = list(counts.items())  # list of (word, count) pairs
# hist.sort(key=lambda x: x[1], reverse=True)
# words=[]
# counts=[]
# for i in range(20):  # take the 20 most frequent words
#     word, count = hist[i]
#     words.append(word)
#     counts.append(count)
# print(counts)
# print(words)
import random

import jieba
import pandas as pd
import numpy as np
import PIL.Image as image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Keywords whose occurrences in `content` are counted.
words = [
    ' autonomous ', ' Rule of virtue ', ' The rule of law ',
    ' Three treatments ', ' data ', ' Rural revitalization ', ' a farmer ',
    ' rural ', ' Agriculture ', ' Governance ', ' rural ',
    ' urban and rural ', ' Numbers ', ' Governance system ',
]
# One count per keyword, in the same order as `words`.
counts = [content.count(keyword) for keyword in words]
print(counts)
# Labels and values kept under the names used by the bar-chart code.
x_data, y_data = words, counts

# NOTE(review): the cloud is generated from the bare keyword list, so the
# counts computed above do not influence it - confirm this is intended.
contents = " ".join(words)
wordcloud = WordCloud(
    background_color='pink',
    font_path=r"C:\\Windows\\Fonts\\msyh.ttc",
)
wordcloud.generate(contents)
image_produce = wordcloud.to_image()
# import matplotlib.pyplot as plt
 
# plt.rcParams["font.sans-serif"]=['SimHei']
# plt.rcParams["axes.unicode_minus"]=False
 
# for i in range(len(x_data)):
#     plt.bar(x_data[i],y_data[i])
 
# plt.title(" Word frequency display ")
# plt.xlabel(" Word frequency ")
# plt.ylabel(" Number ")
 
# plt.show()

原网站

版权声明
本文为[Listen to my call, rookie evolution]所创，转载请带上原文链接，感谢
https://yzsam.com/2022/199/202207161840068420.html

当前位置：网站首页>Word cloud graph, word frequency graph, specially statistics the word cloud word frequency of some keywords

Word cloud graph, word frequency graph, specially statistics the word cloud word frequency of some keywords

边栏推荐

猜你喜欢

随机推荐