text文件下载: https://python123.io/resources/pye/hamlet.txt
代码:
# CalHamletV1.py
def get_text() -> str:
    """Read ``hamlet.txt`` and return its text normalized for word counting.

    The file is opened from the current working directory with the
    platform's default text encoding. The content is lowercased and every
    character in the punctuation set below is replaced by a single space,
    so a later ``str.split()`` yields bare words.

    Returns:
        The lowercased text with punctuation replaced by spaces.

    Raises:
        OSError: If ``hamlet.txt`` cannot be opened or read.
    """
    # "\\" is an escaped backslash, i.e. one literal "\" in the set.
    # NOTE(review): the set contains the typographic quote "‘" rather than an
    # ASCII backtick or apostrophe; kept as-is so existing output does not
    # change -- confirm whether that was intended.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'

    # Use a context manager so the file handle is closed deterministically.
    with open("hamlet.txt", "r") as source:
        txt = source.read()

    txt = txt.lower()  # make counting case-insensitive
    # One pass over the text instead of one .replace() call per character.
    blank_out = str.maketrans(punctuation, " " * len(punctuation))
    return txt.translate(blank_out)
# Word-frequency report: print the ten most frequent words in the text
# returned by get_text(), one per line, with their occurrence counts.
hamletTxt = get_text()
words = hamletTxt.split()  # split the text into words on runs of whitespace

counts = {}  # word -> number of occurrences
for word in words:
    # Start from 0 the first time a word is seen, then add one.
    counts[word] = counts.get(word, 0) + 1

items = list(counts.items())  # (word, count) pairs as a sortable list
# Sort by count, largest first. list.sort is stable, so words with equal
# counts stay in the order the dict yielded them.
items.sort(key=lambda x: x[1], reverse=True)

# Print the top 10 entries. Slicing (rather than indexing items[i] for
# i in range(10)) avoids an IndexError when there are fewer than 10
# distinct words.
for word, count in items[:10]:
    print("{0:<30}{1:>7}".format(word, count))
效果:
the 1138
and 965
to 754
of 669
you 550
i 542
a 542
my 514
hamlet 462
in 436
Process finished with exit code 0