python统计字符串中每个单词出现的次数
import io
import re
class Counter:
    r"""Count how often each word occurs in a UTF-8 text file.

    A word is any run of characters matched by the regular expression
    ``\w+``. Words are lower-cased before counting, so the tally is
    case-insensitive.
    """

    def __init__(self, path):
        """Read the file at *path* and tally its words.

        Args:
            path: Location of the text file; it is decoded as UTF-8.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        # Maps each lower-cased word to its number of occurrences.
        self.mapping = dict()
        with io.open(path, encoding="utf-8") as f:
            data = f.read()
        # Raw string: "\w" inside a plain literal is an invalid escape sequence.
        for word in re.findall(r"\w+", data):
            word = word.lower()
            self.mapping[word] = self.mapping.get(word, 0) + 1

    def most_common(self, n):
        """Return the *n* most frequent words, most frequent first.

        Args:
            n: Maximum number of entries to return; must be greater than 0.

        Returns:
            A list of ``(word, count)`` tuples sorted by descending count and
            holding at most *n* entries. The sort is stable, so words with
            equal counts keep the iteration order of ``self.mapping``.

        Raises:
            AssertionError: If *n* is not greater than 0.
        """
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is not stripped under ``python -O``; the exception type is
        # kept so existing callers see the same error.
        if not n > 0:
            raise AssertionError("n should be larger than 0")
        ranked = sorted(self.mapping.items(), key=lambda item: item[1], reverse=True)
        return ranked[:n]
if __name__ == "__main__":
    # Script entry point: print the five most frequent words found in
    # "importthis.txt", one (word, count) tuple per line.
    most_common_5 = Counter("importthis.txt").most_common(5)
    for item in most_common_5:
        print(item)
答：可以使用Python中的字典（dictionary）来统计每个单词出现的次数。具体实现如下：text = "This is a sample text with several words and repeated words" word_list = text.split() # 将字符串按照空格分隔成单词列表 word_count = {} # 定义一个空字典，用于存储每个单词出现的次数 for word in...
答：import io import re class Counter: def __init__(self,path): self.mapping = dict() with io.open(path,encoding="utf-8") as f: data = f...
答：方法一：使用count()方法 点击学习大厂名师精品课 count()方法是Python中最简单的一种统计方法。它可以统计列表、元组和字符串中一个元素或一个单词出现的次数。方法二：使用collections.Counter()方法 Python中的collections模块提供了一个Counter类，该类可以用来统计列表、元组和字符串中每个元素或每个单词出现...
答：统计如下字符串str中每个单词出现的次数,结果存入dict中,单词为key,次数为value,并按照value由高到低排序,输出此dict str="""The Zen of Python, by Tim Peters Beautiful is better than ug... 统计如下字符串 str 中每个单词出现的次数,结果存入 dict 中,单词为 key,次数为 value,并按照 value 由高到低排序,输出此 dict str = ...
答：打开文件 txt=txt.lower()#将字母全部转化为小写 for ch in ',-.()':#去掉特殊符号 txt=txt.replace(ch,"")#将特殊符号替换为空格 return txt Txt=getText()#读取文件 words=Txt.split()#分隔开 counts={}#创建字典 for word in words:counts[word]=counts.get(word,0)+1 items=list(...
答：统计字符串中每个字母出现的次数应该是用一个二进制的方式去统计，可能每个阶段学习的内容不同，统计的方式也不同
答：参考代码 #!/usr/bin/env python # -*- coding: utf-8 -*- #python 2.7 import re print u'请输入英语句子：' wz = raw_input() #整句转换为小写 s = wz.lower() #小写单词的正则表达式 r='[a-z]+' #找到所有单词 ws = re.findall(r,s) #定义一个字典来存储单词和次数 dt = {} for w in ws...
答：1、可以这样编写程序：定义一个含有所有小写字母的列表变量w及一个待测字符串变量s。对s字符串中的每一个字符进行循环迭代，检测其是否位于变量w中，若为真，则对计数变量c进行加一操作。输出c变量，即为所求。2、初学者的话确实可以通过ASCII table来判断字母和数字的区别。Python里面有两个内置函数ord和...
答：-*- #建立一个字典，key=26个英文字母，value为出现次数 wordDict = {} #获得输入单词字符串 str = input("请输入一串单词") #用空格分割单词，存到列表 strArr = str.split(sep=' ') #遍历列表中的单词 for word in strArr: #遍历单词中的字母 for ch in word: #判断字典中是否存在键key...
答：很简单的东东 import re import collections print( collections.Counter( re.findall( '\w+' ,open( 'test.txt' ).read( ) ) ) ) 还是多看看资料吧,这个是官方的标准答案