Python对文本里面的内容排序 怎么用Python对大内存的txt文件内容进行排序
# Question: how to sort and pick values from a text file in Python.
import os

# your file path
file_abs_path = 'newFile.txt'


def get_info(oid, is_max, path=None):
    """Return the largest or smallest ``(a, b)`` among the rows matching *oid*.

    The original snippet was published as untested pseudo-code and its row
    layout is not shown.
    NOTE(review): this version assumes whitespace-separated rows of the form
    ``<name> <a> <b> <oid>`` preceded by a header line that starts with
    ``F`` -- confirm against the real file.

    Args:
        oid: integer id; only rows whose fourth field equals it are used.
        is_max: when true return the maxima, otherwise the minima.
        path: file to read; defaults to the module-level ``file_abs_path``.

    Returns:
        A tuple ``(a, b)``. Each element is computed independently of the
        other and is ``None`` when no row matched.

    Raises:
        ValueError: if a used field of a data row is not an integer.
    """
    if path is None:
        path = file_abs_path
    pick = max if is_max else min
    a, b = None, None
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Skip blank lines and the header (first) line.
            if not fields or fields[0].startswith('F'):
                continue
            # Rows too short to carry an id are ignored.
            if len(fields) < 4 or int(fields[3]) != oid:
                continue
            a_temp, b_temp = int(fields[1]), int(fields[2])
            # Compare against None explicitly so that a stored 0 is kept.
            a = a_temp if a is None else pick(a, a_temp)
            b = b_temp if b is None else pick(b, b_temp)
    return a, b


if __name__ == '__main__':
    # get max 1
    print(get_info(1, True))
    # get min 1
    print(get_info(1, False))

# Author's note: not tested; this is roughly the intended logic.
你主要是不了解怎么读文件吧
with open(file_path, 'rb') as f
每一行 for line in f.readlines()
# Sort the rows of m.txt (two tab-separated integers per line) numerically
# and write the sorted rows to n.txt in the same directory.
path = 'E:\\PyWk\\nodepad_py\\'

with open(path + 'm.txt') as fp:
    # Blank lines carry no second field and would fail below, so drop them.
    a = [x.split('\t') for x in fp if x.strip()]
# Keep the first two fields and remove the newline left on the second one.
a = [[x[0], x[1].replace('\n', '')] for x in a]
print(a)

# Compare as integers so that e.g. 10 sorts after 9, then format back to text.
b = [[int(x[0]), int(x[1])] for x in a]
b.sort()
b = [str(x[0]) + '\t' + str(x[1]) + '\n' for x in b]

with open(path + 'n.txt', 'w') as fp:
    fp.writelines(b)
print(b)
把文本的格式贴出来看看
假设格式是:
5KB
100KB
1MB
3MB
9MB
2MB
可用下面的程序(python3)
# Multiplier of each supported unit suffix, expressed in KB.
units = {"KB": 1, "MB": 1024}


def calc(x):
    """Return the size written in *x* (for example ``"100KB"``) in KB.

    Surrounding whitespace, including a trailing newline, is ignored so the
    function can be used directly as a sort key on lines read from a file.

    Args:
        x: text made of an integer followed by one of the keys of ``units``.

    Returns:
        The integer part multiplied by the unit's multiplier.

    Raises:
        ValueError: if *x* does not end with a known unit or the part before
            the unit is not an integer. Failing here is clearer than
            returning None, which only breaks later inside the sort.
    """
    text = x.strip()
    for unit, amount in units.items():
        if text.endswith(unit):
            return int(text[:-len(unit)]) * amount
    raise ValueError("unrecognised size: %r" % (x,))
# Sort the sizes stored one per line in sort.txt and rewrite the file in place.
with open("sort.txt", "r") as file:
    # Strip only the line terminator: slicing off the last character would
    # damage a final line that has no trailing newline.
    values = [line.rstrip("\n") for line in file]
# Blank lines have no size to compare, so they are left out of the result.
values = [value for value in values if value.strip()]
values.sort(key=calc)
print(values)
with open("sort.txt", "w") as file:
    file.write("".join(value + "\n" for value in values))
先需要转换啊。将KB,MB都转换成计算机可以读的数据 1024, 1024*1024, 然后就可以排序了。
s = "5KB,100KB,1MB,3MB,9MB,2MB"
# Turn every unit suffix into an explicit multiplication: "5KB" -> "5*1024".
s = s.replace("KB", "*1024").replace("MB", "*1024*1024")
ss = [c.strip() for c in s.split(",") if c.strip()]
# eval is tolerable only because s is a hard-coded literal; never evaluate
# text that comes from a file or a user this way.
# sorted() is needed because map() returns an iterator without .sort() on Python 3.
ss = sorted(map(eval, ss))
当然你还可以将5120这样的数字转换成5KB, 这样更容易读,办法就是用1024除,直到小于1024
文本.sort()
答：用python读取文本文件，对读出的每一行进行操作，写法如下：f = open("test.txt", "r")while True:line = f.readline()if line:pass # do something here line=line.strip()p=line.rfind('.')filename=line[0:p]print "create %s"%line else:break f.close()...
答：创建一个文本框 textbox = p.add_textbox()设置文本框的宽度和高度 textbox.width = Pt(200)textbox.height = Pt(200)创建文本框中的文本 text = textbox.add_paragraph().add_run("你的文本内容")设置文本方向为从右向左的垂直文本 r = text._element r.rPr.rtl = 1 r.rPr.set(qn(...
答：coding =utf-8 reader = open('test.txt').readlines()n=0 for len in reader:writer=open(str(reader.index(len)+1)+'.txt','w')if (len.find('\n')>0):for n in range(0,10):writer.write(len[:-1]+reader[n])else:for n in range(0,10):writer.write(len+reader[n])...
答：分隔日志文件存为小文件 coding:utf-8 file: FileSplit.py import os,os.path,time def FileSplit(sourceFile, targetFolder):sFile = open(sourceFile, 'r')number = 100000#每个小文件中保存100000条数据 dataLine = sFile.readline()tempData = []#缓存列表 fileNum = 1 if not os.path....
答：Python读文本可以用with上下文管理器。根据文本来执行对应名字的函数可以用getattr方法。代码如下:首先，新建文本文件test.txt，内容如下:func1,life is short func2,use python func1, hello word 下面是Python代码，声明两个function，功能是打印出传入的参数。main里面的把内容就是按照文本的顺序，传入...
答：python 3.3 代码 import sysreader = open('scores.txt')line = reader.readline()#读取第一行数据scores = []#放 分数值 的数值stander = 0#及格人数while line != '' and line != None:#循环读取数据行 tempScore = line.split(' ')[1].replace('\n','')#将姓名和成绩分开，...
答：import re from itertools import imap, ifilter 定义解析"b.txt"文件的正则表达式 patt = re.compile(r"""(?P<category>\S*)\s*(?P<amount>\d+)""")初始化一个计数器 counter = {} with open("b.txt", "rt") as handle:用正则表达式逐行解析"b.txt"for m in ifilter(None, imap...
答：分段与组织: 读取内容后，函数开始切割工作，将文件内容分成一个个不超过max_chars大小的段落，同时创建一个全新的文件夹，存放这些分割后的宝藏。精细保存过程: 每一段文本都被细心地存储为新的文件，文件名巧妙地保留了原始文件名并添加了段落编号，方便识别和管理。在实际应用中，这个脚本是数据处理...
答：jieba分词
答：test.py --- def toleft(text):lines = text.split('\n')newlines = [line.strip() for line in lines]return '\n'.join(newlines)if __name__ == '__main__':import sys newcontent = toleft(open(sys.argv[1]).read())open(sys.argv[1], 'w').write(newcontent)--- ...