用JAVA语言设计一个类,统计一篇英文文章的词频,并按照词频由高到低输出。修改下面代码就行了。 用C语言做一个给定一篇英语文章能够统计词频,共现词频的程序

请提供一个英语词频查询网站或应用程序?

谷歌金山词霸,挺好的,到网上下载吧。

#include <ctype.h>
#include <stdio.h>

/*
 * Reads an English article from standard input until end-of-file and prints
 * how many words it contains.
 *
 * A word is a maximal run of non-whitespace characters, so consecutive
 * spaces, tabs and line breaks between words are not counted.
 *
 * Returns 0 on completion.
 */
int main(void)
{
    long n = 0;      /* number of words seen so far */
    int in_word = 0; /* non-zero while the previous character was part of a word */
    int ch;          /* int, not char, so that EOF can be told apart from data */

    printf("Please input the article:\n");

    while ((ch = getchar()) != EOF) {
        if (isspace(ch)) {
            in_word = 0;
        } else if (!in_word) {
            /* First character of a new word. */
            in_word = 1;
            n++;
        }
    }

    /* n is a long, so it must be printed with %ld. */
    printf("This article has %ld words", n);
    return 0;
}

这题目如果能增加一个类的话会高效很多。。。如果非要在这个框框里面,代码麻烦 效率低下呢。

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

public class Article {

//保存文章的内容
String content;
//保存分割后的单词集合
String[] rawWords;
//保存统计后的单词集合
String[] words;
//保存单词对应的词频
int[] wordFreqs;

//构造函数,输入文章内容
//提高部分:从文件中读取
public Article() {
content = "kolya is one of the richest films i've seen in some time . zdenek sverak plays a confirmed old bachelor ( who's likely to remain so ) , who finds his life as a czech cellist increasingly impacted by the five-year old boy that he's taking care of . though it ends rather abruptly-- and i'm whining , 'cause i wanted to spend more time with these characters-- the acting , writing , and production values are as high as , if not higher than , comparable american dramas . this father-and-son delight-- sverak also wrote the script , while his son , jan , directed-- won a golden globe for best foreign language film and , a couple days after i saw it , walked away an oscar . in czech and russian , with english subtitles . ";
}

//对文章根据分隔符进行分词,将结果保存到rawWords数组中
public void splitWord(){
//分词的时候,因为标点符号不参与,所以所有的符号全部替换为空格
final char SPACE = ' ';
content = content.replace('\'', SPACE).replace(',', SPACE).replace('.', SPACE);
content = content.replace('(', SPACE).replace(')', SPACE).replace('-', SPACE);

rawWords = content.split("\\s+");//凡是空格隔开的都算单词,上面替换了', 所以I've 被分成2个 //单词
}

//统计词,遍历数组
public void countWordFreq() {
//将所有出现的字符串放入唯一的set中,不用map,是因为map寻找效率太低了
Set<String> set = new TreeSet<String>();

for(String word: rawWords){
set.add(word);
}

Iterator ite = set.iterator();

List<String> wordsList = new ArrayList<String>();
List<Integer> freqList = new ArrayList<Integer>();
//多少个字符串未知,所以用list来保存先
while(ite.hasNext()){
String word = (String) ite.next();

int count = 0;//统计相同字符串的个数
for(String str: rawWords){
if(str.equals(word)){
count++;
}
}

wordsList.add(word);
freqList.add(count++);
}

//存入数组当中
words = wordsList.toArray(new String[0]);

wordFreqs = new int[freqList.size()];
for(int i = 0; i < freqList.size(); i++){
wordFreqs[i] = freqList.get(i);
}

}

//根据词频,将词数组和词频数组进行降序排序
public void sort() {

class Word{
private String word;
private int freq;

public Word(String word, int freq){
this.word = word;
this.freq = freq;
}
}
//注意:此处排序,1)首先按照词频降序排列, 2)如果词频相同,按照字母降序排列,
//如 'abc' > 'ab' >'aa'
class WordComparator implements Comparator{

public int compare(Object o1, Object o2) {
Word word1 = (Word) o1;
Word word2 = (Word) o2;

if(word1.freq < word2.freq){
return 1;
}else if(word1.freq > word2.freq){
return -1;
}else{

int len1 = word1.word.trim().length();
int len2 = word2.word.trim().length();

String min = len1 > len2? word2.word: word1.word;
String max = len1 > len2? word1.word: word2.word;

for(int i = 0; i < min.length(); i++){
if(min.charAt(i) < max.charAt(i)){
return 1;
}
}

return 1;

}
}

}

List wordList = new ArrayList<Word>();

for(int i = 0; i < words.length; i++){
wordList.add(new Word(words[i], wordFreqs[i]));
}

Collections.sort(wordList, new WordComparator());

for(int i = 0; i < wordList.size(); i++){
Word wor = (Word) wordList.get(i);

words[i] = wor.word;
wordFreqs[i] = wor.freq;
}

}

//将排序结果输出
public void printResult() {
System.out.println("Total " + words.length + " different words in the content!");

for(int i = 0; i < words.length; i++){
System.out.println(wordFreqs[i] + " " + words[i]);
}
}

//测试类的功能
public static void main(String[] args) {
Article a = new Article();
a.splitWord();
a.countWordFreq();
a.sort();
a.printResult();
}
}

-----------------------
Total 99 different words in the content!
5 and
4 the
4 i
4 a
3 as
2 with
2 who
2 to
2 time
2 sverak
2 son
2 s
2 old
2 of
2 it
2 in
2 his
2 czech
1 zdenek
1 year
1 wrote
1 writing
1 won
1 whining
1 while
1 wanted
1 walked
1 ve
1 values
1 though
1 this
1 these
1 that
1 than
1 taking
1 subtitles
1 spend
1 some
1 so
1 seen
1 script
1 saw
1 russian
1 richest
1 remain
1 rather
1 production
1 plays
1 oscar
1 one
1 not
1 more
1 m
1 likely
1 life
1 language
1 kolya
1 jan
1 is
1 increasingly
1 impacted
1 if
1 higher
1 high
1 he
1 golden
1 globe
1 foreign
1 for
1 five
1 finds
1 films
1 film
1 father
1 english
1 ends
1 dramas
1 directed
1 delight
1 days
1 couple
1 confirmed
1 comparable
1 characters
1 cellist
1 cause
1 care
1 by
1 boy
1 best
1 bachelor
1 away
1 are
1 an
1 american
1 also
1 after
1 acting
1 abruptly

测试结果为
共123个单词,以下为该文章出现的单词及其出现次数。
--------单词----次数--------
-------and----5--------
-------a----4--------
-------the----4--------
-------as----3--------
-------of----2--------
-------time----2--------
-------czech----2--------
-------son----2--------
-------i----2--------
-------to----2--------
-------old----2--------
-------his----2--------
-------with----2--------
-------it----2--------
-------sverak----2--------
-------in----2--------
-------for----1--------
-------higher----1--------
-------wrote----1--------
-------production----1--------
-------oscar----1--------
-------confirmed----1--------
-------are----1--------
-------zdenek----1--------
-------year----1--------
-------these----1--------
-------ends----1--------
-------comparable----1--------
-------not----1--------
-------he's----1--------
-------russian----1--------
-------'cause----1--------
-------bachelor----1--------
-------saw----1--------
-------language----1--------
-------some----1--------
-------i've----1--------
-------kolya----1--------
-------abruptly----1--------
-------wanted----1--------
-------delight----1--------
-------life----1--------
-------american----1--------
-------rather----1--------
-------best----1--------
-------subtitles----1--------
-------walked----1--------
-------dramas----1--------
-------films----1--------
-------seen----1--------
-------taking----1--------
-------impacted----1--------
-------remain----1--------
-------days----1--------
-------finds----1--------
-------by----1--------
-------plays----1--------
-------though----1--------
-------who----1--------
-------after----1--------
-------more----1--------
-------values----1--------
-------who's----1--------
-------care----1--------
-------jan----1--------
-------so----1--------
-------likely----1--------
-------richest----1--------
-------script----1--------
-------that----1--------
-------than----1--------
-------i'm----1--------
-------acting----1--------
-------foreign----1--------
-------english----1--------
-------this----1--------
-------characters----1--------
-------golden----1--------
-------one----1--------
-------writing----1--------
-------father----1--------
-------while----1--------
-------if----1--------
-------couple----1--------
-------won----1--------
-------globe----1--------
-------film----1--------
-------whining----1--------
-------is----1--------
-------five----1--------
-------cellist----1--------
-------spend----1--------
-------away----1--------
-------directed----1--------
-------an----1--------
-------increasingly----1--------
-------high----1--------
-------boy----1--------
-------also----1--------

以下是源码

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class Article {
// 保存文章的内容
String content;

// 保存分割后的单词集合
String[] rawWords;

// 保存统计后的单词集合
String[] words;

// 保存单词对应的词频
int[] wordFreqs;

// 构造函数,输入文章内容
// 提高部分:从文件中读取
public Article() {
content = "kolya is one of the richest films i've seen in some time . "
+ "zdenek sverak plays a confirmed old bachelor ( who's likely to remain so ) , "
+ "who finds his life as a czech cellist increasingly impacted by the five-year "
+ "old boy that he's taking care of . though it ends rather abruptly-- and i'm "
+ "whining , 'cause i wanted to spend more time with these characters-- the acting , "
+ "writing , and production values are as high as , if not higher than , comparable "
+ "american dramas . this father-and-son delight-- sverak also wrote the script , "
+ "while his son , jan , directed-- won a golden globe for best foreign language film "
+ "and , a couple days after i saw it , walked away an oscar . in czech and russian , "
+ "with english subtitles . ";
}

// 对文章根据分隔符进行分词,将结果保存到rawWords数组中
public void splitWord() {
rawWords = content.split(" [\\.,()]{0,1} {0,1},{0,1} {0,1}|-- |-");
}

// 统计词,遍历数组
public void countWordFreq() {
words = new String[rawWords.length];
wordFreqs = new int[rawWords.length];
int length = 0;
for (int i = 0; i < rawWords.length; i++) {
boolean isExist = false;
int j = 0;
for (; j < length; j++) {
if (words[j].equals(rawWords[i])) {
isExist = true;
break;
}
}
if (isExist)
wordFreqs[j]++;
else {
wordFreqs[length]++;
words[length] = rawWords[i];
length++;
}
}
}

// 根据词频,将词数组和词频数组进行降序排序
public void sort() {
Map<String, Integer> value = new HashMap<String, Integer>();
for (int i = 0; i < this.words.length; i++) {
if (this.words != null)
value.put(this.words[i], this.wordFreqs[i]);
}
List<Map.Entry<String, Integer>> info = new ArrayList<Map.Entry<String, Integer>>(
value.entrySet());
Collections.sort(info, new Comparator<Map.Entry<String, Integer>>() {
public int compare(Map.Entry<String, Integer> obj1,
Map.Entry<String, Integer> obj2) {
return obj2.getValue() - obj1.getValue();
}
});
this.words = new String[info.size()];
this.wordFreqs = new int[info.size()];
for(int i = 0; i < words.length; i++) {
this.words[i] = info.get(i).getKey();
this.wordFreqs[i] = info.get(i).getValue();
}
}

// 将排序结果输出
public void printResult() {
System.out.println("共" + this.rawWords.length + "个单词,以下为该文章出现的单词及其出现次数。");
System.out.println("--------单词----次数--------");
for(int i = 0; i < this.words.length; i++)
System.out.println("-------" + this.words[i] + "----" + this.wordFreqs[i] + "--------");
}

public static void main(String[] args) {
// 测试类的功能
Article art = new Article();
art.splitWord();
art.countWordFreq();
art.sort();
art.printResult();
}
}

  • java:用静态的成员变量来统计类XX被实例化的对象的数量
    绛旓細public class MyDemo { static int count = 0;// 姣忔璋冪敤浠绘剰鏋勯犳柟娉曞疄渚嬪寲閮戒細鎵ц { count++;} // 鎻愪緵鏂规硶杩斿洖瀹炰緥鍖栬繃鐨勬鏁 public static int getCount() { return count;} }
  • java 璁捐涓鑸被Person,鍖呮嫭濮撳悕銆佸勾榫勩佹у埆銆佽亴涓氥佷汉鏁缁熻(闈欐佹垚 ...
    绛旓細public class Person { private static String name;private static String sex;private static int age;Person(String name, int age) { this.name = name;this.age = age;} public void print() { System.out.print("鍚嶅瓧锛" + name + " 鎬у埆锛" + sex + " 骞撮緞锛" + age + " ");...
  • 鐢↗AVA缂栧啓涓涓▼搴,缁熻涓涓鏂囨湰鏂囦欢涓瓧绗鐨勪釜鏁
    绛旓細{e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {br.close();} catch (IOException e) {e.printStackTrace();}}return count;}}娴嬭瘯鏂囦欢锛氳繍琛屾埅鍥撅細PS锛氫唬鐮佸浜庨涓枃浠跺拰鏌ユ壘瀛楃宸茬粡鍗曠嫭灏佽鎴涓涓鏂规硶锛屽彲浠ョ伒娲绘敼鍔ㄦ墍闇瑕佹煡鎵剧殑鏂囦欢鍜屾煡鎵剧殑瀛楃銆
  • java璇█涓浣曡幏鍙栧瓧绗︿覆涓眽瀛楃殑涓暟
    绛旓細銆愬疄鐜拌繃绋嬨戝湪Eclipse涓柊寤洪」鐩瓹hineseCharacter锛屽苟鍦ㄥ叾涓垱寤涓涓ChineseCharacter.java鏂囦欢銆傚湪璇ョ被鐨勪富鏂规硶涓垱寤烘爣鍑嗚緭鍏ユ祦鐨勬壂鎻忓櫒瀵硅薄锛屾帴鏀剁敤鎴疯緭鍏ョ殑瀛楃涓层傛垜浠湪绋嬪簭涓娇鐢╩atches()鏂规硶鏉缁熻璇ュ瓧绗︿覆涓眽瀛楃殑涓暟銆傛牳蹇冧唬鐮佸涓嬫墍绀猴細protectedvoiddo_button_actionPerformed(ActionEvente){String...
  • 浠庨敭鐩樿緭鍏ヤ竴涓插瓧绗︿覆,缂栧啓涓涓猨ava绋嬪簭瀹炵幇缁熻,杈撳嚭鏈夊嚑涓ぇ鍐欏瓧姣...
    绛旓細杩欎釜鏄鐞嗘枃浠剁殑绫籉ileUtil锛=== package org.xhome.leon.test;import java.io.*;public class FileUtil { FileReader fr;BufferedReader br;FileWriter fw;BufferedWriter bw;String source = "";public int upCaseNum = 0;public int lowerCaseNum = 0;public int numerNum = 0;public int oth...
  • 鐢╦ava缂栧啓涓涓绋嬪簭 瑕佹眰闅忔満浜х敓10000涓1鈥10涔嬮棿鐨勬暣鏁,缁熻姣忎釜鏁(1...
    绛旓細import java.util.Calendar;import java.util.Random;public class Rand { private static int[] getBigRandom(int count, int range) { Random rnd = new Random(Calendar.getInstance().getTimeInMillis()); //涓嶅姞杩欎釜姣忔杩愯缁撴灉閮戒竴鏍 int[] big = new int[count];int k = 0 ,f;for ...
  • java涓濡備綍缁熻涓涓瀛楃涓茬殑闀垮害
    绛旓細棣栧厛鎵撳紑eclipse 鏂板缓涓涓猨ava椤圭洰锛屽悕瀛楅殢鎰忚捣 鍚嶅瓧璧峰ソ鍚庯紝鐐瑰嚮瀹屾垚 鍙抽敭鐐瑰嚮椤圭洰鍚嶇О锛屾柊寤猴紝绫 绫荤殑鍚嶅瓧鍙玊extLength 鍖呯殑鍚嶅瓧鍙 com.zf.s2 鐐瑰嚮瀹屾垚 棣栧厛瑕佸垽鏂槸鍚︽槸姹夊瓧 public static int getChineseCount(String s) throws Exception{//鑾峰緱姹夊瓧鐨勯暱搴 char c;int chineseCount=0;if(!""....
  • Java璁捐骞跺疄鐜涓涓搴旂敤绋嬪簭,鑳藉璇诲彇涓涓枃鏈枃浠朵腑鐨勫唴瀹瑰苟鏄剧ず,鍚屾椂...
    绛旓細java缂栧啓鏄剧ず鏂囨湰鐨勫簲鐢ㄧ▼搴, 闇瑕佺敤鍒板浘褰㈢晫闈UI缂栫▼鎶鏈.姝ラ涓: 闇瑕佹惌寤涓涓鏁翠綋鐨勫瑙, 璋冩暣甯冨眬鍜岀粍浠剁殑浣嶇疆.涓昏闇瑕佷娇鐢ㄧ殑缁勪欢 JTextField 鏂囨湰妗嗙粍浠: 鐢ㄤ簬褰撳仛鍦板潃鏍, 濉啓鏂囦欢鐨勮矾寰 JButton 鎸夐挳, 鐢ㄤ簬鍝嶅簲鐐瑰嚮浜嬩欢,鏍规嵁鍦板潃鏍忕殑鍦板潃璇诲彇鏂囦欢, 骞朵笖鏄剧ず鍒版枃鏈煙閲 JTextArea 鏂囨湰鍩, 鐢ㄤ簬...
  • 浣跨敤java璇█缂栧啓绋嬪簭,缁熻浠庢帶鍒跺彴杈撳叆鐨勪竴琛屽瓧绗︿覆涓暟瀛椾釜鏁般佸瓧姣...
    绛旓細);i++) { strs[i]=str.substring(i,i+1);if(strs[i].matches(intpatten))intflag++;else if(strs[i].matches(charpatten))charflag++;else signalflag++;} System.out.println("瀛楃涓暟锛"+charflag+" 鏁板瓧涓暟"+intflag+" 闈炲瓧绗︽暟:"+signalflag);} 姝e垯琛ㄨ揪寮忓仛 鐪嬩笉鎳傝拷闂 ...
  • 姹傚ぇ绁炴暀鎴,java璇█閲岀殑瀛楁暟缁熻鍔熻兘鐨勪唬鐮?
    绛旓細鍏跺疄鐩存帴鎶婅缁熻鐨勫瓧鏁版斁鍦涓涓瀛楃涓查噷灏辫浜唖tring str = "";鐒跺悗鍐嶇敤string鐨刲ength鏂规硶灏卞彲浠ョ粺璁″瓧绗︿覆闀垮害浜嗐俿tring str = "xxxxx";int strlen = str.length();system.out.print(strlen);鎵撳嵃鍑烘潵鐨勬暟瀛楀氨鏄墍杈撳叆鐨勫瓧鏁(杩欓噷鏄寘鎷爣鐐圭鍙风殑锛屽鏋滆鍘绘爣鐐圭┖鏍煎彲浠ョ敤姝e垯琛ㄨ揪寮)...
  • 扩展阅读:java javascript ... 用java设计一个窗口 ... java编程入门 ... 用java制作一个网页代码 ... java指令大全 ... 用java设计一个网页 ... 用java设计一个学生类 ... java语言标识符区分大小写 ... java编程代码大全免费 ...

    本站交流只代表网友个人观点,与本站立场无关
    欢迎反馈与建议,请联系电邮
    2024© 车视网