Parsing a text file in Java

The first program below generates a large pipe-delimited test file:

import org.apache.commons.lang.RandomStringUtils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Random;


public class GenerateFile {

  public static void main(String[] args) throws Exception {
    File file = new File("hugefile.txt");
    PrintStream ps = new PrintStream(new FileOutputStream(file));
    Random random = new Random(10);

    StringBuffer sb = new StringBuffer();
    // Write 700,000 lines, each made up of 10 pipe-separated random
    // alphanumeric tokens of 3 to 12 characters.
    for (int i = 0; i < 700000; i++) {
      for (int j = 0; j < 10; j++) {
        sb.append(RandomStringUtils.random(3 + random.nextInt(10), true, true));
        if (j < 9) {
          sb.append("|");
        }
      }
      ps.println(sb.toString());
      sb = new StringBuffer();
    }

    ps.close();
  }
}

The second program reads the generated file back line by line, splits each line on the '|' delimiter, and times the whole pass:

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;



public class TokenizeFile {

  public static void main(String[] args) throws Exception {
    File file = new File("hugefile.txt");
    BufferedReader br = new BufferedReader(new FileReader(file));

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    String line = null;
    long totalLinesProcessed = 0L;

    // Read the file line by line and split each line on the pipe delimiter.
    // The resulting tokens are discarded; the loop only measures parsing time.
    while ((line = br.readLine()) != null) {
      totalLinesProcessed++;
      StringUtils.split(line, "|");
    }

    stopWatch.stop();
    br.close();

    System.out.println("Total lines processed = " + totalLinesProcessed
        + " Time taken = " + stopWatch.getTime() + " ms");
  }
}

If you run the two programs above, you will generate and then parse a file of roughly 55 MB.

A sample run gives:
Total lines processed = 700000 Time taken = 4457 ms (1.6 GHz, 512 MB RAM)
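
For comparison, here is a minimal JDK-only sketch of the same tokenization loop. This variant, including the class name TokenizeFileJdk, is my own illustration and not part of the benchmark above. Note that '|' is a regex metacharacter, so it has to be escaped when splitting with java.util.regex, which is one reason the commons-lang StringUtils.split call is convenient here.

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.regex.Pattern;

// Illustrative JDK-only variant (not from the original post).
public class TokenizeFileJdk {

  public static void main(String[] args) throws Exception {
    // '|' must be escaped because split() treats it as a regex alternation.
    Pattern pipe = Pattern.compile("\\|");
    BufferedReader br = new BufferedReader(new FileReader("hugefile.txt"));

    long start = System.currentTimeMillis();
    long totalLinesProcessed = 0L;
    String line;
    while ((line = br.readLine()) != null) {
      totalLinesProcessed++;
      pipe.split(line); // tokens discarded, as in the StringUtils version
    }
    long elapsed = System.currentTimeMillis() - start;

    br.close();
    System.out.println("Total lines processed = " + totalLinesProcessed
        + " Time taken = " + elapsed + " ms");
  }
}

Timings for the two versions will differ with the machine and JDK used, so treat the figure above as a rough baseline rather than a head-to-head comparison.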





