import org.apache.commons.lang.RandomStringUtils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Random;
/**
 * Generates the pipe-delimited test file {@code hugefile.txt} in the current
 * working directory.
 *
 * <p>The file contains {@link #LINE_COUNT} lines. Each line holds
 * {@link #FIELDS_PER_LINE} random alphanumeric fields separated by
 * {@code '|'}; every field is between {@link #MIN_FIELD_LENGTH} and
 * {@code MIN_FIELD_LENGTH + FIELD_LENGTH_SPREAD - 1} characters long.
 */
public class GenerateFile {

    /** Name of the generated file, relative to the working directory. */
    private static final String OUTPUT_FILE = "hugefile.txt";

    /** Number of lines written to the file. */
    private static final int LINE_COUNT = 700000;

    /** Number of fields on every line. */
    private static final int FIELDS_PER_LINE = 10;

    /** Shortest field length, in characters. */
    private static final int MIN_FIELD_LENGTH = 3;

    /** Number of distinct field lengths (lengths are MIN .. MIN + SPREAD - 1). */
    private static final int FIELD_LENGTH_SPREAD = 10;

    /** Seed of the generator that picks the field lengths. */
    private static final long LENGTH_SEED = 10;

    /**
     * Writes the test file, overwriting any existing file of the same name.
     *
     * @param args ignored
     * @throws Exception if the file cannot be created or a write fails
     */
    public static void main(String[] args) throws Exception {
        File file = new File(OUTPUT_FILE);
        PrintStream ps = new PrintStream(new FileOutputStream(file));
        try {
            Random random = new Random(LENGTH_SEED);
            // One builder reused for every line; no synchronization is needed here.
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < LINE_COUNT; i++) {
                sb.setLength(0);
                for (int j = 0; j < FIELDS_PER_LINE; j++) {
                    if (j > 0) {
                        sb.append('|');
                    }
                    // nextInt(n) is already in [0, n), so no extra modulo is required.
                    int length = MIN_FIELD_LENGTH + random.nextInt(FIELD_LENGTH_SPREAD);
                    sb.append(RandomStringUtils.random(length, true, true));
                }
                ps.println(sb.toString());
            }
            // PrintStream never throws on write failure; it only records the error.
            if (ps.checkError()) {
                throw new java.io.IOException("Failed while writing " + file);
            }
        } finally {
            // Release the file handle even when generation fails part-way.
            ps.close();
        }
    }
}
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;
/**
 * Small timing harness: reads {@code hugefile.txt} from the current working
 * directory line by line, splits every line on {@code '|'} with
 * {@code StringUtils.split}, and prints the number of lines read together
 * with the elapsed time in milliseconds.
 */
public class TokenizeFile {

    /**
     * Runs the tokenizing loop once and prints the result to standard output.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        File file = new File("hugefile.txt");
        BufferedReader br = new BufferedReader(new FileReader(file));
        StopWatch stopWatch = new StopWatch();
        long totalLinesProcessed = 0L;
        try {
            stopWatch.start();
            String line;
            while ((line = br.readLine()) != null) {
                totalLinesProcessed++;
                // The tokens are discarded on purpose: only the cost of splitting is timed.
                StringUtils.split(line, "|");
            }
            // Stop before closing so the close itself is not part of the measurement.
            stopWatch.stop();
        } finally {
            // Release the file handle even when reading fails part-way.
            br.close();
        }
        System.out.println("Total lines processed = "+totalLinesProcessed+" Time taken = "+stopWatch.getTime() +" ms");
    }
}
If you run the two programs above in order, the second one will tokenize a file of roughly 55 MB.
Results of a sample run:
Total lines processed = 700000 Time taken = 4457 ms (1.6 GHz, 512 MB RAM)
main
0 comments:
Post a Comment