Compare commits

...

3 Commits

Author SHA1 Message Date
Jeff Yates e2aaa7a2b5 turned frequency into a string for json 2020-11-22 15:24:29 -05:00
Jeff Yates fa39e4d040 fixed NaN error when dividing by 0 2020-11-22 15:18:37 -05:00
Jeff Yates ddb4003e66 added threading to file processing 2020-11-21 19:11:29 -05:00
1 changed file with 11 additions and 4 deletions

View File

@ -148,7 +148,7 @@ def process_file (file_name, binfile, type)
sections.delete_at(0) #we can ignore the first chunk of text sections.delete_at(0) #we can ignore the first chunk of text
sections.each do |chunk| sections.each do |chunk|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with unserscores timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
timestamp.strip! timestamp.strip!
output = Hash.new #Creating the output storage object output = Hash.new #Creating the output storage object
outfile = file_name + '_' + timestamp outfile = file_name + '_' + timestamp
@ -161,7 +161,10 @@ def process_file (file_name, binfile, type)
output[key] = Hash.new output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text) output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key]) output[key][:total] = count_total(output[key])
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f output[key][:frequency] = 0
#output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f if output[[:total_words] != 0
freq = output[key][:total].to_f / output[:total_words].to_f
output[key][:frequency] = freq.to_s
end end
write_output_json(output,outfile + '-out.json') write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv') write_output_csv(output,outfile + '-out.csv')
@ -176,10 +179,14 @@ end
# #
#This method will process all .txt files in the supplied directory #This method will process all .txt files in the supplied directory
# Processes every .txt file found in +dir_name+, running process_file for each
# one on its own thread and waiting for all of them to finish.
#
# dir_name - directory to scan; the trailing path separator is optional. A
#            value that is not an existing directory is used as a raw glob
#            prefix (dir_name + '*.txt'), which keeps the earlier behaviour
#            for callers that pass '' or a file-name prefix.
# binfile  - passed through unchanged to process_file.
# type     - passed through unchanged to process_file.
#
# Returns the Array of Thread objects, all of which have been joined. An
# exception raised inside a worker is re-raised here by Thread#join.
def process_dir(dir_name, binfile, type)
  # File.join supplies the separator when the caller omitted it; plain
  # concatenation would otherwise glob "dir*.txt" next to the directory
  # instead of the files inside it.
  pattern = File.directory?(dir_name) ? File.join(dir_name, '*.txt') : dir_name + '*.txt'

  workers = Dir.glob(pattern).map do |file_name|
    # Hand the name to the thread explicitly so each worker owns its own copy
    # of the reference.
    Thread.new(file_name) do |path|
      puts "Processing " + path
      process_file(path, binfile, type)
    end
  end

  workers.each(&:join)
end
def generate_master_output(dir_name, binfile) def generate_master_output(dir_name, binfile)