Compare commits

..

5 Commits

Author SHA1 Message Date
Jeff Yates ecd02eea4f added .gitignore 2020-11-27 09:36:08 -05:00
Jeff Yates 1654510a0f added code to catch error in timestamp 2020-11-22 15:58:47 -05:00
Jeff Yates f02c088201 fixed missing end 2020-11-22 15:37:46 -05:00
Jeff Yates a0abcac6e0 error handling for writing json 2020-11-22 15:37:03 -05:00
Jeff Yates cd7adff9cf print output hash for debugging 2020-11-22 15:29:25 -05:00
2 changed files with 21 additions and 15 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
test-data/*
master.csv
bins.csv

View File

@ -47,9 +47,14 @@ end
# output - a hash containing all of our output # output - a hash containing all of our output
#This method converts the output hash to JSON and writes it to "output.json" #This method converts the output hash to JSON and writes it to "output.json"
def write_output_json (output, filename) def write_output_json (output, filename)
outfile = File.open(filename,'w') begin
outfile.write(output.to_json) outfile = File.open(filename,'w')
outfile.close outfile.write(output.to_json)
outfile.close
rescue
pp output
abort
end
end end
def write_output_csv (output, filename) def write_output_csv (output, filename)
@ -147,8 +152,13 @@ def process_file (file_name, binfile, type)
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
sections.delete_at(0) #we can ignore the first chunk of text sections.delete_at(0) #we can ignore the first chunk of text
sections.each do |chunk| sections.each do |chunk|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp begin
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
rescue
pp timestamp
abort
end
timestamp.strip! timestamp.strip!
output = Hash.new #Creating the output storage object output = Hash.new #Creating the output storage object
outfile = file_name + '_' + timestamp outfile = file_name + '_' + timestamp
@ -161,10 +171,7 @@ def process_file (file_name, binfile, type)
output[key] = Hash.new output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text) output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key]) output[key][:total] = count_total(output[key])
output[key][:frequency] = 0 output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
#output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f if output[[:total_words] != 0
freq = output[key][:total].to_f / output[:total_words].to_f
output[key][:frequency] = freq.to_s
end end
write_output_json(output,outfile + '-out.json') write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv') write_output_csv(output,outfile + '-out.csv')
@ -179,14 +186,10 @@ end
# #
#This method will process all .txt files in the supplied directory #This method will process all .txt files in the supplied directory
def process_dir(dir_name, binfile, type) def process_dir(dir_name, binfile, type)
threads = []
Dir.glob(dir_name + '*.txt') do |file_name| Dir.glob(dir_name + '*.txt') do |file_name|
threads << Thread.new do puts "Processing " + file_name
puts "Processing " + file_name process_file(file_name, binfile, type)
process_file(file_name, binfile, type)
end
end end
threads.each { |thr| thr.join }
end end
def generate_master_output(dir_name, binfile) def generate_master_output(dir_name, binfile)