# Reconstructed from commit 9a9886d ("made pn option operate on each date
# section"): this is process_file from sorter.rb as it reads after that change.
#
# This method is the meat and potatoes. It reads a text file, counts the
# entries of every bin in it, and writes the results out as JSON and CSV.
#
# file_name - path of the text file to analyse. The output base name is this
#             path with the first '.txt' removed; the argument itself is left
#             untouched.
# binfile   - path of a CSV file with one bin per row: the first cell is the
#             bin name, the remaining non-nil cells are that bin's entries.
# type      - 'iat': the whole file is passed through strip_text with the
#                    markers 'PLOVEINTAKE' / 'PLOVECLOSING' and produces one
#                    report.
#             'pn':  the file is split on "Date and time:"; everything before
#                    the first marker is ignored and every later section gets
#                    its own report, named after the section's first line.
#             Any other value writes nothing.
#
# NOTE(review): strip_text, bin_counter, count_total, write_output_json and
# write_output_csv are defined elsewhere in sorter.rb; what they do with their
# arguments is not visible here.
def process_file(file_name, binfile, type)
  csv = CSV.read(binfile)
  text = File.read(file_name)
  # Turn the csv rows into a hash of bin name => entries, dropping nil cells.
  bins = csv.to_h { |row| [row[0], row[1..].compact] }
  # Work on a copy: slicing the argument in place would rename the caller's
  # string and raise FrozenError on a frozen one.
  base_name = file_name.sub('.txt', '')

  case type
  when 'iat'
    write_bin_report(strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING'), bins, base_name)
  when 'pn'
    puts 'pn'
    # sections is an array holding each date section of the text; the first
    # chunk (whatever precedes the first marker) can be ignored.
    sections = text.lines('Date and time:', chomp: true).drop(1)
    sections.each do |chunk|
      timestamp = pn_section_timestamp(chunk)
      next unless timestamp # an empty section has nothing to name a report after

      outfile = "#{base_name}_#{timestamp}"
      puts outfile
      write_bin_report(strip_text(chunk, 'Narrative:', 'Signatures:'), bins, outfile)
    end
  end
end

# Counts every bin's entries in text and hands the result to the JSON and CSV
# writers as "<outfile>-out.json" and "<outfile>-out.csv".
def write_bin_report(text, bins, outfile)
  output = { filename: outfile, total_words: text.split.size }
  bins.each do |bin_number, entries|
    tally = { words: bin_counter(entries, text) }
    # count_total is given the bin's hash while it holds only :words.
    tally[:total] = count_total(tally)
    output[bin_number.to_sym] = tally
  end
  write_output_json(output, outfile + '-out.json')
  write_output_csv(output, outfile + '-out.csv')
end

# Returns a file-name-safe timestamp built from the first line of chunk that
# is not a bare "\r\n", or nil when there is no such line. Slashes and colons
# are removed and spaces become underscores before the line ending is
# stripped, so a leading space survives as a leading underscore.
def pn_section_timestamp(chunk)
  first_line = chunk.lines.find { |line| line != "\r\n" }
  return nil unless first_line

  # Non-bang calls on purpose: tr! returns nil when nothing was replaced,
  # which breaks a chain as soon as one of the characters is absent.
  first_line.delete('/:').tr(' ', '_').strip
end

#process_dir expects: