made pn option operate on each date section

2020-11-21 16:45:55 -05:00 · 2020-11-21 16:45:55 -05:00 · 9a9886df9d
parent 174effc0fb
commit 9a9886df9d
1 changed files with 39 additions and 14 deletions
--- a/sorter.rb
+++ b/sorter.rb
@ -123,27 +123,52 @@ end
 #
 #This method is the meat and potatoes. Performs the text stripping, word counting, and creates output files.
 def process_file (file_name, binfile, type)
+  #text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
  csv = CSV.read(binfile)
  text = File.read(file_name)
-  text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
-  text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
-  output = Hash.new #Creating the output storage object
  bins = Hash.new #This hash stores the bins
+  csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
  outfile = file_name
  outfile.slice!('.txt')
+  if type == 'iat'
+    text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
+    output = Hash.new #Creating the output storage object
  #puts outfile
-  output[:filename] = outfile
-  output[:total_words] = text.split.size
-  puts output
-  csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
-  bins.each_key do |bin_number|
-    key = bin_number.to_sym
-    output[key] = Hash.new
-    output[key][:words] = bin_counter(bins[bin_number], text)
-    output[key][:total] = count_total(output[key])
+    output[:filename] = outfile
+    output[:total_words] = text.split.size
+    bins.each_key do |bin_number|
+      key = bin_number.to_sym
+      output[key] = Hash.new
+      output[key][:words] = bin_counter(bins[bin_number], text)
+      output[key][:total] = count_total(output[key])
+    end
+    write_output_json(output,outfile + '-out.json')
+    write_output_csv(output,outfile + '-out.csv')
+  elsif type == 'pn'
+    puts 'pn'
+    sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
+    sections.delete_at(0) #we can ignore the first chunk of text
+    sections.each do |chunk|
+      timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
+      timestamp.tr!('/','').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replace spaces with underscores
+      timestamp.strip!
+      output = Hash.new #Creating the output storage object
+      outfile = file_name + '_' + timestamp
+      outfile.slice!('.txt')
+      puts outfile
+      text = strip_text(chunk, 'Narrative:', 'Signatures:')
+      output[:filename] = outfile
+      output[:total_words] = text.split.size
+      bins.each_key do |bin_number|
+        key = bin_number.to_sym
+        output[key] = Hash.new
+        output[key][:words] = bin_counter(bins[bin_number], text)
+        output[key][:total] = count_total(output[key])
+      end
+      write_output_json(output,outfile + '-out.json')
+      write_output_csv(output,outfile + '-out.csv')
+    end
  end
-  write_output_json(output,outfile + '-out.json')
-  write_output_csv(output,outfile + '-out.csv')
 end

 #process_dir expects: