fixed merge conflict

minor changes to output and date format
made pn option operate on each date section
2020-11-21 17:09:08 -05:00 · 2020-11-21 16:53:48 -05:00 · 2020-11-21 16:45:55 -05:00 · 2020-11-21 13:08:58 -05:00
1 changed files with 49 additions and 11 deletions
--- a/sorter.rb
+++ b/sorter.rb
@ -13,6 +13,9 @@
 # The output file is single-line JSON, use jq to format for reading.       #
 #                                                                          #
 #To Do:                                                                    #
 # * Fix word count                                                         #
 # * add frequency                                                          #
 # * split by dates for progress notes                                      #
 ############################################################################
 require 'json'
 require 'pp'
@ -55,6 +58,7 @@ def write_output_csv (output, filename)
  CSV.open(filename, 'wb') do |csv|
    csv << ["bin", "words", "total"]
    output.delete(:filename)
    output.delete(:total_words)
    output.each_key do |key|
      line = []
      line.push(key)
@ -119,17 +123,20 @@ end
 #
 #This method is the meat and potatoes. Performs the text stripping, word counting, and creates output files.
 def process_file (file_name, binfile, type)
  #text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
  csv = CSV.read(binfile)
  text = File.read(file_name)
  text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
  text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
  output = Hash.new #Creating the output storage object
  bins = Hash.new #This hash stores the bins
  csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
  outfile = file_name
  outfile.slice!('.txt')
-  puts outfile
+<<<<<<< HEAD
  if type == 'iat'
    text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
    output = Hash.new #Creating the output storage object
  #puts outfile
    output[:filename] = outfile
-  csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
+    output[:total_words] = text.split.size
    bins.each_key do |bin_number|
      key = bin_number.to_sym
      output[key] = Hash.new
@ -138,6 +145,39 @@ def process_file (file_name, binfile, type)
    end
    write_output_json(output,outfile + '-out.json')
    write_output_csv(output,outfile + '-out.csv')
  elsif type == 'pn'
    sections = text.lines("Date and time:", chomp: true) #sections is an arrary of each date section from the text
    sections.delete_at(0) #we can ignore the first chunk of text
    sections.each do |chunk|
      timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
      timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with unserscores
      timestamp.strip!
      output = Hash.new #Creating the output storage object
      outfile = file_name + '_' + timestamp
      outfile.slice!('.txt')
      text = strip_text(chunk, 'Narrative:', 'Signatures:')
      output[:filename] = outfile
      output[:total_words] = text.split.size
      bins.each_key do |bin_number|
        key = bin_number.to_sym
        output[key] = Hash.new
        output[key][:words] = bin_counter(bins[bin_number], text)
        output[key][:total] = count_total(output[key])
      end
      write_output_json(output,outfile + '-out.json')
      write_output_csv(output,outfile + '-out.csv')
    end
 =======
  puts outfile
  output[:filename] = outfile
  csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
  bins.each_key do |bin_number|
    key = bin_number.to_sym
    output[key] = Hash.new
    output[key][:words] = bin_counter(bins[bin_number], text)
    output[key][:total] = count_total(output[key])
 >>>>>>> d40b0ae9853ecb6d5d479ea121a7a3cdba00323c
  end
 end
 #process_dir expects:
@ -154,7 +194,6 @@ def process_dir(dir_name, binfile, type)
 end
 def generate_master_output(dir_name, binfile)
  puts dir_name
  file=File.open(binfile,"r")
  bin_count = file.readlines.size
  file.close
@ -170,19 +209,18 @@ def generate_master_output(dir_name, binfile)
    header = ["File", "Total Words" ] + bin_header
    csv << header
    Dir.glob(dir_name + '*.json') do |file_name|
-      puts file_name
+      puts 'Getting data from: ' + file_name
      csv_row = []
      json_file = File.read(file_name)
      data_hash = JSON.parse(json_file)
      csv_row.push(data_hash["filename"])
      csv_row.push(data_hash["total_words"])
      data_hash.delete("filename")
-      word_total = 0
+      data_hash.delete("total_words")
      data_hash.each_key do |key|
        csv_row.push(data_hash[key]["words"])
        csv_row.push(data_hash[key]["total"])
        word_total += data_hash[key]["total"]
      end
      csv_row = csv_row.insert(1, word_total)
      csv << csv_row
    end
  end
Author	SHA1	Message	Date
Jeff Yates	a2f3259fdf	fixed merge conflict	2020-11-21 17:09:08 -05:00
Jeff Yates	fd88c3457c	minor changes to output and date format	2020-11-21 16:53:48 -05:00
Jeff Yates	9a9886df9d	made pn option operate on each date section	2020-11-21 16:45:55 -05:00
Jeff Yates	174effc0fb	changed total word count behaviour	2020-11-21 13:08:58 -05:00