1 changed files with 23 additions and 89 deletions
--- a/sorter.rb
+++ b/sorter.rb
@ -13,6 +13,7 @@
 # The output file is single-line JSON, use jq to format for reading.       #
 #                                                                          #
 #To Do:                                                                    #
 # * Output to csv                                                          #
 ############################################################################
 require 'json'
 require 'pp'
@ -54,13 +55,12 @@ end
 def write_output_csv (output, filename)
  CSV.open(filename, 'wb') do |csv|
    csv << ["bin", "words", "total"]
    output.delete(:filename)
    output.each_key do |key|
      line = []
      line.push(key)
      output[key].each_key do |sub_key|
        line.push(output[key][sub_key])
-      end 
+      end
      csv << line
    end
  end
@ -112,82 +112,6 @@ def split_text (text, start, fin)
  return ret
 end
 #process_file expects:
 # file_name - the name of the text file to process (left unmodified)
 # binfile - the name of the bin file (csv) to use
 # type - which type of file are we processing, must be 'pn' or 'iat';
 #        any other value leaves the text unfiltered
 #
 #This method is the meat and potatoes. Performs the text stripping, word
 #counting, and creates the output files <name>-out.json and <name>-out.csv,
 #where <name> is file_name with a trailing '.txt' removed.
 def process_file(file_name, binfile, type)
   csv = CSV.read(binfile)
   text = File.read(file_name)
   text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
   text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'

   # delete_suffix returns a new string, so the caller's file_name is not
   # mutated, and only a real '.txt' extension is removed (not a '.txt' that
   # happens to appear earlier in the path).
   outfile = file_name.delete_suffix('.txt')
   output = { filename: outfile } # the output storage object

   # Turn the csv rows into a hash: first cell is the bin name, the remaining
   # non-nil cells are that bin's entries.
   bins = {}
   csv.each do |row|
     next if row.empty? # blank line in the bin file

     bins[row[0]] = row[1..].compact
   end

   bins.each do |bin_number, bin_words|
     key = bin_number.to_sym
     output[key] = { words: bin_counter(bin_words, text) }
     # count_total is handed the bin's hash, which at this point holds :words only
     output[key][:total] = count_total(output[key])
   end

   write_output_json(output, "#{outfile}-out.json")
   write_output_csv(output, "#{outfile}-out.csv")
 end
 #process_dir expects:
 # dir_name - a directory containing text files to process, with or without
 #            a trailing slash; an empty string means the current directory
 # binfile - the name of the bin file
 # type - which type of file are we processing, must be 'pn' or 'iat'
 #
 #This method will process all .txt files in the supplied directory,
 #printing each file name before handing it to process_file.
 def process_dir(dir_name, binfile, type)
   # File.join supplies the path separator when dir_name lacks one; plain
   # concatenation would turn "docs" into the pattern "docs*.txt".
   pattern = dir_name.empty? ? '*.txt' : File.join(dir_name, '*.txt')
   Dir.glob(pattern) do |file_name|
     puts "Processing #{file_name}"
     process_file(file_name, binfile, type)
   end
 end
 #generate_master_output expects:
 # dir_name - a directory containing the .json files to combine, with or
 #            without a trailing slash; an empty string means the current
 #            directory
 # binfile - the name of the bin file; its line count decides how many
 #           "Bin N words"/"Bin N total" header pairs are written
 #
 #This method writes master.csv in the current working directory: a header
 #row, then one row per .json file holding its "filename" value, the sum of
 #every bin's "total", and each bin's "words" and "total" in file order.
 def generate_master_output(dir_name, binfile)
   puts dir_name
   # File.foreach closes the file itself once the lines have been counted
   bin_count = File.foreach(binfile).count
   bin_header = (1..bin_count).flat_map do |num|
     ["Bin #{num} words", "Bin #{num} total"]
   end

   # File.join supplies the path separator when dir_name lacks one; plain
   # concatenation would turn "out" into the pattern "out*.json".
   pattern = dir_name.empty? ? '*.json' : File.join(dir_name, '*.json')

   CSV.open('master.csv', 'wb') do |csv|
     csv << (['File', 'Total Words'] + bin_header)
     Dir.glob(pattern) do |file_name|
       puts file_name
       data_hash = JSON.parse(File.read(file_name))
       # Pull the file name out so only the per-bin entries remain in the hash
       source_name = data_hash.delete('filename')
       word_total = data_hash.each_value.sum { |bin| bin['total'] }
       bin_columns = data_hash.each_value.flat_map { |bin| [bin['words'], bin['total']] }
       csv << [source_name, word_total, *bin_columns]
     end
   end
 end
 options = Hash.new
 OptionParser.new do |opts|
  opts.banner = 'sorter.rb --options'
@ -200,17 +124,27 @@ OptionParser.new do |opts|
  opts.on("-b", "--bin-file binfile", "Name of the bin file") do |binfile|
    options[:binfile] = binfile
  end
  opts.on("-d", "--directory dir", "Directory containing text files to process") do |dir|
    options[:dir] = dir
  end
 end.parse!
-if options[:file] && options[:dir]
+
-  puts "Invalid options, you must either a file or a directoy of files."
+csv = CSV.read(options[:binfile])
-elsif options[:file]
+text = File.read(options[:file])
-  process_file(options[:file], options[:binfile], options[:type])
+text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat'
-elsif options[:dir]
+text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
-  process_dir(options[:dir], options[:binfile], options[:type])
+output = Hash.new #Creating the output storage object
-  generate_master_output(options[:dir], options[:binfile])
+bins = Hash.new #This hash stores the bins
-end
+outfile = options[:file]
 # Remove the first occurrence of '.txt' from outfile, in place. outfile was
 # assigned from options[:file] just above, so that string is altered too.
 outfile.slice!('.txt')
 puts outfile
 # Each csv row becomes one bin: the first cell is the key, the rest are its entries.
 csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
 # For every bin, store the bin_counter result under :words and the
 # count_total result under :total, keyed by the bin name as a symbol.
 bins.each_key do |bin_number|
  key = bin_number.to_sym
  output[key] = Hash.new
  output[key][:words] = bin_counter(bins[bin_number], text)
  # count_total is handed the bin's hash, which at this point holds :words only
  output[key][:total] = count_total(output[key])
 end
 # Write the collected results to <outfile>-out.json and <outfile>-out.csv.
 write_output_json(output,outfile + '-out.json')
 write_output_csv(output,outfile + '-out.csv')