added split_text() and --type option

2020-10-24 11:04:04 -04:00 · 2020-10-24 11:04:04 -04:00 · 89bb9cf027
parent 96740f161a
commit 89bb9cf027
1 changed files with 23 additions and 4 deletions
--- a/sorter.rb
+++ b/sorter.rb
@ -5,7 +5,7 @@
 #Current Usage:                                                            #
 # Scans a file for groups of words and counts the totals                   #
 # Input file:       ./tester.txt                                           #
-# Wroud group file: ./bins.json                                            #
+# Word group file:  ./bins.json                                            #
 # Output file:      ./output.json                                          #
 #                                                                          #
 # The input file is human readable, easy to edit.                          #
@ -13,7 +13,7 @@
 #                                                                          #
 #To Do:                                                                    #
 # * use STDIN to accept file to scan                                       #
-# * Support scaning multiple files                                         #
+# * Support scanning multiple files                                        #
 # * Output to csv                                                          #
 # * Strip out header and footer text                                       #
 ############################################################################
@ -25,7 +25,7 @@ require 'optparse'
 #bin_counter expects:
 # bin - a hash with strings to search for
 # test - the document text to search through
-#This method returns a hash containg the strings and their frequency
+#This method returns a hash containing the strings and their frequency
 def bin_counter (bin, text)
  ret = Hash.new
  bin.each do |word|
@ -63,18 +63,36 @@ def strip_text (text, start, fin)
  text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0]
 end
 #split_text expects:
 # text - the document text to cut sections out of
 # start - the marker string that opens each wanted section
 # fin - the marker string that closes each wanted section
 #This method returns a single string: the text found between every start
 #marker and the first fin marker that follows it, joined in document order.
 #Both markers are removed. Text before the first start marker and text after
 #a fin marker (up to the next start marker) is dropped. A section with no
 #fin marker is kept through to its end. Returns "" when text is empty or
 #holds no start marker.
 def split_text(text, start, fin)
   # Everything before the first start marker is preamble; drop(1) also copes
   # with empty text, where [1..-1] on the empty array would give nil.
   sections = text.lines(start, chomp: true).drop(1)
   # Keep only the part of each section that precedes fin. An empty section
   # (two start markers back to back) splits to [], hence the to_s.
   sections.map { |section| section.lines(fin, chomp: true).first.to_s }.join
 end
 # Collect the command-line switches into the options hash:
 #  :file - set by -f / --file
 #  :type - set by -t / --type
 # A switch that is not given leaves its key unset.
 options = {}
 parser = OptionParser.new do |opts|
   opts.banner = 'sorter.rb --options'
   opts.on("-f", "--file file", "Name of the file to process") { |file| options[:file] = file }
   opts.on("-t", "--type type", "Type of file. Must be \"iat\" or \"pn\"") { |type| options[:type] = type }
 end
 # parse! works on ARGV by default and removes the switches it recognises.
 parser.parse!
 csv = CSV.read('./bins.csv')
 text = File.read(options[:file])
-text = strip_text(text,'PLOVEINTAKE','PLOVECLOSING')
+text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat'
 text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
 output = Hash.new #Creating the output storage object
 bins = Hash.new #This hash stores the bins
 outfile = options[:file] + '-out.json'
@ -90,3 +108,4 @@ bins.each_key do |bin_number|
 end
 write_output(output,outfile)
 puts text