diff --git a/sorter.rb b/sorter.rb index 06c7198..b339d52 100755 --- a/sorter.rb +++ b/sorter.rb @@ -5,7 +5,7 @@ #Current Usage: # # Scans a file for groups of words and counts the totals # # Input file: ./tester.txt # -# Wroud group file: ./bins.json # +# Word group file: ./bins.json # # Output file: ./output.json # # # # The input file is human readable, easy to edit. # @@ -13,7 +13,7 @@ # # #To Do: # # * use STDIN to accept file to scan # -# * Support scaning multiple files # +# * Support scanning multiple files # # * Output to csv # # * Strip out header and footer text # ############################################################################ @@ -25,7 +25,7 @@ require 'optparse' #bin_counter expects: # bin - a hash with strings to search for # test - the document text to search through -#This method returns a hash containg the strings and their frequency +#This method returns a hash containing the strings and their frequency def bin_counter (bin, text) ret = Hash.new bin.each do |word| @@ -63,18 +63,36 @@ def strip_text (text, start, fin) text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0] end +def split_text (text, start, fin) + split1 = text.lines(start, chomp: true)[1..-1] + split2 = [] + ret = "" + split1.each do |section| + split2 += section.lines(fin, chomp: true) + end + split2.each do |section| + ret += section + end + return ret +end + options = Hash.new OptionParser.new do |opts| opts.banner = 'sorter.rb --options' opts.on("-f", "--file file", "Name of the file to process") do |file| options[:file] = file end + opts.on("-t", "--type type", "Type of file. Must be \"iat\" or \"pn\"") do |type| + options[:type] = type + end end.parse! + csv = CSV.read('./bins.csv') text = File.read(options[:file]) -text = strip_text(text,'PLOVEINTAKE','PLOVECLOSING') +text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat' +text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn' output = Hash.new #Creating the output storage object bins = Hash.new #This hash stores the bins outfile = options[:file] + '-out.json' @@ -90,3 +108,4 @@ bins.each_key do |bin_number| end write_output(output,outfile) +puts text