added split_text() and --type option

batch
Jeff Yates 2020-10-24 11:04:04 -04:00
parent 96740f161a
commit 89bb9cf027
1 changed files with 23 additions and 4 deletions

View File

@ -5,7 +5,7 @@
#Current Usage: # #Current Usage: #
# Scans a file for groups of words and counts the totals # # Scans a file for groups of words and counts the totals #
# Input file: ./tester.txt # # Input file: ./tester.txt #
# Wroud group file: ./bins.json # # Word group file: ./bins.json #
# Output file: ./output.json # # Output file: ./output.json #
# # # #
# The input file is human readable, easy to edit. # # The input file is human readable, easy to edit. #
@ -13,7 +13,7 @@
# # # #
#To Do: # #To Do: #
# * use STDIN to accept file to scan # # * use STDIN to accept file to scan #
# * Support scaning multiple files # # * Support scanning multiple files #
# * Output to csv # # * Output to csv #
# * Strip out header and footer text # # * Strip out header and footer text #
############################################################################ ############################################################################
@ -25,7 +25,7 @@ require 'optparse'
#bin_counter expects: #bin_counter expects:
# bin - a hash with strings to search for # bin - a hash with strings to search for
# test - the document text to search through # test - the document text to search through
#This method returns a hash containg the strings and their frequency #This method returns a hash containing the strings and their frequency
def bin_counter (bin, text) def bin_counter (bin, text)
ret = Hash.new ret = Hash.new
bin.each do |word| bin.each do |word|
@ -63,18 +63,36 @@ def strip_text (text, start, fin)
text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0] text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0]
end end
#split_text expects:
# text  - the document text to process
# start - marker string that opens a section
# fin   - marker string that closes a section
#This method returns a single string built as follows:
# * everything before the first start marker is discarded
# * the remaining text is cut at every start marker, then at every fin marker
# * the markers themselves are removed and the pieces are joined in order
#Returns "" when text is empty or contains no start marker.
#NOTE(review): text that follows a fin marker (up to the next start marker)
# is kept in the result; only the marker is dropped. Confirm this is intended
# rather than keeping only the text between start and fin.
def split_text(text, start, fin)
  # drop(1) yields [] for an empty array, whereas [1..-1] yields nil there
  # and made the following iteration raise NoMethodError on empty text.
  sections = text.lines(start, chomp: true).drop(1)
  sections.flat_map { |section| section.lines(fin, chomp: true) }.join
end
options = Hash.new options = Hash.new
OptionParser.new do |opts| OptionParser.new do |opts|
opts.banner = 'sorter.rb --options' opts.banner = 'sorter.rb --options'
opts.on("-f", "--file file", "Name of the file to process") do |file| opts.on("-f", "--file file", "Name of the file to process") do |file|
options[:file] = file options[:file] = file
end end
opts.on("-t", "--type type", "Type of file. Must be \"iat\" or \"pn\"") do |type|
options[:type] = type
end
end.parse! end.parse!
csv = CSV.read('./bins.csv') csv = CSV.read('./bins.csv')
text = File.read(options[:file]) text = File.read(options[:file])
text = strip_text(text,'PLOVEINTAKE','PLOVECLOSING') text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat'
text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
output = Hash.new #Creating the output storage object output = Hash.new #Creating the output storage object
bins = Hash.new #This hash stores the bins bins = Hash.new #This hash stores the bins
outfile = options[:file] + '-out.json' outfile = options[:file] + '-out.json'
@ -90,3 +108,4 @@ bins.each_key do |bin_number|
end end
write_output(output,outfile) write_output(output,outfile)
puts text