moved file processing into a method to prepare for batch operation code

batch
Jeff Yates 2020-10-26 12:56:52 -04:00
parent 79e5bc63e1
commit f7f95b6b93
1 changed files with 29 additions and 21 deletions

View File

@ -13,7 +13,6 @@
# The output file is single-line JSON, use jq to format for reading. # # The output file is single-line JSON, use jq to format for reading. #
# # # #
#To Do: # #To Do: #
# * Output to csv #
############################################################################ ############################################################################
require 'json' require 'json'
require 'pp' require 'pp'
@ -112,6 +111,28 @@ def split_text (text, start, fin)
return ret return ret
end end
# Counts bin-word occurrences in a single text file and writes the results.
#
# Reads the bin definitions from the CSV at +binfile+ (the first column of each
# row is used as the bin name, the remaining non-nil columns are handed to
# bin_counter), reads the text at +file_name+, optionally narrows the text
# according to +type+, and then writes "<base>-out.json" and "<base>-out.csv",
# where <base> is +file_name+ with its first '.txt' occurrence removed.
#
# file_name - path of the text file to process; the string is NOT modified.
# binfile   - path of the CSV file holding the bins.
# type      - 'iat' passes the text through strip_text (PLOVEINTAKE/PLOVECLOSING
#             markers), 'pn' passes it through split_text (Narrative:/Signatures:
#             markers); any other value leaves the text untouched.
#
# Returns whatever write_output_csv returns.
def process_file(file_name, binfile, type)
  csv = CSV.read(binfile)
  text = File.read(file_name)
  text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
  text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'

  # Derive the output base name without touching the caller's string: the
  # previous `slice!` mutated file_name in place (and failed on frozen strings),
  # which would corrupt the path for any caller that reuses it, e.g. a batch loop.
  outfile = file_name.sub('.txt', '')

  # Turn the csv rows into a hash of bin name => bin entries, dropping nils.
  bins = {}
  csv.each do |bin|
    next if bin.first.nil? # blank CSV lines parse as [], there is no bin to record

    bins[bin.first] = bin[1..].compact
  end

  output = {} # Output storage object, keyed by bin name as a symbol
  bins.each do |bin_number, words|
    key = bin_number.to_sym
    output[key] = {}
    output[key][:words] = bin_counter(words, text)
    # count_total receives the partially built entry (only :words is set so far).
    output[key][:total] = count_total(output[key])
  end

  write_output_json(output, "#{outfile}-out.json")
  write_output_csv(output, "#{outfile}-out.csv")
end
options = Hash.new options = Hash.new
OptionParser.new do |opts| OptionParser.new do |opts|
opts.banner = 'sorter.rb --options' opts.banner = 'sorter.rb --options'
@ -124,27 +145,14 @@ OptionParser.new do |opts|
opts.on("-b", "--bin-file binfile", "Name of the bin file") do |binfile| opts.on("-b", "--bin-file binfile", "Name of the bin file") do |binfile|
options[:binfile] = binfile options[:binfile] = binfile
end end
opts.on("-d", "--directoy dir", "Directory containing text files to process") do |dir|
options[:dir] = dir
end
end.parse! end.parse!
if options[:file] && options[:dir]
csv = CSV.read(options[:binfile]) puts "Invalid options, you must either a file or a directoy of files."
text = File.read(options[:file]) elsif options[:file]
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat' process_file(options[:file], options[:binfile], options[:type])
text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
output = Hash.new #Creating the output storage object
bins = Hash.new #This hash stores the bins
outfile = options[:file]
outfile.slice!('.txt')
puts outfile
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
bins.each_key do |bin_number|
key = bin_number.to_sym
output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key])
end end
write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv')