From f7f95b6b93c587d211d42acd1e7c06a36cf95033 Mon Sep 17 00:00:00 2001 From: Jeff Yates Date: Mon, 26 Oct 2020 12:56:52 -0400 Subject: [PATCH] moved file processing into a method to prepare for batch operation code --- sorter.rb | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/sorter.rb b/sorter.rb index 616f2b9..d14b633 100755 --- a/sorter.rb +++ b/sorter.rb @@ -13,7 +13,6 @@ # The output file is single-line JSON, use jq to format for reading. # # # #To Do: # -# * Output to csv # ############################################################################ require 'json' require 'pp' @@ -112,6 +111,28 @@ def split_text (text, start, fin) return ret end +def process_file (file_name, binfile, type) + csv = CSV.read(binfile) + text = File.read(file_name) + text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat' + text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn' + output = Hash.new #Creating the output storage object + bins = Hash.new #This hash stores the bins + outfile = file_name + outfile.slice!('.txt') + #puts outfile + csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils + bins.each_key do |bin_number| + key = bin_number.to_sym + output[key] = Hash.new + output[key][:words] = bin_counter(bins[bin_number], text) + output[key][:total] = count_total(output[key]) + end + write_output_json(output,outfile + '-out.json') + write_output_csv(output,outfile + '-out.csv') + +end + options = Hash.new OptionParser.new do |opts| opts.banner = 'sorter.rb --options' @@ -124,27 +145,14 @@ OptionParser.new do |opts| opts.on("-b", "--bin-file binfile", "Name of the bin file") do |binfile| options[:binfile] = binfile end + opts.on("-d", "--directoy dir", "Directory containing text files to process") do |dir| + options[:dir] = dir + end end.parse! 
- -csv = CSV.read(options[:binfile]) -text = File.read(options[:file]) -text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat' -text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn' -output = Hash.new #Creating the output storage object -bins = Hash.new #This hash stores the bins -outfile = options[:file] -outfile.slice!('.txt') -puts outfile - -csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils - -bins.each_key do |bin_number| - key = bin_number.to_sym - output[key] = Hash.new - output[key][:words] = bin_counter(bins[bin_number], text) - output[key][:total] = count_total(output[key]) +if options[:file] && options[:dir] + puts "Invalid options, you must specify either a file or a directory of files." +elsif options[:file] + process_file(options[:file], options[:binfile], options[:type]) end -write_output_json(output,outfile + '-out.json') -write_output_csv(output,outfile + '-out.csv')