moved file processing into a method to prepare for batch operation code
parent
79e5bc63e1
commit
f7f95b6b93
50
sorter.rb
50
sorter.rb
|
@ -13,7 +13,6 @@
|
|||
# The output file is single-line JSON, use jq to format for reading. #
|
||||
# #
|
||||
#To Do: #
|
||||
# * Output to csv #
|
||||
############################################################################
|
||||
require 'json'
|
||||
require 'pp'
|
||||
|
@ -112,6 +111,28 @@ def split_text (text, start, fin)
|
|||
return ret
|
||||
end
|
||||
|
||||
# Counts bin-word occurrences in a single text file and writes the results
# as JSON and as CSV.
#
# file_name - path of the text file to analyse. The string is NOT modified;
#             the previous implementation stripped '.txt' from the caller's
#             object in place (and raised FrozenError on frozen strings).
# binfile   - path of a CSV file. The first cell of each row is the bin name,
#             the remaining non-empty cells are the words of that bin.
# type      - 'iat' or 'pn' selects a pre-processing step (strip_text /
#             split_text) for the text; any other value leaves it untouched.
#
# The results are written to "<base>-out.json" and "<base>-out.csv", where
# <base> is file_name with its first '.txt' occurrence removed.
def process_file(file_name, binfile, type)
  rows = CSV.read(binfile)
  text = File.read(file_name)
  text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
  text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'

  # Same result as slice!('.txt') (first occurrence removed), but on a copy so
  # the caller's file name stays intact.
  outfile = file_name.sub('.txt', '')

  # Turn the CSV rows into { bin name => words }, dropping nil cells.
  bins = rows.each_with_object({}) do |row, acc|
    next if row.first.nil? # blank line or row without a bin name

    acc[row.first] = row[1..].compact
  end

  output = {}
  bins.each do |bin_number, words|
    key = bin_number.to_sym
    output[key] = { words: bin_counter(words, text) }
    # count_total receives the entry while it only holds :words, as before.
    output[key][:total] = count_total(output[key])
  end

  write_output_json(output, "#{outfile}-out.json")
  write_output_csv(output, "#{outfile}-out.csv")
end
|
||||
|
||||
options = Hash.new
|
||||
OptionParser.new do |opts|
|
||||
opts.banner = 'sorter.rb --options'
|
||||
|
@ -124,27 +145,14 @@ OptionParser.new do |opts|
|
|||
opts.on("-b", "--bin-file binfile", "Name of the bin file") do |binfile|
|
||||
options[:binfile] = binfile
|
||||
end
|
||||
opts.on("-d", "--directoy dir", "Directory containing text files to process") do |dir|
|
||||
options[:dir] = dir
|
||||
end
|
||||
end.parse!
|
||||
|
||||
|
||||
csv = CSV.read(options[:binfile])
|
||||
text = File.read(options[:file])
|
||||
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat'
|
||||
text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
|
||||
output = Hash.new #Creating the output storage object
|
||||
bins = Hash.new #This hash stores the bins
|
||||
outfile = options[:file]
|
||||
outfile.slice!('.txt')
|
||||
puts outfile
|
||||
|
||||
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
|
||||
|
||||
bins.each_key do |bin_number|
|
||||
key = bin_number.to_sym
|
||||
output[key] = Hash.new
|
||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||
output[key][:total] = count_total(output[key])
|
||||
# Dispatch on the parsed command-line options. A single file and a directory
# are mutually exclusive inputs; when only a directory is given nothing is
# processed by this branch.
if options[:file] && options[:dir]
  puts 'Invalid options: you must specify either a file or a directory of files, not both.'
elsif options[:file]
  process_file(options[:file], options[:binfile], options[:type])
end
|
||||
write_output_json(output,outfile + '-out.json')
|
||||
write_output_csv(output,outfile + '-out.csv')
|
||||
|
||||
|
|
Loading…
Reference in New Issue