made pn option operate on each date section

threading
Jeff Yates 2020-11-21 16:45:55 -05:00
parent 174effc0fb
commit 9a9886df9d
1 changed files with 39 additions and 14 deletions

View File

@ -123,27 +123,52 @@ end
#
#This method is the meat and potatoes. Performs the text stripping, word counting, and creates output files.
# Reads the text file `file_name` and the bin CSV `binfile`, counts words per
# bin, and writes '<outfile>-out.json' and '<outfile>-out.csv' through
# write_output_json / write_output_csv.
#
# NOTE(review): this listing appears to be a flattened diff (hunk header
# "-123,27 +123,52"): removed and added lines are interleaved without +/-
# markers or indentation. Several statements occur twice, and there are six
# block openers (def/if/do) but only five `end`s, so it does not parse as
# shown. Confirm against the real file before relying on statement order.
#
# file_name - path handed to File.read; with '.txt' removed it is also the
#             stem of the output file names.
# binfile   - path handed to CSV.read; for each row the first cell becomes the
#             bin key and the remaining non-nil cells become that bin's entries.
# type      - 'iat' and 'pn' select the two branches below.
def process_file (file_name, binfile, type)
#text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
csv = CSV.read(binfile)
text = File.read(file_name)
# NOTE(review): the next three lines look like the pre-change version of the
# per-type preprocessing; the same statements reappear inside the branches.
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
output = Hash.new #Creating the output storage object
bins = Hash.new #This hash stores the bins
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
# NOTE(review): `outfile` is the same String object as `file_name`, so the
# slice! below also strips '.txt' from `file_name` (and from the caller's
# string). The 'pn' branch builds its names from `file_name` after this point.
outfile = file_name
outfile.slice!('.txt')
if type == 'iat'
# NOTE(review): the modifier `if` is redundant inside this branch.
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
output = Hash.new #Creating the output storage object
#puts outfile
output[:filename] = outfile
output[:total_words] = text.split.size
puts output
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
# NOTE(review): this loop header and body are repeated a few lines below and
# only one `end` follows; presumably one copy is the removed side of the diff.
bins.each_key do |bin_number|
key = bin_number.to_sym
output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key])
output[:filename] = outfile
output[:total_words] = text.split.size
# One entry per bin: :words from bin_counter, :total from count_total.
bins.each_key do |bin_number|
key = bin_number.to_sym
output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key])
end
write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv')
elsif type == 'pn'
puts 'pn'
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
sections.delete_at(0) #we can ignore the first chunk of text
# Each remaining chunk gets its own output hash and its own pair of output files.
sections.each do |chunk|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
# NOTE(review): String#tr! returns nil when it changes nothing, so this chain
# raises NoMethodError if the timestamp lacks a '/' or a ':'. It also raises
# if the chunk has no line other than "\r\n" (timestamp is nil).
timestamp.tr!('/','').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
timestamp.strip!
output = Hash.new #Creating the output storage object
outfile = file_name + '_' + timestamp
outfile.slice!('.txt')
puts outfile
text = strip_text(chunk, 'Narrative:', 'Signatures:')
output[:filename] = outfile
output[:total_words] = text.split.size
bins.each_key do |bin_number|
key = bin_number.to_sym
output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key])
end
write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv')
end
end
# NOTE(review): these two writes repeat the ones inside the 'iat' branch;
# presumably they are the removed pre-change lines -- confirm.
write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv')
end
#process_dir expects: