made pn option operate on each date section
parent
174effc0fb
commit
9a9886df9d
53
sorter.rb
53
sorter.rb
|
@ -123,27 +123,52 @@ end
|
|||
#
|
||||
#This method is the meat and potatoes. It performs the text stripping and word counting, and creates the output files.
|
||||
def process_file (file_name, binfile, type)
|
||||
#text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
|
||||
csv = CSV.read(binfile)
|
||||
text = File.read(file_name)
|
||||
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
|
||||
text = split_text(text, 'Narrative:', 'Signatures:') if type == 'pn'
|
||||
output = Hash.new #Creating the output storage object
|
||||
bins = Hash.new #This hash stores the bins
|
||||
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
|
||||
outfile = file_name
|
||||
outfile.slice!('.txt')
|
||||
if type == 'iat'
|
||||
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if type == 'iat'
|
||||
output = Hash.new #Creating the output storage object
|
||||
#puts outfile
|
||||
output[:filename] = outfile
|
||||
output[:total_words] = text.split.size
|
||||
puts output
|
||||
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
|
||||
bins.each_key do |bin_number|
|
||||
key = bin_number.to_sym
|
||||
output[key] = Hash.new
|
||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||
output[key][:total] = count_total(output[key])
|
||||
output[:filename] = outfile
|
||||
output[:total_words] = text.split.size
|
||||
bins.each_key do |bin_number|
|
||||
key = bin_number.to_sym
|
||||
output[key] = Hash.new
|
||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||
output[key][:total] = count_total(output[key])
|
||||
end
|
||||
write_output_json(output,outfile + '-out.json')
|
||||
write_output_csv(output,outfile + '-out.csv')
|
||||
elsif type == 'pn'
|
||||
puts 'pn'
|
||||
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
|
||||
sections.delete_at(0) #we can ignore the first chunk of text
|
||||
sections.each do |chunk|
|
||||
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
|
||||
timestamp.tr!('/','').tr!(':','').tr!(' ','_') #remove slashes and colons from the timestamp, replace spaces with underscores
|
||||
timestamp.strip!
|
||||
output = Hash.new #Creating the output storage object
|
||||
outfile = file_name + '_' + timestamp
|
||||
outfile.slice!('.txt')
|
||||
puts outfile
|
||||
text = strip_text(chunk, 'Narrative:', 'Signatures:')
|
||||
output[:filename] = outfile
|
||||
output[:total_words] = text.split.size
|
||||
bins.each_key do |bin_number|
|
||||
key = bin_number.to_sym
|
||||
output[key] = Hash.new
|
||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||
output[key][:total] = count_total(output[key])
|
||||
end
|
||||
write_output_json(output,outfile + '-out.json')
|
||||
write_output_csv(output,outfile + '-out.csv')
|
||||
end
|
||||
end
|
||||
write_output_json(output,outfile + '-out.json')
|
||||
write_output_csv(output,outfile + '-out.csv')
|
||||
end
|
||||
|
||||
#process_dir expects:
|
||||
|
|
Loading…
Reference in New Issue