minor changes to output and date format

threading
Jeff Yates 2020-11-21 16:53:48 -05:00
parent 9a9886df9d
commit fd88c3457c
1 changed file with 3 additions and 6 deletions

View File

@ -145,17 +145,15 @@ def process_file (file_name, binfile, type)
write_output_json(output,outfile + '-out.json') write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv') write_output_csv(output,outfile + '-out.csv')
elsif type == 'pn' elsif type == 'pn'
puts 'pn'
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
sections.delete_at(0) #we can ignore the first chunk of text sections.delete_at(0) #we can ignore the first chunk of text
sections.each do |chunk| sections.each do |chunk|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
timestamp.tr!('/','').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
timestamp.strip! timestamp.strip!
output = Hash.new #Creating the output storage object output = Hash.new #Creating the output storage object
outfile = file_name + '_' + timestamp outfile = file_name + '_' + timestamp
outfile.slice!('.txt') outfile.slice!('.txt')
puts outfile
text = strip_text(chunk, 'Narrative:', 'Signatures:') text = strip_text(chunk, 'Narrative:', 'Signatures:')
output[:filename] = outfile output[:filename] = outfile
output[:total_words] = text.split.size output[:total_words] = text.split.size
@ -179,13 +177,12 @@ end
#This method will process all .txt files in the supplied directory #This method will process all .txt files in the supplied directory
def process_dir(dir_name, binfile, type) def process_dir(dir_name, binfile, type)
Dir.glob(dir_name + '*.txt') do |file_name| Dir.glob(dir_name + '*.txt') do |file_name|
puts "Processing" + file_name puts "Processing " + file_name
process_file(file_name, binfile, type) process_file(file_name, binfile, type)
end end
end end
def generate_master_output(dir_name, binfile) def generate_master_output(dir_name, binfile)
puts dir_name
file=File.open(binfile,"r") file=File.open(binfile,"r")
bin_count = file.readlines.size bin_count = file.readlines.size
file.close file.close
@ -201,7 +198,7 @@ def generate_master_output(dir_name, binfile)
header = ["File", "Total Words" ] + bin_header header = ["File", "Total Words" ] + bin_header
csv << header csv << header
Dir.glob(dir_name + '*.json') do |file_name| Dir.glob(dir_name + '*.json') do |file_name|
puts file_name puts 'Getting data from: ' + file_name
csv_row = [] csv_row = []
json_file = File.read(file_name) json_file = File.read(file_name)
data_hash = JSON.parse(json_file) data_hash = JSON.parse(json_file)