minor changes to output and date format
parent
9a9886df9d
commit
fd88c3457c
|
@ -145,17 +145,15 @@ def process_file (file_name, binfile, type)
|
||||||
write_output_json(output,outfile + '-out.json')
|
write_output_json(output,outfile + '-out.json')
|
||||||
write_output_csv(output,outfile + '-out.csv')
|
write_output_csv(output,outfile + '-out.csv')
|
||||||
elsif type == 'pn'
|
elsif type == 'pn'
|
||||||
puts 'pn'
|
|
||||||
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
|
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
|
||||||
sections.delete_at(0) #we can ignore the first chunk of text
|
sections.delete_at(0) #we can ignore the first chunk of text
|
||||||
sections.each do |chunk|
|
sections.each do |chunk|
|
||||||
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
|
||||||
timestamp.tr!('/','').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
|
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #replace slashes with dashes, remove colons from timestamp, replaces spaces with underscores
|
||||||
timestamp.strip!
|
timestamp.strip!
|
||||||
output = Hash.new #Creating the output storage object
|
output = Hash.new #Creating the output storage object
|
||||||
outfile = file_name + '_' + timestamp
|
outfile = file_name + '_' + timestamp
|
||||||
outfile.slice!('.txt')
|
outfile.slice!('.txt')
|
||||||
puts outfile
|
|
||||||
text = strip_text(chunk, 'Narrative:', 'Signatures:')
|
text = strip_text(chunk, 'Narrative:', 'Signatures:')
|
||||||
output[:filename] = outfile
|
output[:filename] = outfile
|
||||||
output[:total_words] = text.split.size
|
output[:total_words] = text.split.size
|
||||||
|
@ -179,13 +177,12 @@ end
|
||||||
#This method will process all .txt files in the supplied directory
|
#This method will process all .txt files in the supplied directory
|
||||||
def process_dir(dir_name, binfile, type)
|
def process_dir(dir_name, binfile, type)
|
||||||
Dir.glob(dir_name + '*.txt') do |file_name|
|
Dir.glob(dir_name + '*.txt') do |file_name|
|
||||||
puts "Processing" + file_name
|
puts "Processing " + file_name
|
||||||
process_file(file_name, binfile, type)
|
process_file(file_name, binfile, type)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def generate_master_output(dir_name, binfile)
|
def generate_master_output(dir_name, binfile)
|
||||||
puts dir_name
|
|
||||||
file=File.open(binfile,"r")
|
file=File.open(binfile,"r")
|
||||||
bin_count = file.readlines.size
|
bin_count = file.readlines.size
|
||||||
file.close
|
file.close
|
||||||
|
@ -201,7 +198,7 @@ def generate_master_output(dir_name, binfile)
|
||||||
header = ["File", "Total Words" ] + bin_header
|
header = ["File", "Total Words" ] + bin_header
|
||||||
csv << header
|
csv << header
|
||||||
Dir.glob(dir_name + '*.json') do |file_name|
|
Dir.glob(dir_name + '*.json') do |file_name|
|
||||||
puts file_name
|
puts 'Getting data from: ' + file_name
|
||||||
csv_row = []
|
csv_row = []
|
||||||
json_file = File.read(file_name)
|
json_file = File.read(file_name)
|
||||||
data_hash = JSON.parse(json_file)
|
data_hash = JSON.parse(json_file)
|
||||||
|
|
Loading…
Reference in New Issue