Looping through files in S3 or a local folder

October 11, 2018

gem 'aws-sdk', '~> 3'
gem 'pry'

require 'pry'
require 'aws-sdk'

# NOTE(review): these two values look like placeholders and must be replaced
# with real credentials before the script can talk to S3. Avoid committing
# real keys to source control.
Aws.config.update({
  access_key_id: "AWS_ACCESS_KEY_ID",
  secret_access_key: "AWS_SECRET_ACCESS_KEY"
})

# CSV of UUIDs that are missing (compiled after we uploaded missing files).
# One ID per line. Any line-ending style (CR, LF or CRLF) is accepted,
# surrounding whitespace is trimmed, and blank lines are ignored so that an
# empty string is never treated as an ID.
@missing_ids = File.read('/Users/murph/Desktop/SDK/missing_ids.csv')
                   .split(/\r\n|\r|\n/)
                   .map(&:strip)
                   .reject(&:empty?)

# Prints one line for every occurrence of a missing ID in +candidates+.
#
# Lines are emitted in the order of @missing_ids, once per matching candidate,
# so an ID that shows up several times among the candidates is reported
# several times. The "S3_ID" label is used for both the S3 and the local
# folder search to keep the output format stable.
#
# candidates - Array of String names (file names without the .xml extension).
def report_matches(candidates)
  # Count occurrences once so each missing ID is a single hash lookup rather
  # than a scan over every candidate.
  occurrences = Hash.new(0)
  candidates.each { |name| occurrences[name] += 1 }

  @missing_ids.each do |id|
    occurrences[id].times { puts "ID::#{id}::::S3_ID::#{id}" }
  end
end

# Hits S3, loops through specific survey subfolders and compares the object
# names found there to the list of missing IDs.
# This is to see if we have any on S3 that we thought failed to upload at all.
def search_on_s3
  s3 = Aws::S3::Resource.new
  prefix = 'surveys/form_submissions/'
  survey_ids = ["2161/", "2162/", "2209/", "2210/"]

  puts "Collecting S3 keys ..."
  s3_items = survey_ids.flat_map do |id|
    key_prefix = "#{prefix}#{id}"
    s3.bucket('sdk-core-production').objects(prefix: key_prefix).map do |obj|
      # Objects are listed by prefix, so the first occurrence of key_prefix is
      # the leading one; only a trailing ".xml" is removed.
      obj.key.sub(key_prefix, '').sub(/\.xml\z/, '')
    end
  end

  puts "FILES FOUND ON S3 ..."
  report_matches(s3_items)
  puts "Done."
end

# Searches folders downloaded from S3 as backups before manual import.
# This is to see if we have any on S3 that we thought failed to upload at all.
# Really the S3 method should be done before backing up, but both methods
# were needed.
def search_in_folders
  folder_path = "/Users/murph/Desktop/SDK/"
  # folders contain .xml missing files
  folders = ["niger_tt_cluster_surveyv1_original/",
             "niger_tt_resident_surveyv3_original/",
             "niger_tt_household_survey1_original/",
             "niger_tt_absent_surveyv2_original/",
             "niger_tt_supervisor_v2_original/",
             "niger_tt_absent_surveyv3_original/",
             "niger_tt_resident_surveyv2_original/"]

  folder_items = folders.flat_map do |folder|
    Dir.foreach("#{folder_path}#{folder}")
       .reject { |item| item == '.' || item == '..' }
       .map { |item| item.sub(/\.xml\z/, '') }
  end

  puts "FILES FOUND IN FOLDER ..."
  report_matches(folder_items)
  puts "Done."
end

search_on_s3
search_in_folders