quran-json/bin/json/pull-english

76 lines
1.8 KiB
Ruby
Executable file

#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, in each chapter
# of The Qur'an - in the English language. The content
# is requested from the https://quran.com website.
#
# Each chapter is then saved to a JSON file - for example:
# "src/json/en/1.json", "src/json/en/2.json", etc.
##
# Set the process title - primarily so that this script is easy to
# identify in the output of the "ps" command.
Process.setproctitle("quran-pull (pull-english)")
##
# Dependencies
require "bundler/setup"
require "net/http"
require "nokogiri"
require "json"
require "paint"
require_relative "../../binlib/io/line"
##
# Settings: the remote host, the destination path template (the
# %{chapter} placeholder is filled in once per chapter), and the
# number of seconds to sleep after each verse request.
base_uri = "quran.com"
dest = File.join(*%w[src json en], "%{chapter}.json")
delay = 1.5
##
# Verse counts, parsed from a JSON file. The download loop looks up
# each chapter number (as a String key) to get its verse count.
vmap_path = File.join(*%w[bindata chapters-length.json])
vmap = File.read(vmap_path).then { |raw| JSON.parse(raw) }
##
# A single Net::HTTP instance (port 443, SSL enabled) that is
# shared by every request.
http = Net::HTTP.new(base_uri, 443).tap { |client| client.use_ssl = true }
##
# Utils
#
# "line" is a project-local IO::Line bound to $stdout. It is used
# for the progress output of this script.
line = IO::Line.new($stdout)
##
# find_content extracts the translation text from a verse page.
#
# It takes a response object whose body is the HTML of the page,
# and returns the text of the last div whose class attribute starts
# with "TranslationText", with the ASCII digits 0-9 removed.
# It raises a RuntimeError when the page contains no such div.
find_content = ->(res) do
  html = Nokogiri::HTML(res.body)
  el = html.css("div[class^='TranslationText']").last
  # A successful response with unexpected markup would otherwise
  # fail with an opaque NoMethodError on nil.
  raise "TranslationText element not found in response body" unless el
  # NOTE(review): presumably this drops footnote markers, but it also
  # removes any digit that belongs to the translation itself - confirm.
  el.text.delete("0-9")
end
##
# Request every verse of chapters 1 through 114, one request per
# verse, and write each chapter to its own JSON file as an array
# of [verse number, text] pairs.
1.upto(114) do |chapter|
  vcount = vmap[chapter.to_s]
  line.rewind.print("Download chapter #{chapter}:").end
  rows = 1.upto(vcount).map do |verse|
    res = http.request_get("/#{chapter}/#{verse}")
    # Any response other than 200 OK aborts the whole run.
    unless res.is_a?(Net::HTTPOK)
      line.end.print(Paint["ABORT", :red, :bold])
          .end.print("Bad response: ", res.class)
          .end
      exit(1)
    end
    row = [verse, find_content.(res)]
    line.rewind.print("#{verse} of #{vcount} verses downloaded. ")
    # Pause before the next request is made.
    sleep(delay)
    row
  end
  File.write(format(dest, chapter:), JSON.pretty_generate(rows))
  line.end.print(Paint["Done.", :bold])
      .end.end
end
##
# Run bin/json/insert-chapter-metadata with the "en" argument,
# and block until that process has exited.
metadata_pid = Process.spawn("./bin/json/insert-chapter-metadata", "en")
Process.wait(metadata_pid)