improve the pull-farsi, and pull-portuguese scripts
This commit is contained in:
parent
a077db4b7f
commit
6f44b12410
4 changed files with 206 additions and 191 deletions
78
bin/json/private/al-quran
Executable file
78
bin/json/private/al-quran
Executable file
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env ruby
|
||||
# frozen_string_literal: true
|
||||
|
||||
##
|
||||
# This script requests each verse, from each chapter
|
||||
# in The Qur'an in a language that is determined by
|
||||
# its first two arguments. For example:
|
||||
#
|
||||
# * ./al-quran pt portuguese
|
||||
# * ./al-quran fa farsi
|
||||
# * etc.
|
||||
|
||||
##
|
||||
# Set process title - primarily for the "ps" command
|
||||
Process.setproctitle("quran-pull/json/private/al-quran (#{ARGV[1]})")
|
||||
|
||||
##
|
||||
# Dependencies
|
||||
require "net/http"
|
||||
require "nokogiri"
|
||||
require "json"
|
||||
require "paint"
|
||||
require_relative "../../../binlib/io/line"
|
||||
|
||||
##
|
||||
# Configuration variables
|
||||
base_uri = "al-quran.cc"
|
||||
path = "/quran-translation/#{ARGV[1]}/%{chapter}/%{verse}.html"
|
||||
dest = File.join("src", "json", ARGV[0], "%{chapter}.json")
|
||||
delay = 0.5
|
||||
|
||||
##
|
||||
# Chapter names
|
||||
# Array of chapter names as used in al-quran.cc URLs; the main
# loop looks a chapter up with its number minus one.
names_path = File.join('bindata', 'al-quran.cc', 'chapter-names.json')
names = JSON.parse(File.read(names_path))
|
||||
|
||||
##
|
||||
# Map chapters to their verse count.
|
||||
vmap_path = File.join("bindata", "chapters-length.json")
|
||||
vmap = JSON.parse(File.read(vmap_path))
|
||||
|
||||
##
|
||||
# Share a single Net::HTTP instance.
|
||||
http = Net::HTTP.new(base_uri, 443)
|
||||
http.use_ssl = true
|
||||
|
||||
##
|
||||
# Utils
|
||||
line = IO::Line.new($stdout)
|
||||
# Returns the text of the elements that match ".ayah .translate"
# in the HTML body of the given response.
find_content = lambda do |res|
  Nokogiri::HTML(res.body)
    .css(".ayah .translate")
    .text
end
|
||||
|
||||
##
# Download every verse of every chapter and write one JSON file
# per chapter. Each file holds an array of [verse number, text]
# pairs. The script exits with status 1 on the first response
# that is not a 200 (OK).
#
# The requests are made inside "http.start" so that a connection
# is opened once and reused; outside of a started session,
# Net::HTTP opens and closes a connection for every request.
http.start do
  1.upto(114) do |chapter|
    line.rewind.print("Download chapter #{chapter}:").end
    # "fetch" raises when a chapter is missing from the bindata
    # files, rather than passing nil along to the code below.
    name = names.fetch(chapter - 1)
    vcount = vmap.fetch(chapter.to_s)
    rows = []
    1.upto(vcount) do |verse|
      res = http.request_get(format(path, chapter: name, verse:))
      unless res.is_a?(Net::HTTPOK)
        line.end.print(Paint["ABORT", :red, :bold])
            .end.print("Bad response: ", res.class)
            .end
        exit(1)
      end
      rows.push([verse, find_content.(res)])
      line.rewind.print("#{verse} of #{vcount} verses downloaded. ")
      # Pause between requests.
      sleep(delay)
    end
    File.binwrite(format(dest, chapter:), JSON.pretty_generate(rows))
    line.end.print(Paint["Done.", :bold])
        .end.end
  end
end
|
|
@ -7,105 +7,16 @@
|
|||
# is requested from the https://al-quran.cc website.
|
||||
#
|
||||
# Each chapter is then saved in a JSON file, for example:
|
||||
# "src/fa/<chapter_num>.json"
|
||||
# "src/json/fa/1.json", "src/json/fa/2.json", etc.
|
||||
|
||||
##
|
||||
# Dependencies
|
||||
require "net/http"
|
||||
require "nokogiri"
|
||||
require "json"
|
||||
require "paint"
|
||||
# Set process name - primarily for the "ps" command.
|
||||
Process.setproctitle("quran-pull/json/pull-farsi")
|
||||
|
||||
##
|
||||
# Chapter names
|
||||
ch_names = %w[
|
||||
al-fatiha al-baqara aal-e-imran an-nisa
|
||||
al-maeda al-anaam al-araf al-anfal
|
||||
at-tawba yunus hud yusuf
|
||||
ar-rad ibrahim al-hijr an-nahl
|
||||
al-isra al-kahf maryam ta-ha
|
||||
al-anbiya al-hajj al-mumenoon an-nur
|
||||
al-furqan ash-shuara an-naml al-qasas
|
||||
al-ankabut ar-rum luqman as-sajda
|
||||
al-ahzab saba fatir ya-seen
|
||||
as-saaffat sad az-zamar ghafir
|
||||
fussilat ash-shura az-zukhruf ad-dukhan
|
||||
al-jathiya al-ahqaf muhammad al-fath
|
||||
al-hujraat qaf adh-dhariyat at-tur
|
||||
an-najm al-qamar ar-rahman al-waqia
|
||||
al-hadid al-mujadala al-hashr al-mumtahana
|
||||
as-saff al-jumua al-munafiqoon at-taghabun
|
||||
at-talaq at-tahrim al-mulk al-qalam
|
||||
al-haaqqa al-maarij nooh al-jinn
|
||||
al-muzzammil al-muddaththir al-qiyama al-insan
|
||||
al-mursalat an-naba an-naziat abasa
|
||||
at-takwir al-infitar al-mutaffifin al-inshiqaq
|
||||
al-burooj at-tariq al-ala al-ghashiya
|
||||
al-fajr al-balad ash-shams al-lail
|
||||
ad-dhuha al-sharh at-tin al-alaq
|
||||
al-qadr al-bayyina az-zalzala al-adiyat
|
||||
al-qaria at-takathur al-asr al-humaza
|
||||
al-fil quraish al-maun al-kauther
|
||||
al-kafiroon an-nasr al-masadd al-ikhlas
|
||||
al-falaq an-nas
|
||||
]
|
||||
|
||||
##
|
||||
# Configuration variables.
|
||||
base_uri = "al-quran.cc"
|
||||
path = "/quran-translation/farsi/%{ch_name}/%{verse_num}.html"
|
||||
cool_off = 5
|
||||
src_path = File.join(
|
||||
__dir__, "..", "src", "json", "ar", "%{chapter_num}.json"
|
||||
)
|
||||
dest_path = File.join(
|
||||
__dir__, "..", "src", "json", "fa", "%{chapter_num}.json"
|
||||
# Spawn bin/json/private/al-quran
|
||||
Process.wait Process.spawn(
|
||||
"./bin/json/private/al-quran", "fa", "farsi"
|
||||
)
|
||||
|
||||
##
|
||||
# Share a single Net::HTTP instance.
|
||||
http = Net::HTTP.new(base_uri, 443)
|
||||
http.use_ssl = true
|
||||
|
||||
##
|
||||
# Helper method.
|
||||
def get_request(path)
|
||||
Net::HTTP::Get.new(
|
||||
path,
|
||||
"Accept" => "text/html"
|
||||
)
|
||||
end
|
||||
|
||||
##
|
||||
# Helper method.
|
||||
def extract_verse!(res)
|
||||
doc = Nokogiri::HTML(res.body)
|
||||
verse = doc.css(".ayah .translate").first
|
||||
verse.text
|
||||
end
|
||||
|
||||
##
|
||||
# main()
|
||||
1.upto(114) do |chapter_num|
|
||||
verses = JSON.parse File.read(format(src_path, chapter_num:))
|
||||
rows = []
|
||||
ch_name = ch_names[chapter_num - 1]
|
||||
final_dest = format(dest_path, chapter_num:)
|
||||
|
||||
print "Fetch: ", ch_name, "\n"
|
||||
verses.each.with_index(1) do |_, verse_num|
|
||||
htm_file = format(path, ch_name:, verse_num:)
|
||||
case res = http.request(get_request(htm_file))
|
||||
when Net::HTTPOK
|
||||
rows.push([verse_num, extract_verse!(res)])
|
||||
else
|
||||
print Paint["ERROR (#{res.class}): ", :red, :bold], final_dest, "\n"
|
||||
break
|
||||
end
|
||||
sleep 0.1
|
||||
end
|
||||
File.write(final_dest, JSON.pretty_generate(rows))
|
||||
print Paint["OK: ", :green, :bold], final_dest.sub(ENV["HOME"], ""), "\n"
|
||||
sleep cool_off
|
||||
print Paint["Chill for #{cool_off} seconds", :blue, :bold], "\n", "\n"
|
||||
end
|
||||
|
|
|
@ -7,104 +7,14 @@
|
|||
# is requested from the https://al-quran.cc website.
|
||||
#
|
||||
# Each chapter is then saved in a JSON file, for example:
|
||||
# "src/pt/<chapter_num>.json"
|
||||
# "src/json/pt/1.json", "src/json/pt/2.json", etc.
|
||||
|
||||
##
|
||||
# Dependencies
|
||||
require "net/http"
|
||||
require "nokogiri"
|
||||
require "json"
|
||||
require "paint"
|
||||
# Set process title - primarily for the "ps" command.
|
||||
Process.setproctitle("quran-pull/json/pull-portuguese")
|
||||
|
||||
##
|
||||
# Chapter names
|
||||
ch_names = %w[
|
||||
al-fatiha al-baqara aal-e-imran an-nisa
|
||||
al-maeda al-anaam al-araf al-anfal
|
||||
at-tawba yunus hud yusuf
|
||||
ar-rad ibrahim al-hijr an-nahl
|
||||
al-isra al-kahf maryam ta-ha
|
||||
al-anbiya al-hajj al-mumenoon an-nur
|
||||
al-furqan ash-shuara an-naml al-qasas
|
||||
al-ankabut ar-rum luqman as-sajda
|
||||
al-ahzab saba fatir ya-seen
|
||||
as-saaffat sad az-zamar ghafir
|
||||
fussilat ash-shura az-zukhruf ad-dukhan
|
||||
al-jathiya al-ahqaf muhammad al-fath
|
||||
al-hujraat qaf adh-dhariyat at-tur
|
||||
an-najm al-qamar ar-rahman al-waqia
|
||||
al-hadid al-mujadala al-hashr al-mumtahana
|
||||
as-saff al-jumua al-munafiqoon at-taghabun
|
||||
at-talaq at-tahrim al-mulk al-qalam
|
||||
al-haaqqa al-maarij nooh al-jinn
|
||||
al-muzzammil al-muddaththir al-qiyama al-insan
|
||||
al-mursalat an-naba an-naziat abasa
|
||||
at-takwir al-infitar al-mutaffifin al-inshiqaq
|
||||
al-burooj at-tariq al-ala al-ghashiya
|
||||
al-fajr al-balad ash-shams al-lail
|
||||
ad-dhuha al-sharh at-tin al-alaq
|
||||
al-qadr al-bayyina az-zalzala al-adiyat
|
||||
al-qaria at-takathur al-asr al-humaza
|
||||
al-fil quraish al-maun al-kauther
|
||||
al-kafiroon an-nasr al-masadd al-ikhlas
|
||||
al-falaq an-nas
|
||||
]
|
||||
|
||||
##
|
||||
# Configuration variables.
|
||||
base_uri = "al-quran.cc"
|
||||
path = "/quran-translation/portuguese/%{ch_name}/%{verse_num}.html"
|
||||
cool_off = 5
|
||||
src_path = File.join(
|
||||
__dir__, "..", "src", "json", "ar", "%{chapter_num}.json"
|
||||
# Spawn bin/json/private/al-quran
|
||||
Process.wait Process.spawn(
|
||||
"./bin/json/private/al-quran", "pt", "portuguese"
|
||||
)
|
||||
dest_path = File.join(
|
||||
__dir__, "..", "src", "json", "pt", "%{chapter_num}.json"
|
||||
)
|
||||
|
||||
##
|
||||
# Share a single Net::HTTP instance.
|
||||
http = Net::HTTP.new(base_uri, 443)
|
||||
http.use_ssl = true
|
||||
|
||||
##
|
||||
# Helper method.
|
||||
def get_request(path)
|
||||
Net::HTTP::Get.new(
|
||||
path,
|
||||
"Accept" => "text/html"
|
||||
)
|
||||
end
|
||||
|
||||
##
|
||||
# Helper method.
|
||||
def extract_verse!(res, remove_suratu)
|
||||
doc = Nokogiri::HTML(res.body)
|
||||
verse = doc.css(".ayah .translate").first
|
||||
(remove_suratu ? verse.text.sub(/^Suratu [\w\-.]+/, "") : verse.text).strip
|
||||
end
|
||||
|
||||
##
|
||||
# main()
|
||||
1.upto(114) do |chapter_num|
|
||||
verses = JSON.parse File.read(format(src_path, chapter_num:))
|
||||
rows = []
|
||||
ch_name = ch_names[chapter_num - 1]
|
||||
final_dest = format(dest_path, chapter_num:)
|
||||
|
||||
print "Fetch: ", ch_name, "\n"
|
||||
verses.each.with_index(1) do |_, verse_num|
|
||||
htm_file = format(path, ch_name:, verse_num:)
|
||||
case res = http.request(get_request(htm_file))
|
||||
when Net::HTTPOK
|
||||
rows.push([verse_num, extract_verse!(res, verse_num == 1)])
|
||||
else
|
||||
print Paint["ERROR (#{res.class}): ", :red, :bold], final_dest, "\n"
|
||||
break
|
||||
end
|
||||
end
|
||||
File.write(final_dest, JSON.pretty_generate(rows))
|
||||
print Paint["OK: ", :green, :bold], final_dest.sub(ENV["HOME"], ""), "\n"
|
||||
sleep cool_off
|
||||
print Paint["Chill for #{cool_off} seconds", :blue, :bold], "\n", "\n"
|
||||
end
|
||||
|
|
116
bindata/al-quran.cc/chapter-names.json
Normal file
116
bindata/al-quran.cc/chapter-names.json
Normal file
|
@ -0,0 +1,116 @@
|
|||
[
|
||||
"al-fatiha",
|
||||
"al-baqara",
|
||||
"aal-e-imran",
|
||||
"an-nisa",
|
||||
"al-maeda",
|
||||
"al-anaam",
|
||||
"al-araf",
|
||||
"al-anfal",
|
||||
"at-tawba",
|
||||
"yunus",
|
||||
"hud",
|
||||
"yusuf",
|
||||
"ar-rad",
|
||||
"ibrahim",
|
||||
"al-hijr",
|
||||
"an-nahl",
|
||||
"al-isra",
|
||||
"al-kahf",
|
||||
"maryam",
|
||||
"ta-ha",
|
||||
"al-anbiya",
|
||||
"al-hajj",
|
||||
"al-mumenoon",
|
||||
"an-nur",
|
||||
"al-furqan",
|
||||
"ash-shuara",
|
||||
"an-naml",
|
||||
"al-qasas",
|
||||
"al-ankabut",
|
||||
"ar-rum",
|
||||
"luqman",
|
||||
"as-sajda",
|
||||
"al-ahzab",
|
||||
"saba",
|
||||
"fatir",
|
||||
"ya-seen",
|
||||
"as-saaffat",
|
||||
"sad",
|
||||
"az-zamar",
|
||||
"ghafir",
|
||||
"fussilat",
|
||||
"ash-shura",
|
||||
"az-zukhruf",
|
||||
"ad-dukhan",
|
||||
"al-jathiya",
|
||||
"al-ahqaf",
|
||||
"muhammad",
|
||||
"al-fath",
|
||||
"al-hujraat",
|
||||
"qaf",
|
||||
"adh-dhariyat",
|
||||
"at-tur",
|
||||
"an-najm",
|
||||
"al-qamar",
|
||||
"ar-rahman",
|
||||
"al-waqia",
|
||||
"al-hadid",
|
||||
"al-mujadala",
|
||||
"al-hashr",
|
||||
"al-mumtahana",
|
||||
"as-saff",
|
||||
"al-jumua",
|
||||
"al-munafiqoon",
|
||||
"at-taghabun",
|
||||
"at-talaq",
|
||||
"at-tahrim",
|
||||
"al-mulk",
|
||||
"al-qalam",
|
||||
"al-haaqqa",
|
||||
"al-maarij",
|
||||
"nooh",
|
||||
"al-jinn",
|
||||
"al-muzzammil",
|
||||
"al-muddaththir",
|
||||
"al-qiyama",
|
||||
"al-insan",
|
||||
"al-mursalat",
|
||||
"an-naba",
|
||||
"an-naziat",
|
||||
"abasa",
|
||||
"at-takwir",
|
||||
"al-infitar",
|
||||
"al-mutaffifin",
|
||||
"al-inshiqaq",
|
||||
"al-burooj",
|
||||
"at-tariq",
|
||||
"al-ala",
|
||||
"al-ghashiya",
|
||||
"al-fajr",
|
||||
"al-balad",
|
||||
"ash-shams",
|
||||
"al-lail",
|
||||
"ad-dhuha",
|
||||
"al-sharh",
|
||||
"at-tin",
|
||||
"al-alaq",
|
||||
"al-qadr",
|
||||
"al-bayyina",
|
||||
"az-zalzala",
|
||||
"al-adiyat",
|
||||
"al-qaria",
|
||||
"at-takathur",
|
||||
"al-asr",
|
||||
"al-humaza",
|
||||
"al-fil",
|
||||
"quraish",
|
||||
"al-maun",
|
||||
"al-kauther",
|
||||
"al-kafiroon",
|
||||
"an-nasr",
|
||||
"al-masadd",
|
||||
"al-ikhlas",
|
||||
"al-falaq",
|
||||
"an-nas"
|
||||
]
|
Loading…
Reference in a new issue