Improve the pull-farsi and pull-portuguese scripts
This commit is contained in:
parent
a077db4b7f
commit
6f44b12410
4 changed files with 206 additions and 191 deletions
78
bin/json/private/al-quran
Executable file
78
bin/json/private/al-quran
Executable file
|
@ -0,0 +1,78 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
##
|
||||||
|
# This script requests each verse from each chapter
|
||||||
|
# in The Qur'an in a language that is determined by
|
||||||
|
# its first two arguments (output directory, language name). For example:
|
||||||
|
#
|
||||||
|
# * ./al-quran pt portuguese
|
||||||
|
# * ./al-quran fa farsi
|
||||||
|
# * etc.
|
||||||
|
|
||||||
|
##
|
||||||
|
# Set process title - primarily for the "ps" command
|
||||||
|
Process.setproctitle("quran-pull/json/private/al-quran (#{ARGV[1]})")
|
||||||
|
|
||||||
|
##
|
||||||
|
# Dependencies
|
||||||
|
require "net/http"
|
||||||
|
require "nokogiri"
|
||||||
|
require "json"
|
||||||
|
require "paint"
|
||||||
|
require_relative "../../../binlib/io/line"
|
||||||
|
|
||||||
|
##
|
||||||
|
# Configuration variables
|
||||||
|
base_uri = "al-quran.cc"
|
||||||
|
path = "/quran-translation/#{ARGV[1]}/%{chapter}/%{verse}.html"
|
||||||
|
dest = File.join("src", "json", ARGV[0], "%{chapter}.json")
|
||||||
|
delay = 0.5
|
||||||
|
|
||||||
|
##
|
||||||
|
# Chapter names
|
||||||
|
names = JSON.parse(
|
||||||
|
File.read(
|
||||||
|
File.join('bindata', 'al-quran.cc', 'chapter-names.json')
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
##
|
||||||
|
# Map chapters to their verse count.
|
||||||
|
vmap_path = File.join("bindata", "chapters-length.json")
|
||||||
|
vmap = JSON.parse(File.read(vmap_path))
|
||||||
|
|
||||||
|
##
|
||||||
|
# Share a single Net::HTTP instance.
|
||||||
|
http = Net::HTTP.new(base_uri, 443)
|
||||||
|
http.use_ssl = true
|
||||||
|
|
||||||
|
##
|
||||||
|
# Utils
|
||||||
|
line = IO::Line.new($stdout)
|
||||||
|
find_content = ->(res) do
|
||||||
|
doc = Nokogiri::HTML(res.body)
|
||||||
|
doc.css(".ayah .translate").text
|
||||||
|
end
|
||||||
|
|
||||||
|
1.upto(114) do |chapter|
|
||||||
|
line.rewind.print("Download chapter #{chapter}:").end
|
||||||
|
rows = []
|
||||||
|
vcount = vmap[chapter.to_s]
|
||||||
|
1.upto(vcount) do |verse|
|
||||||
|
res = http.request_get(format(path, chapter: names[chapter - 1], verse:))
|
||||||
|
if Net::HTTPOK === res
|
||||||
|
rows.push([verse, find_content.(res)])
|
||||||
|
line.rewind.print("#{verse} of #{vcount} verses downloaded. ")
|
||||||
|
else
|
||||||
|
line.end.print(Paint["ABORT", :red, :bold])
|
||||||
|
.end.print("Bad response: ", res.class)
|
||||||
|
.end
|
||||||
|
exit(1)
|
||||||
|
end
|
||||||
|
sleep(delay)
|
||||||
|
end
|
||||||
|
File.binwrite(format(dest, chapter:), JSON.pretty_generate(rows))
|
||||||
|
line.end.print(Paint["Done.", :bold])
|
||||||
|
.end.end
|
||||||
|
end
|
|
@ -7,105 +7,16 @@
|
||||||
# is requested from the https://al-quran.cc website.
|
# is requested from the https://al-quran.cc website.
|
||||||
#
|
#
|
||||||
# Each chapter is then saved in a JSON file, for example:
|
# Each chapter is then saved in a JSON file, for example:
|
||||||
# "src/fa/<chapter_num>.json"
|
# "src/json/fa/1.json", "src/json/fa/2.json", etc.
|
||||||
|
|
||||||
##
|
##
|
||||||
# Dependencies
|
# Set process title - primarily for the "ps" command.
|
||||||
require "net/http"
|
Process.setproctitle("quran-pull/json/pull-farsi")
|
||||||
require "nokogiri"
|
|
||||||
require "json"
|
|
||||||
require "paint"
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Chapter names
|
# Spawn bin/json/private/al-quran
|
||||||
ch_names = %w[
|
Process.wait Process.spawn(
|
||||||
al-fatiha al-baqara aal-e-imran an-nisa
|
"./bin/json/private/al-quran", "fa", "farsi"
|
||||||
al-maeda al-anaam al-araf al-anfal
|
|
||||||
at-tawba yunus hud yusuf
|
|
||||||
ar-rad ibrahim al-hijr an-nahl
|
|
||||||
al-isra al-kahf maryam ta-ha
|
|
||||||
al-anbiya al-hajj al-mumenoon an-nur
|
|
||||||
al-furqan ash-shuara an-naml al-qasas
|
|
||||||
al-ankabut ar-rum luqman as-sajda
|
|
||||||
al-ahzab saba fatir ya-seen
|
|
||||||
as-saaffat sad az-zamar ghafir
|
|
||||||
fussilat ash-shura az-zukhruf ad-dukhan
|
|
||||||
al-jathiya al-ahqaf muhammad al-fath
|
|
||||||
al-hujraat qaf adh-dhariyat at-tur
|
|
||||||
an-najm al-qamar ar-rahman al-waqia
|
|
||||||
al-hadid al-mujadala al-hashr al-mumtahana
|
|
||||||
as-saff al-jumua al-munafiqoon at-taghabun
|
|
||||||
at-talaq at-tahrim al-mulk al-qalam
|
|
||||||
al-haaqqa al-maarij nooh al-jinn
|
|
||||||
al-muzzammil al-muddaththir al-qiyama al-insan
|
|
||||||
al-mursalat an-naba an-naziat abasa
|
|
||||||
at-takwir al-infitar al-mutaffifin al-inshiqaq
|
|
||||||
al-burooj at-tariq al-ala al-ghashiya
|
|
||||||
al-fajr al-balad ash-shams al-lail
|
|
||||||
ad-dhuha al-sharh at-tin al-alaq
|
|
||||||
al-qadr al-bayyina az-zalzala al-adiyat
|
|
||||||
al-qaria at-takathur al-asr al-humaza
|
|
||||||
al-fil quraish al-maun al-kauther
|
|
||||||
al-kafiroon an-nasr al-masadd al-ikhlas
|
|
||||||
al-falaq an-nas
|
|
||||||
]
|
|
||||||
|
|
||||||
##
|
|
||||||
# Configuration variables.
|
|
||||||
base_uri = "al-quran.cc"
|
|
||||||
path = "/quran-translation/farsi/%{ch_name}/%{verse_num}.html"
|
|
||||||
cool_off = 5
|
|
||||||
src_path = File.join(
|
|
||||||
__dir__, "..", "src", "json", "ar", "%{chapter_num}.json"
|
|
||||||
)
|
|
||||||
dest_path = File.join(
|
|
||||||
__dir__, "..", "src", "json", "fa", "%{chapter_num}.json"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
##
|
|
||||||
# Share a single Net::HTTP instance.
|
|
||||||
http = Net::HTTP.new(base_uri, 443)
|
|
||||||
http.use_ssl = true
|
|
||||||
|
|
||||||
##
|
|
||||||
# Helper method.
|
|
||||||
def get_request(path)
|
|
||||||
Net::HTTP::Get.new(
|
|
||||||
path,
|
|
||||||
"Accept" => "text/html"
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Helper method.
|
|
||||||
def extract_verse!(res)
|
|
||||||
doc = Nokogiri::HTML(res.body)
|
|
||||||
verse = doc.css(".ayah .translate").first
|
|
||||||
verse.text
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# main()
|
|
||||||
1.upto(114) do |chapter_num|
|
|
||||||
verses = JSON.parse File.read(format(src_path, chapter_num:))
|
|
||||||
rows = []
|
|
||||||
ch_name = ch_names[chapter_num - 1]
|
|
||||||
final_dest = format(dest_path, chapter_num:)
|
|
||||||
|
|
||||||
print "Fetch: ", ch_name, "\n"
|
|
||||||
verses.each.with_index(1) do |_, verse_num|
|
|
||||||
htm_file = format(path, ch_name:, verse_num:)
|
|
||||||
case res = http.request(get_request(htm_file))
|
|
||||||
when Net::HTTPOK
|
|
||||||
rows.push([verse_num, extract_verse!(res)])
|
|
||||||
else
|
|
||||||
print Paint["ERROR (#{res.class}): ", :red, :bold], final_dest, "\n"
|
|
||||||
break
|
|
||||||
end
|
|
||||||
sleep 0.1
|
|
||||||
end
|
|
||||||
File.write(final_dest, JSON.pretty_generate(rows))
|
|
||||||
print Paint["OK: ", :green, :bold], final_dest.sub(ENV["HOME"], ""), "\n"
|
|
||||||
sleep cool_off
|
|
||||||
print Paint["Chill for #{cool_off} seconds", :blue, :bold], "\n", "\n"
|
|
||||||
end
|
|
||||||
|
|
|
@ -7,104 +7,14 @@
|
||||||
# is requested from the https://al-quran.cc website.
|
# is requested from the https://al-quran.cc website.
|
||||||
#
|
#
|
||||||
# Each chapter is then saved in a JSON file, for example:
|
# Each chapter is then saved in a JSON file, for example:
|
||||||
# "src/pt/<chapter_num>.json"
|
# "src/json/pt/1.json", "src/json/pt/2.json", etc.
|
||||||
|
|
||||||
##
|
##
|
||||||
# Dependencies
|
# Set process title - primarily for the "ps" command.
|
||||||
require "net/http"
|
Process.setproctitle("quran-pull/json/pull-portuguese")
|
||||||
require "nokogiri"
|
|
||||||
require "json"
|
|
||||||
require "paint"
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Chapter names
|
# Spawn bin/json/private/al-quran
|
||||||
ch_names = %w[
|
Process.wait Process.spawn(
|
||||||
al-fatiha al-baqara aal-e-imran an-nisa
|
"./bin/json/private/al-quran", "pt", "portuguese"
|
||||||
al-maeda al-anaam al-araf al-anfal
|
|
||||||
at-tawba yunus hud yusuf
|
|
||||||
ar-rad ibrahim al-hijr an-nahl
|
|
||||||
al-isra al-kahf maryam ta-ha
|
|
||||||
al-anbiya al-hajj al-mumenoon an-nur
|
|
||||||
al-furqan ash-shuara an-naml al-qasas
|
|
||||||
al-ankabut ar-rum luqman as-sajda
|
|
||||||
al-ahzab saba fatir ya-seen
|
|
||||||
as-saaffat sad az-zamar ghafir
|
|
||||||
fussilat ash-shura az-zukhruf ad-dukhan
|
|
||||||
al-jathiya al-ahqaf muhammad al-fath
|
|
||||||
al-hujraat qaf adh-dhariyat at-tur
|
|
||||||
an-najm al-qamar ar-rahman al-waqia
|
|
||||||
al-hadid al-mujadala al-hashr al-mumtahana
|
|
||||||
as-saff al-jumua al-munafiqoon at-taghabun
|
|
||||||
at-talaq at-tahrim al-mulk al-qalam
|
|
||||||
al-haaqqa al-maarij nooh al-jinn
|
|
||||||
al-muzzammil al-muddaththir al-qiyama al-insan
|
|
||||||
al-mursalat an-naba an-naziat abasa
|
|
||||||
at-takwir al-infitar al-mutaffifin al-inshiqaq
|
|
||||||
al-burooj at-tariq al-ala al-ghashiya
|
|
||||||
al-fajr al-balad ash-shams al-lail
|
|
||||||
ad-dhuha al-sharh at-tin al-alaq
|
|
||||||
al-qadr al-bayyina az-zalzala al-adiyat
|
|
||||||
al-qaria at-takathur al-asr al-humaza
|
|
||||||
al-fil quraish al-maun al-kauther
|
|
||||||
al-kafiroon an-nasr al-masadd al-ikhlas
|
|
||||||
al-falaq an-nas
|
|
||||||
]
|
|
||||||
|
|
||||||
##
|
|
||||||
# Configuration variables.
|
|
||||||
base_uri = "al-quran.cc"
|
|
||||||
path = "/quran-translation/portuguese/%{ch_name}/%{verse_num}.html"
|
|
||||||
cool_off = 5
|
|
||||||
src_path = File.join(
|
|
||||||
__dir__, "..", "src", "json", "ar", "%{chapter_num}.json"
|
|
||||||
)
|
)
|
||||||
dest_path = File.join(
|
|
||||||
__dir__, "..", "src", "json", "pt", "%{chapter_num}.json"
|
|
||||||
)
|
|
||||||
|
|
||||||
##
|
|
||||||
# Share a single Net::HTTP instance.
|
|
||||||
http = Net::HTTP.new(base_uri, 443)
|
|
||||||
http.use_ssl = true
|
|
||||||
|
|
||||||
##
|
|
||||||
# Helper method.
|
|
||||||
def get_request(path)
|
|
||||||
Net::HTTP::Get.new(
|
|
||||||
path,
|
|
||||||
"Accept" => "text/html"
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# Helper method.
|
|
||||||
def extract_verse!(res, remove_suratu)
|
|
||||||
doc = Nokogiri::HTML(res.body)
|
|
||||||
verse = doc.css(".ayah .translate").first
|
|
||||||
(remove_suratu ? verse.text.sub(/^Suratu [\w\-.]+/, "") : verse.text).strip
|
|
||||||
end
|
|
||||||
|
|
||||||
##
|
|
||||||
# main()
|
|
||||||
1.upto(114) do |chapter_num|
|
|
||||||
verses = JSON.parse File.read(format(src_path, chapter_num:))
|
|
||||||
rows = []
|
|
||||||
ch_name = ch_names[chapter_num - 1]
|
|
||||||
final_dest = format(dest_path, chapter_num:)
|
|
||||||
|
|
||||||
print "Fetch: ", ch_name, "\n"
|
|
||||||
verses.each.with_index(1) do |_, verse_num|
|
|
||||||
htm_file = format(path, ch_name:, verse_num:)
|
|
||||||
case res = http.request(get_request(htm_file))
|
|
||||||
when Net::HTTPOK
|
|
||||||
rows.push([verse_num, extract_verse!(res, verse_num == 1)])
|
|
||||||
else
|
|
||||||
print Paint["ERROR (#{res.class}): ", :red, :bold], final_dest, "\n"
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
File.write(final_dest, JSON.pretty_generate(rows))
|
|
||||||
print Paint["OK: ", :green, :bold], final_dest.sub(ENV["HOME"], ""), "\n"
|
|
||||||
sleep cool_off
|
|
||||||
print Paint["Chill for #{cool_off} seconds", :blue, :bold], "\n", "\n"
|
|
||||||
end
|
|
||||||
|
|
116
bindata/al-quran.cc/chapter-names.json
Normal file
116
bindata/al-quran.cc/chapter-names.json
Normal file
|
@ -0,0 +1,116 @@
|
||||||
|
[
|
||||||
|
"al-fatiha",
|
||||||
|
"al-baqara",
|
||||||
|
"aal-e-imran",
|
||||||
|
"an-nisa",
|
||||||
|
"al-maeda",
|
||||||
|
"al-anaam",
|
||||||
|
"al-araf",
|
||||||
|
"al-anfal",
|
||||||
|
"at-tawba",
|
||||||
|
"yunus",
|
||||||
|
"hud",
|
||||||
|
"yusuf",
|
||||||
|
"ar-rad",
|
||||||
|
"ibrahim",
|
||||||
|
"al-hijr",
|
||||||
|
"an-nahl",
|
||||||
|
"al-isra",
|
||||||
|
"al-kahf",
|
||||||
|
"maryam",
|
||||||
|
"ta-ha",
|
||||||
|
"al-anbiya",
|
||||||
|
"al-hajj",
|
||||||
|
"al-mumenoon",
|
||||||
|
"an-nur",
|
||||||
|
"al-furqan",
|
||||||
|
"ash-shuara",
|
||||||
|
"an-naml",
|
||||||
|
"al-qasas",
|
||||||
|
"al-ankabut",
|
||||||
|
"ar-rum",
|
||||||
|
"luqman",
|
||||||
|
"as-sajda",
|
||||||
|
"al-ahzab",
|
||||||
|
"saba",
|
||||||
|
"fatir",
|
||||||
|
"ya-seen",
|
||||||
|
"as-saaffat",
|
||||||
|
"sad",
|
||||||
|
"az-zamar",
|
||||||
|
"ghafir",
|
||||||
|
"fussilat",
|
||||||
|
"ash-shura",
|
||||||
|
"az-zukhruf",
|
||||||
|
"ad-dukhan",
|
||||||
|
"al-jathiya",
|
||||||
|
"al-ahqaf",
|
||||||
|
"muhammad",
|
||||||
|
"al-fath",
|
||||||
|
"al-hujraat",
|
||||||
|
"qaf",
|
||||||
|
"adh-dhariyat",
|
||||||
|
"at-tur",
|
||||||
|
"an-najm",
|
||||||
|
"al-qamar",
|
||||||
|
"ar-rahman",
|
||||||
|
"al-waqia",
|
||||||
|
"al-hadid",
|
||||||
|
"al-mujadala",
|
||||||
|
"al-hashr",
|
||||||
|
"al-mumtahana",
|
||||||
|
"as-saff",
|
||||||
|
"al-jumua",
|
||||||
|
"al-munafiqoon",
|
||||||
|
"at-taghabun",
|
||||||
|
"at-talaq",
|
||||||
|
"at-tahrim",
|
||||||
|
"al-mulk",
|
||||||
|
"al-qalam",
|
||||||
|
"al-haaqqa",
|
||||||
|
"al-maarij",
|
||||||
|
"nooh",
|
||||||
|
"al-jinn",
|
||||||
|
"al-muzzammil",
|
||||||
|
"al-muddaththir",
|
||||||
|
"al-qiyama",
|
||||||
|
"al-insan",
|
||||||
|
"al-mursalat",
|
||||||
|
"an-naba",
|
||||||
|
"an-naziat",
|
||||||
|
"abasa",
|
||||||
|
"at-takwir",
|
||||||
|
"al-infitar",
|
||||||
|
"al-mutaffifin",
|
||||||
|
"al-inshiqaq",
|
||||||
|
"al-burooj",
|
||||||
|
"at-tariq",
|
||||||
|
"al-ala",
|
||||||
|
"al-ghashiya",
|
||||||
|
"al-fajr",
|
||||||
|
"al-balad",
|
||||||
|
"ash-shams",
|
||||||
|
"al-lail",
|
||||||
|
"ad-dhuha",
|
||||||
|
"al-sharh",
|
||||||
|
"at-tin",
|
||||||
|
"al-alaq",
|
||||||
|
"al-qadr",
|
||||||
|
"al-bayyina",
|
||||||
|
"az-zalzala",
|
||||||
|
"al-adiyat",
|
||||||
|
"al-qaria",
|
||||||
|
"at-takathur",
|
||||||
|
"al-asr",
|
||||||
|
"al-humaza",
|
||||||
|
"al-fil",
|
||||||
|
"quraish",
|
||||||
|
"al-maun",
|
||||||
|
"al-kauther",
|
||||||
|
"al-kafiroon",
|
||||||
|
"an-nasr",
|
||||||
|
"al-masadd",
|
||||||
|
"al-ikhlas",
|
||||||
|
"al-falaq",
|
||||||
|
"an-nas"
|
||||||
|
]
|
Loading…
Reference in a new issue