The great rewrite

The project's directory structure has been changed to resemble
how FreeBSD organizes its filesystem. The English, Portuguese,
and Farsi translations are now sourced from https://quran.com. The
original Arabic is still sourced from https://searchtruth.com. Some
files have been removed and/or replaced. The SQL-related files have
been removed, with the intention of moving them into a new project
in the future (quran-sql). The lessons learnt from the development of
quran-audio were an inspiration for this change.
This commit is contained in:
0x1eef 2023-02-04 17:26:12 -03:00 committed by Robert
parent 1dfe6ead25
commit 2d9a22ea8f
488 changed files with 273 additions and 157834 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
*.db *.db

View file

@ -9,7 +9,7 @@ inherit_gem:
standard: config/base.yml standard: config/base.yml
AllCops: AllCops:
TargetRubyVersion: 3.1 TargetRubyVersion: 3.2
## ##
# Enabled cops # Enabled cops
@ -24,3 +24,5 @@ Layout/ArgumentAlignment:
Enabled: false Enabled: false
Style/LambdaCall: Style/LambdaCall:
Enabled: false Enabled: false
Style/FrozenStringLiteralComment:
Enabled: false

View file

@ -5,3 +5,4 @@ gem "paint", "~> 2.2"
gem "json", "~> 2.6" gem "json", "~> 2.6"
gem "standard", "~> 1.12" gem "standard", "~> 1.12"
gem "io-line.rb", "~> 0.1", github: "0x1eef/io-line.rb", tag: "v0.1.0" gem "io-line.rb", "~> 0.1", github: "0x1eef/io-line.rb", tag: "v0.1.0"
gem "ryo.rb", "~> 0.3", github: "0x1eef/ryo.rb", tag: "v0.3.0"

View file

@ -6,6 +6,13 @@ GIT
io-line.rb (0.1.0) io-line.rb (0.1.0)
io-console (~> 0.5) io-console (~> 0.5)
GIT
remote: https://github.com/0x1eef/ryo.rb.git
revision: 1fc0e7a2187d8020aadfb71b6b066dbc3af2fc2e
tag: v0.3.0
specs:
ryo.rb (0.3.0)
GEM GEM
remote: https://rubygems.org/ remote: https://rubygems.org/
specs: specs:
@ -55,6 +62,7 @@ DEPENDENCIES
json (~> 2.6) json (~> 2.6)
nokogiri (~> 1.13) nokogiri (~> 1.13)
paint (~> 2.2) paint (~> 2.2)
ryo.rb (~> 0.3)!
standard (~> 1.12) standard (~> 1.12)
BUNDLED WITH BUNDLED WITH

View file

@ -51,15 +51,16 @@ The [bin/](bin/) directory contains scripts that generate the contents of the
* [bin/sql/create-sql-seed-file](bin/sql/create-sql-seed-file) <br> * [bin/sql/create-sql-seed-file](bin/sql/create-sql-seed-file) <br>
This script creates [src/sql/seed.sql](src/sql/seed.sql). This script creates [src/sql/seed.sql](src/sql/seed.sql).
## Credit ## Thanks
Thanks to the following websites: First and foremost, Alhamdulillah.
* https://searchtruth.com - for the original Arabic. I'd also like to extend thanks to the following websites for providing
* https://quran.com - for the English translation. the content that quran-pull downloads:
* https://al-quran.cc - for the Farsi, and Portuguese translations.
* https://searchtruth.com for the original Arabic.
* https://quran.com for the English, Portuguese, and Farsi translations.
## License ## License
This software is released into the Public Domain. Public domain.

View file

@ -1,81 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, from each chapter
# in The Qur'an in a language that is determined by
# its first two arguments. For example:
#
# * ./al-quran pt portuguese
# * ./al-quran fa farsi
# * etc..
#
# The first argument (ARGV[0]) names the directory below "src/json/"
# that receives the chapter files. The second argument (ARGV[1]) is
# inserted into the request path, and into the process title.
##
# Set process title - primarily for the "ps" command
Process.setproctitle("quran-pull (al-quran) (#{ARGV[1]})")
##
# Dependencies
require "bundler/setup"
require "net/http"
require "nokogiri"
require "json"
require "paint"
require "fileutils"
require "io/line"
##
# Configuration variables
# "path" and "dest" are format strings; their %{...} placeholders are
# filled in by Kernel#format inside the download loop. "delay" is the
# number of seconds slept after each verse request.
base_uri = "al-quran.cc"
path = "/quran-translation/#{ARGV[1]}/%{chapter}/%{verse}.html"
dest = File.join("src", "json", ARGV[0], "%{chapter}.json")
delay = 0.5
##
# Chapter names
# An array read from "bindata/al-quran.cc/chapter-names.json". It is
# indexed with the chapter number minus one when a request path is built.
names = JSON.parse(
File.read(
File.join("bindata", "al-quran.cc", "chapter-names.json")
)
)
##
# Map chapters to their verse count.
# The map is looked up with the chapter number as a String.
vmap_path = File.join("bindata", "chapters-length.json")
vmap = JSON.parse(File.read(vmap_path))
##
# Share a single Net::HTTP instance.
http = Net::HTTP.new(base_uri, 443)
http.use_ssl = true
##
# Utils
# "find_content" returns the text of the nodes that match the
# ".ayah .translate" CSS selector in a response body.
line = IO::Line.new($stdout)
find_content = ->(res) do
doc = Nokogiri::HTML(res.body)
doc.css(".ayah .translate").text
end
##
# Download loop: one request per verse, one JSON file per chapter.
1.upto(114) do |chapter|
line.rewind.print("Download chapter #{chapter}:").end
rows = []
vcount = vmap[chapter.to_s]
1.upto(vcount) do |verse|
res = http.request_get(format(path, chapter: names[chapter - 1], verse:))
if Net::HTTPOK === res
# Each row is a [verse number, verse text] pair.
rows.push([verse, find_content.(res)])
line.rewind.print("#{verse} of #{vcount} verses downloaded. ")
else
# Any response other than 200 OK aborts the whole run with exit status 1.
line.end.print(Paint["ABORT", :red, :bold])
.end.print("Bad response: ", res.class)
.end
exit(1)
end
sleep(delay)
end
# Make sure the destination directory exists before the chapter is written.
FileUtils.mkdir_p(File.dirname(dest))
File.binwrite(format(dest, chapter:), JSON.pretty_generate(rows))
line.end.print(Paint["Done.", :bold])
.end.end
end

View file

@ -1,67 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, in each chapter
# of The Qur'an - in the Arabic language. The content
# is requested from the https://searchtruth.com website.
#
# Each chapter is then saved in a JSON file, for example:
# "src/json/ar/1.json", "src/json/ar/2.json", etc.
##
# Set process name - primarily for the "ps" command
Process.setproctitle("quran-pull (pull-arabic)")
##
# Dependencies
require "bundler/setup"
require "net/http"
require "nokogiri"
require "json"
require "paint"
require "io/line"
##
# Configuration variables.
# "dest" is a format string whose %{chapter} placeholder is filled in
# for each chapter. "delay" is the number of seconds slept after each
# chapter request.
base_uri = "www.searchtruth.com"
dest = File.join("src", "json", "ar", "%{chapter}.json")
delay = 1.5
##
# Share a single Net::HTTP instance.
http = Net::HTTP.new(base_uri, 443)
http.use_ssl = true
##
# Utils
# "find_content" returns an Array that contains the stripped text of
# every node matched by the CSS selector - one entry per matched node.
line = IO::Line.new($stdout)
find_content = ->(res) do
html = Nokogiri::HTML(res.body)
html.css("table[dir='rtl'] tr td div:last-child").map do
_1.text.strip
end
end
##
# Download loop: one request per chapter, one JSON file per chapter.
1.upto(114) do |chapter|
rows = []
res = http.request_get("/chapter_display.php?chapter=#{chapter}&translator=7")
if Net::HTTPOK === res
# Pair each extracted text with its 1-based position: [number, text].
rows.concat(find_content.(res).map.with_index(1) { [_2, _1] })
# NOTE(review): the destination directory is not created here -
# presumably "src/json/ar" already exists. Confirm.
File.write(format(dest, chapter:), JSON.pretty_generate(rows))
else
# Any response other than 200 OK aborts the whole run with exit status 1.
line.end.print(Paint["ABORT", :red, :bold])
.end.print("Bad response: ", res.class)
.end
exit(1)
end
line.rewind.print("#{chapter} of 114 chapters saved. ")
sleep(delay)
end
line.end.print(Paint["Done.", :bold]).end
##
# Spawn bin/json/insert-chapter-metadata
# The script is given the "ar" locale, and is waited on before exiting.
Process.wait Process.spawn(
"./bin/json/insert-chapter-metadata", "ar"
)

View file

@ -1,76 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, in each chapter
# of The Qur'an - in the English language. The content
# is requested from the https://quran.com website.
#
# Each chapter is then saved to a JSON file - for example:
# "src/json/en/1.json", "src/json/en/2.json", etc.
##
# Set process name - primarily for the "ps" command
Process.setproctitle("quran-pull (pull-english)")
##
# Dependencies
require "bundler/setup"
require "net/http"
require "nokogiri"
require "json"
require "paint"
require "io/line"
##
# Configuration variables.
# "dest" is a format string whose %{chapter} placeholder is filled in
# for each chapter. "delay" is the number of seconds slept after each
# verse request.
base_uri = "quran.com"
dest = File.join("src", "json", "en", "%{chapter}.json")
delay = 1.5
##
# Map chapters to their verse count.
# The map is looked up with the chapter number as a String.
vmap_path = File.join("bindata", "chapters-length.json")
vmap = JSON.parse(File.read(vmap_path))
##
# Share a single Net::HTTP instance.
http = Net::HTTP.new(base_uri, 443)
http.use_ssl = true
##
# Utils
# "find_content" takes the last element whose class attribute starts
# with "TranslationText", and returns its text with the digits 0-9 removed.
line = IO::Line.new($stdout)
find_content = ->(res) do
html = Nokogiri::HTML(res.body)
el = html.css("div[class^='TranslationText']").last
el.text.gsub(/[0-9]/, "")
end
##
# Download loop: one request per verse, one JSON file per chapter.
1.upto(114) do |chapter|
rows = []
vcount = vmap[chapter.to_s]
line.rewind.print("Download chapter #{chapter}:").end
1.upto(vcount) do |verse|
res = http.request_get("/#{chapter}/#{verse}")
if Net::HTTPOK === res
# Each row is a [verse number, verse text] pair.
rows.push([verse, find_content.(res)])
line.rewind.print("#{verse} of #{vcount} verses downloaded. ")
else
# Any response other than 200 OK aborts the whole run with exit status 1.
line.end.print(Paint["ABORT", :red, :bold])
.end.print("Bad response: ", res.class)
.end
exit(1)
end
sleep(delay)
end
# NOTE(review): the destination directory is not created here -
# presumably "src/json/en" already exists. Confirm.
File.write(format(dest, chapter:), JSON.pretty_generate(rows))
line.end.print(Paint["Done.", :bold])
.end.end
end
##
# Spawn bin/json/insert-chapter-metadata
# The script is given the "en" locale, and is waited on before exiting.
Process.wait Process.spawn(
"./bin/json/insert-chapter-metadata", "en"
)

View file

@ -1,26 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, in each chapter
# of The Qur'an - in the Farsi language. The content
# is requested from the https://al-quran.cc website.
#
# Each chapter is then saved in a JSON file, for example:
# "src/json/fa/1.json", "src/json/fa/2.json", etc.
#
# The work is delegated to two other scripts that run one
# after the other. Both paths are relative, so the script
# expects to be run from the root of the repository.
##
# Set process name - primarily for the "ps" command.
Process.setproctitle("quran-pull (pull-farsi)")
##
# Spawn bin/json/private/al-quran
# The arguments are the destination directory name ("fa"), and the
# language name that appears in the request path ("farsi").
Process.wait Process.spawn(
"./bin/json/private/al-quran", "fa", "farsi"
)
##
# Spawn bin/json/insert-chapter-metadata
# Runs only after the previous process has exited.
Process.wait Process.spawn(
"./bin/json/insert-chapter-metadata", "fa"
)

View file

@ -1,26 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script requests each verse, in each chapter
# of The Qur'an - in the Portuguese language. The content
# is requested from the https://al-quran.cc website.
#
# Each chapter is then saved in a JSON file, for example:
# "src/json/pt/1.json", "src/json/pt/2.json", etc.
#
# The work is delegated to two other scripts that run one
# after the other. Both paths are relative, so the script
# expects to be run from the root of the repository.
##
# Set process title - primarily for the "ps" command.
Process.setproctitle("quran-pull (pull-portuguese)")
##
# Spawn bin/json/private/al-quran
# The arguments are the destination directory name ("pt"), and the
# language name that appears in the request path ("portuguese").
Process.wait Process.spawn(
"./bin/json/private/al-quran", "pt", "portuguese"
)
##
# Spawn bin/json/insert-chapter-metadata
# Runs only after the previous process has exited.
Process.wait Process.spawn(
"./bin/json/insert-chapter-metadata", "pt"
)

35
bin/quran-pull Normal file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env ruby
##
# Utils
##
# Starts an executable that lives in the libexec directory, and
# returns straight away with the process ID of the child.
#
# @param [String] libexec_dir Directory that contains the executable.
# @param [String] exec Name of the executable.
# @param [Array<String>] args Arguments passed on to the executable.
# @return [Integer] The process ID of the child process.
def spawn(libexec_dir, exec, *args)
  executable = File.join(libexec_dir, exec)
  Process.spawn(executable, *args)
end
##
# Waits for a child process to exit. An Interrupt (eg: Ctrl-C) that
# arrives while waiting does not end the wait - the wait is simply
# started again, so this method returns only once the child has exited.
#
# @param [Integer] pid The process ID of a child process.
# @return [Integer] The process ID of the child that exited.
def wait(pid)
  loop do
    return Process.wait(pid)
  rescue Interrupt
    next
  end
end
##
# main
##
# Entry point of the quran-pull executable. The first argument selects
# a command, and the remaining arguments are forwarded to that command.
#
# The "pull" command parses its options to learn the locale, then runs
# the libexec script that knows how to download that locale and waits
# for it to exit. Any other input prints a usage message on stderr.
#
# @param [Array<String>] argv Command-line arguments.
# @return [void]
def main(argv)
  root_dir = File.realpath(File.join(__dir__, ".."))
  lib_dir = File.join(root_dir, "lib", "quran-pull")
  libexec_dir = File.join(root_dir, "libexec", "quran-pull")
  # Dispatch on the argv that was passed in, and not on the global ARGV.
  case argv[0]
  when "pull"
    # Loaded lazily: the usage path has no need for the library.
    require File.join(lib_dir, "pull")
    options = Pull.cli(argv[1..])
    if %w[en pt fa].include?(options.locale)
      wait spawn(libexec_dir, "quran.com", *argv[1..])
    elsif %w[ar].include?(options.locale)
      wait spawn(libexec_dir, "searchtruth.com", *argv[1..])
    else
      # Report an unknown locale rather than exit without a word.
      warn "quran-pull: unsupported locale '#{options.locale}' (expected ar, en, pt, or fa)"
    end
  else
    warn "Usage: quran-pull pull|help [OPTIONS]"
  end
end
main(ARGV)

View file

@ -1,22 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
##
# This script is responsible for the creation of "src/sql/seed.sql" -
# a SQL file that contains the contents of The Qur'an in Arabic,
# and as translations in English, Portuguese and Farsi. The
# "src/json/" directory is used as the data source.
##
# Dependencies
require "json"
require "erb"
require_relative "../../binlib/sql"
##
# Render "src/sql/seed.sql.erb" with the binding that is returned
# by SQL::Template.context.
base_dir = File.join("src", "sql")
template = File.read File.join(base_dir, "seed.sql.erb")
# Each rendered line is stripped of surrounding whitespace, empty lines
# are dropped, and what remains is joined with the record separator ($/).
result = ERB.new(template)
.result(SQL::Template.context)
.each_line.map(&:strip)
.reject(&:empty?).join($/)
File.write File.join(base_dir, "seed.sql"), result

View file

@ -1,116 +0,0 @@
[
"al-fatiha",
"al-baqara",
"aal-e-imran",
"an-nisa",
"al-maeda",
"al-anaam",
"al-araf",
"al-anfal",
"at-tawba",
"yunus",
"hud",
"yusuf",
"ar-rad",
"ibrahim",
"al-hijr",
"an-nahl",
"al-isra",
"al-kahf",
"maryam",
"ta-ha",
"al-anbiya",
"al-hajj",
"al-mumenoon",
"an-nur",
"al-furqan",
"ash-shuara",
"an-naml",
"al-qasas",
"al-ankabut",
"ar-rum",
"luqman",
"as-sajda",
"al-ahzab",
"saba",
"fatir",
"ya-seen",
"as-saaffat",
"sad",
"az-zamar",
"ghafir",
"fussilat",
"ash-shura",
"az-zukhruf",
"ad-dukhan",
"al-jathiya",
"al-ahqaf",
"muhammad",
"al-fath",
"al-hujraat",
"qaf",
"adh-dhariyat",
"at-tur",
"an-najm",
"al-qamar",
"ar-rahman",
"al-waqia",
"al-hadid",
"al-mujadala",
"al-hashr",
"al-mumtahana",
"as-saff",
"al-jumua",
"al-munafiqoon",
"at-taghabun",
"at-talaq",
"at-tahrim",
"al-mulk",
"al-qalam",
"al-haaqqa",
"al-maarij",
"nooh",
"al-jinn",
"al-muzzammil",
"al-muddaththir",
"al-qiyama",
"al-insan",
"al-mursalat",
"an-naba",
"an-naziat",
"abasa",
"at-takwir",
"al-infitar",
"al-mutaffifin",
"al-inshiqaq",
"al-burooj",
"at-tariq",
"al-ala",
"al-ghashiya",
"al-fajr",
"al-balad",
"ash-shams",
"al-lail",
"ad-dhuha",
"al-sharh",
"at-tin",
"al-alaq",
"al-qadr",
"al-bayyina",
"az-zalzala",
"al-adiyat",
"al-qaria",
"at-takathur",
"al-asr",
"al-humaza",
"al-fil",
"quraish",
"al-maun",
"al-kauther",
"al-kafiroon",
"an-nasr",
"al-masadd",
"al-ikhlas",
"al-falaq",
"an-nas"
]

View file

@ -1,9 +0,0 @@
# frozen_string_literal: true
##
# Namespace for the classes that are used to generate the SQL seed
# file. Each component lives in its own file below the "sql/" directory,
# and is loaded when this file is required.
module SQL
require_relative "sql/language"
require_relative "sql/chapter"
require_relative "sql/verse"
require_relative "sql/utils"
require_relative "sql/template"
end

View file

@ -1,41 +0,0 @@
# frozen_string_literal: true
##
# A chapter that is read from a JSON file. The first entry of the
# file is treated as the chapter's metadata, and each remaining entry
# as a verse. The locale is taken from the name of the directory that
# contains the file.
class SQL::Chapter
  ##
  # @param [String] path Path to a chapter's JSON file.
  def initialize(path)
    rows = JSON.parse(File.read(path))
    @metadata = rows.shift
    @contents = rows
    @locale = File.basename(File.dirname(path))
  end

  ##
  # @return [Integer] The "id" entry of the metadata, as an Integer.
  def number
    Integer(@metadata["id"])
  end

  ##
  # @return [String]
  #  The name built from the "codepoints" entry when the locale is
  #  "ar", otherwise the "translated_name" entry.
  def name
    return @metadata["codepoints"].pack("U*") if arabic?
    @metadata["translated_name"]
  end

  ##
  # @return [String]
  #  The same value as {#name} when the locale is "ar", otherwise
  #  the "transliterated_name" entry.
  def tr_name
    arabic? ? name : @metadata["transliterated_name"]
  end

  ##
  # @return [String] The "slug" entry of the metadata.
  def slug
    @metadata["slug"]
  end

  ##
  # @return [String] The "place_of_revelation" entry, capitalized.
  def city
    @metadata["place_of_revelation"].capitalize
  end

  ##
  # @return [Array<SQL::Verse>] One verse for each remaining entry.
  def verses
    @contents.map { |row| SQL::Verse.new(*row) }
  end

  private

  # True when the chapter file belongs to the "ar" locale.
  def arabic?
    @locale == "ar"
  end
end

View file

@ -1,9 +0,0 @@
# frozen_string_literal: true
##
# A language that is identified by its locale (eg "ar", "en").
class SQL::Language < Struct.new(:locale)
  ##
  # @return [Array<SQL::Chapter>]
  #  A chapter for every "src/json/<locale>/*.json" file, sorted
  #  by chapter number.
  def chapters
    pattern = File.join("src", "json", locale, "*.json")
    found = Dir.glob(pattern).map { |path| SQL::Chapter.new(path) }
    found.sort_by(&:number)
  end
end
end

View file

@ -1,37 +0,0 @@
# frozen_string_literal: true
##
# Provides the binding, and the helper methods, that are made
# available to the ERB template that renders the SQL seed file.
class SQL::Template
  ##
  # @return [Binding] The binding of a new template instance.
  def self.context
    new.context
  end

  ##
  # @return [Binding]
  #  A binding of this instance that has every entry of #locals
  #  defined as a local variable.
  def context
    # The local is deliberately named "context": it is part of the
    # binding that is returned, and therefore visible to the template.
    context = binding
    locals.each_pair { |name, value| context.local_variable_set(name, value) }
    context
  end

  ##
  # @return [String] The comma-separated values of a chapter row.
  def chapter_row(quran_id, chapter_id, chapter)
    sql_values(
      chapter_id, chapter.number, chapter.name,
      chapter.tr_name, chapter.slug, chapter.city,
      quran_id
    )
  end

  ##
  # @return [String] The comma-separated values of a verse row.
  def verse_row(verse, quran_id, chapter_id)
    sql_values(verse.number, quran_id, chapter_id, verse.content)
  end

  private

  # Joins values with a comma. Integers are emitted as-is, and
  # every other value is passed through SQL::Utils.escape.
  def sql_values(*values)
    formatted = values.map do |value|
      case value
      when Integer then value
      else SQL::Utils.escape(value)
      end
    end
    formatted.join(",")
  end

  # The local variables that are defined on the template's binding.
  def locals
    {
      languages: %w[ar en pt fa].map { |locale| SQL::Language.new(locale) },
      chapter_id: 1
    }
  end
end

View file

@ -1,10 +0,0 @@
# frozen_string_literal: true
##
# Utility functions for generating SQL.
module SQL::Utils
  module_function

  ##
  # Wraps a string in single quotes, and doubles every single
  # quote that appears inside it.
  #
  # @param [String] str
  # @return [String]
  def escape(str)
    quote = "'"
    doubled = str.gsub(quote, quote + quote)
    quote + doubled + quote
  end
end

View file

@ -1,4 +0,0 @@
# frozen_string_literal: true
##
# A verse, made up of two members: "number", and "content".
class SQL::Verse < Struct.new(:number, :content)
end

37
lib/quran-pull/command.rb Normal file
View file

@ -0,0 +1,37 @@
##
# A mixin that provides the directory layout of the project, a
# shared IO::Line instance, and lazily-loaded data files.
module Command
  require "ryo"
  require "json"
  require "io/line"

  ##
  # @return [String] The real path of the project's root directory.
  def root_dir
    File.realpath(File.join(__dir__, "..", ".."))
  end

  ##
  # @return [String] The "share/quran-pull" directory.
  def share_dir
    File.join(root_dir, "share", "quran-pull")
  end

  ##
  # @return [String] The directory that contains the data files.
  def data_dir
    File.join(share_dir, "data")
  end

  ##
  # @return [String] The "TheQuran" directory below the share directory.
  def quran_dir
    File.join(share_dir, "TheQuran")
  end

  ##
  # @return [IO::Line] A memoized IO::Line that writes to $stdout.
  def line
    @line ||= IO::Line.new($stdout)
  end

  ##
  # @return [Object] The memoized contents of "count.json", via Ryo.from.
  def count
    @count ||= parse_data_file("count.json")
  end

  ##
  # @return [Object] The memoized contents of "surahinfo.json", via Ryo.from.
  def surah_info
    @surah_info ||= parse_data_file("surahinfo.json")
  end

  private

  # Reads a JSON file from the data directory, and hands the
  # parsed result to Ryo.from.
  def parse_data_file(basename)
    json = File.binread(File.join(data_dir, basename))
    Ryo.from(JSON.parse(json))
  end
end

77
lib/quran-pull/pull.rb Normal file
View file

@ -0,0 +1,77 @@
##
# Pull downloads content from the remote source that "sources.json"
# associates with a locale, and writes each surah to a JSON file in
# a per-locale directory below Command#quran_dir.
class Pull
  require "ryo"
  require "json"
  require "net/http"
  require "fileutils"
  require "optparse"
  require_relative "command"
  include Command
  include FileUtils

  attr_reader :options,
    :source,
    :http

  ##
  # Parses command-line options. When the options cannot be parsed,
  # the help text is printed and the process exits.
  #
  # @param [Array<String>] argv Command-line arguments.
  # @return [Object] The options object; its locale defaults to "en".
  def self.cli(argv)
    options = Ryo({locale: "en"})
    parser = OptionParser.new do |o|
      o.on("-l", "--locale LOCALE", "ar, en, pt, or fa (default: en)")
    end
    parser.parse(argv, into: options)
    options
  rescue OptionParser::ParseError
    # Only parse errors end up here. Any other error is a bug, and is
    # allowed to surface rather than be hidden behind the help text.
    puts parser.help
    exit
  end

  ##
  # @param [Object] options An object that responds to "locale".
  def initialize(options)
    @options = options
    @source = sources[options.locale]
    @http = Net::HTTP.new(source.http.hostname, 443).tap { _1.use_ssl = true }
  end

  ##
  # Requests a whole surah, and yields the response when it is 200 OK.
  #
  # @param [Integer] surah_no A surah number.
  def pull_surah(surah_no, &b)
    pull req_path(vars(binding)), &b
  end

  ##
  # Requests a single ayah, and yields the response when it is 200 OK.
  #
  # @param [Integer] surah_no A surah number.
  # @param [Integer] ayah_no An ayah number.
  def pull_ayah(surah_no, ayah_no, &b)
    pull req_path(vars(binding)), &b
  end

  ##
  # Writes a surah to "<quran_dir>/<locale>/<surah_no>.json". The
  # surah's metadata is written as the first entry, followed by the rows.
  # The given array is not modified.
  #
  # @param [Integer] surah_no A surah number (1-based).
  # @param [Array] rows The rows of the surah.
  # @return [Integer] The number of bytes written.
  def write(surah_no, rows)
    dir = File.join(quran_dir, options.locale)
    mkdir_p(dir)
    metadata = Ryo.table_of(surah_info[surah_no - 1])
    # Build a new array rather than unshift onto the caller's array.
    File.binwrite File.join(dir, "#{surah_no}.json"), JSON.pretty_generate([metadata, *rows])
  end

  private

  # Fills in the source's path template with the variables that the
  # source declares, taking their values from "vars".
  def req_path(vars)
    format source.http.path, source.http.vars.to_h { [_1.to_sym, vars[_1.to_sym]] }
  end

  # Performs a GET request. The response is yielded when it is 200 OK;
  # otherwise the failure is reported on stderr and nil is returned,
  # so that a missing response does not go unnoticed.
  def pull(path)
    res = http.get(path)
    case res
    when Net::HTTPOK
      yield(res)
    else
      warn "quran-pull: GET #{path} failed (#{res.code} #{res.message})"
      nil
    end
  end

  # Returns a Hash that maps the name of each local variable in the
  # given binding to its value.
  def vars(binding)
    binding.local_variables.to_h { [_1, binding.local_variable_get(_1)] }
  end

  # The memoized contents of "sources.json", via Ryo.from.
  def sources
    @sources ||= Ryo.from(
      JSON.parse(File.binread(File.join(data_dir, "sources.json")))
    )
  end
end

32
libexec/quran-pull/quran.com Executable file
View file

@ -0,0 +1,32 @@
#!/usr/bin/env ruby
lib_dir = File.realpath File.join(__dir__, "..", "..", "lib", "quran-pull")
require File.join(lib_dir, "pull")
require "optparse"
require "nokogiri"
##
# Extracts the text of an ayah from a response: the text of the last
# element whose class attribute starts with "TranslationText", with
# the digits 0-9 removed.
#
# @param [#body] res A response.
# @return [String]
def grep(res)
  Nokogiri::HTML(res.body)
    .css("div[class^='TranslationText']")
    .last
    .text
    .gsub(/[0-9]/, "")
end
##
# main
##
# Downloads every surah one ayah at a time, and writes one file per
# surah. Each row that is written is an [ayah number, text] pair.
#
# @param [Array<String>] argv Command-line arguments.
# @return [void]
def main(argv)
  cmd = Pull.new(Pull.cli(argv))
  cmd.http.start
  1.upto(114) do |surah_no|
    rows = []
    ayah_count = cmd.count[surah_no]
    1.upto(ayah_count) do |ayah_no|
      cmd.pull_ayah(surah_no, ayah_no) do |res|
        # push (not concat) keeps the number and the text together as a pair.
        rows.push([ayah_no, grep(res)])
      end
      cmd.line.rewind.print "Surah #{surah_no} [#{ayah_no}/#{ayah_count}]"
    end
    cmd.write(surah_no, rows)
    cmd.line.end
  end
ensure
  # "cmd" is nil when Pull.new raised, and the session might never have
  # been started - in both cases the original error must not be masked.
  http = cmd&.http
  http.finish if http&.started?
end
main(ARGV)

View file

@ -0,0 +1,29 @@
#!/usr/bin/env ruby
lib_dir = File.realpath File.join(__dir__, "..", "..", "lib", "quran-pull")
require File.join(lib_dir, "pull")
require "optparse"
require "nokogiri"
##
# Extracts the text of each ayah from a response: the stripped text
# of every node that is matched by the CSS selector, in document order.
#
# @param [#body] res A response.
# @return [Array<String>]
def grep(res)
  cells = Nokogiri::HTML(res.body).css("table[dir='rtl'] tr td div:last-child")
  cells.map { |cell| cell.text.strip }
end
##
# main
##
# Downloads every surah with one request per surah, and writes one
# file per surah. Each row that is written is an [ayah number, text]
# pair, numbered from 1 in the order the texts were extracted.
#
# @param [Array<String>] argv Command-line arguments.
# @return [void]
def main(argv)
  cmd = Pull.new(Pull.cli(argv))
  cmd.http.start
  1.upto(114) do |surah_no|
    rows = []
    cmd.pull_surah(surah_no) do |res|
      rows.concat(grep(res).map.with_index(1) { |text, ayah_no| [ayah_no, text] })
    end
    cmd.line.rewind.print "Surah #{surah_no} [#{surah_no}/114]"
    cmd.write(surah_no, rows)
  end
  cmd.line.end
ensure
  # "cmd" is nil when Pull.new raised, and the session might never have
  # been started - in both cases the original error must not be masked.
  http = cmd&.http
  http.finish if http&.started?
end
main(ARGV)

Some files were not shown because too many files have changed in this diff Show more