مستخدم مجهول
sync from sandbox;
ط (تحديث) |
(sync from sandbox;) |
||
سطر ٢١: | سطر ٢١: | ||
title = strip_apostrophe_markup (title); -- strip any apostrophe markup | title = strip_apostrophe_markup (title); -- strip any apostrophe markup | ||
else | else | ||
title=''; -- if not set, make sure title is an empty string | title = ''; -- if not set, make sure title is an empty string | ||
end | end | ||
if is_set (script) then | if is_set (script) then | ||
سطر ٢٧: | سطر ٢٧: | ||
script = strip_apostrophe_markup (script); -- strip any apostrophe markup | script = strip_apostrophe_markup (script); -- strip any apostrophe markup | ||
else | else | ||
script=''; | script = ''; -- if not set, make sure script is an empty string | ||
end | end | ||
if is_set (title) and is_set (script) then | if is_set (title) and is_set (script) then | ||
سطر ٣٨: | سطر ٣٨: | ||
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | ||
Returns a string where all of | Returns a string where all of Lua's magic characters have been escaped. This is important because functions like | ||
string.gsub() treat their pattern and replace strings as patterns, not literal strings. | string.gsub() treat their pattern and replace strings as patterns, not literal strings. | ||
]] | ]] | ||
سطر ٤٤: | سطر ٤٤: | ||
local function escape_lua_magic_chars (argument) | local function escape_lua_magic_chars (argument) | ||
argument = argument:gsub("%%", "%%%%"); -- replace % with %% | argument = argument:gsub("%%", "%%%%"); -- replace % with %% | ||
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other | argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other Lua magic pattern characters | ||
return argument; | return argument; | ||
end | end | ||
سطر ٦٠: | سطر ٦٠: | ||
while true do | while true do | ||
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url " | ||
if nil == pattern then break; end -- no more | if nil == pattern then break; end -- no more URLs | ||
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters | ||
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | ||
end | end | ||
pages = pages:gsub("[%[%]]", ""); -- remove the brackets | pages = pages:gsub("[%[%]]", ""); -- remove the brackets | ||
pages = pages:gsub("–", "-" ); | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | ||
pages = pages:gsub("&%w+;", "-" ); | pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like? | ||
return pages; | return pages; | ||
end | end | ||
سطر ٨٠: | سطر ٨٠: | ||
MathML with SVG or PNG fallback | MathML with SVG or PNG fallback | ||
All three are heavy with | All three are heavy with HTML and CSS which doesn't belong in the metadata. | ||
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | ||
سطر ٨٧: | سطر ٨٧: | ||
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | ||
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | ||
that the page is saved without extraneous | that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation. | ||
When a replacement is made, this function returns true and the value with replacement; otherwise false and the | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial | ||
value. To replace multiple equations it is | value. To replace multiple equations it is necessary to call this function from within a loop. | ||
]=] | ]=] | ||
سطر ١٢٠: | سطر ١٢٠: | ||
--[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | ||
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities. | ||
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | ||
سطر ١٣٧: | سطر ١٣٧: | ||
end | end | ||
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message | ||
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | ||
سطر ١٤٣: | سطر ١٤٣: | ||
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space | ||
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | ||
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script | if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script | ||
value = value:gsub ('&zwj;', ''); | value = value:gsub ('&zwj;', ''); -- remove &zwj; entities | ||
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen | value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen | ||
end | end | ||
value = value:gsub ('[\009\010\013]', ' '); | value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space | ||
return value; | return value; | ||
end | end | ||
سطر ١٩٧: | سطر ١٩٧: | ||
-- these used only for periodicals | -- these used only for periodicals | ||
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | ||
OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc. | |||
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | ||
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | ||
سطر ٢٢٠: | سطر ٢٢١: | ||
end | end | ||
end | end | ||
else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | ||
OCinSoutput["rft.genre"] = "unknown"; | OCinSoutput["rft.genre"] = "unknown"; | ||
end | end | ||
سطر ٢٣٦: | سطر ٢٣٧: | ||
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | ||
end | end | ||
-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx" | |||
-- and now common parameters (as much as possible) | -- and now common parameters (as much as possible) | ||
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | ||
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | ||
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | ||
سطر ٢٤٤: | سطر ٢٤٦: | ||
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | ||
OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords | ||
OCinSoutput[ id ] = v; | OCinSoutput[ id ] = v; | ||
elseif id then -- when cfg.id_handlers[k].COinS is not nil | elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol= | ||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a | OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label}); | ||
elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here | |||
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers) | |||
end | end | ||
end | end | ||
سطر ٢٥٣: | سطر ٢٥٧: | ||
local last, first; | local last, first; | ||
for k, v in ipairs( data.Authors ) do | for k, v in ipairs( data.Authors ) do | ||
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters | ||
if k == 1 then -- for the first author name only | if k == 1 then -- for the first author name only | ||
if is_set(last) | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | ||
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | ||
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | ||
سطر ٢٦٧: | سطر ٢٧١: | ||
OCinSoutput["rft.au"] = last; -- book, journal, dissertation | OCinSoutput["rft.au"] = last; -- book, journal, dissertation | ||
end | end | ||
-- TODO: At present we do not report "et al.". Add anything special if this condition applies? | |||
end | end | ||
end | end | ||
سطر ٢٧٢: | سطر ٢٧٧: | ||
OCinSoutput.rft_id = data.URL; | OCinSoutput.rft_id = data.URL; | ||
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | ||
-- TODO: Add optional extra info: | |||
-- rfr_dat=#REVISION<version> (referrer private data) | |||
-- ctx_id=<data.RawPage>#<ref> (identifier for the context object) | |||
-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd) | |||
-- ctx_enc=info:ofi/enc:UTF-8 (character encoding) | |||
OCinSoutput = setmetatable( OCinSoutput, nil ); | OCinSoutput = setmetatable( OCinSoutput, nil ); | ||
-- sort with version string always first, and combine. | -- sort with version string always first, and combine. | ||
--table.sort( OCinSoutput ); | -- table.sort( OCinSoutput ); | ||
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | ||
return table.concat(OCinSoutput, "&"); | return table.concat(OCinSoutput, "&"); | ||
end | end |