Jump to content

Module:Find country: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
Initial clone of Title monthname, but will only work for single-word countries
 
m find
Line 17: Line 17:


local countryList = {
local countryList = {
'testcase',
'Find',
'South Africa',
'South Africa',
'Africa',
'Africa',

Revision as of 14:50, 17 July 2020

--[[ v1.00
     Split the page title into words then test each of them against
     the list of countries/continents.
     Optionally, an alternative page name may be supplied as a parameter.
     Return the first word which matches a country/continent name ...
     unless the "match=" parameter specifies a different match.
     If there is no match, then return an empty string ... unless
     the "nomatch" parameter specifies something different
]]

local getArgs = require('Module:Arguments').getArgs
local p = {}

-- config
-- Module-level settings read by findcountryinstring and overridden by p._main.
-- nomatch:  the value returned when no country is found at the requested
--           position; p._main replaces it with the "nomatch" argument if given.
-- matchnum: which match to return (1 = first match; negative values count
--           backwards from the last match); p._main sets it from the "match"
--           argument if given.
local nomatch = ""
local matchnum = 1

-- Names of countries, territories, continents and regions that a title word
-- is compared against (exact, case-sensitive string equality).
-- Many places are listed under several variants (with and without a leading
-- "the", abbreviations, former names).
-- NOTE(review): titles are split into single words before comparison, so the
-- multi-word entries below presumably cannot match yet — confirm against
-- findcountryinstring.
local countryList = {
	-- NOTE(review): 'Find' is not a place name; it looks like a test
	-- placeholder — confirm and remove once no longer needed.
	'Find',
	'South Africa',
	'Africa',
	'Antarctica',
	'Central Asia',
	'South Asia',
	'South East Asia',
	'Southeast Asia',
	'Asia',
	'the Caribbean',
	'Caribbean',
	'Eurasia',
	'Europe',
	'the Middle East',
	'Middle East',
	'Central America',
	'North America',
	'South America',
	'the Americas',
	'Afghanistan',
	'Albania',
	'Algeria',
	'American Samoa',
	'Andorra',
	'Angola',
	'Anguilla',
	'Antigua and Barbuda',
	'Argentina',
	'Armenia',
	'Aruba',
	'Australia',
	'Austria',
	'Azerbaijan',
	'the Bahamas',
	'Bahamas',
	'Bahrain',
	'Bangladesh', -- was the demonym 'Bangladeshi', which is not a country name
	'Barbados',
	'Belarus',
	'Belgium',
	'Belize',
	'Benin',
	'Bermuda',
	'Bhutan',
	'Bolivia',
	'Bosnia',
	'Botswana',
	'Brazil',
	'Brunei Darussalam',
	'Brunei',
	'Bulgaria',
	'Burkina Faso',
	'Burundi',
	'Cambodia',
	'Cameroon',
	'Canada',
	'Cape Verde',
	'the Cayman Islands',
	'Cayman Islands',
	'the Central African Republic',
	'Central African Republic',
	'Chad',
	'Chile',
	"the People's Republic of China",
	"People's Republic of China",
	'China',
	'China PR',
	'PR China',
	'Colombia',
	'Comoros',
	'the Republic of the Congo',
	'the Congo',
	'Congo',
	'Cook Islands',
	'Costa Rica',
	'Croatia',
	'Cuba',
	'Curaçao',
	'Curacao',
	'Cyprus',
	'the Czech Republic',
	'Czech Republic',
	'Czechia',
	'the Democratic Republic of the Congo',
	'Democratic Republic of the Congo',
	'Congo DR',
	'DR Congo',
	'DRC',
	'Denmark',
	'Djibouti',
	'Dominica',
	'the Dominican Republic',
	'Dominican Republic',
	'East Timor',
	'Timor-Leste',
	'Ecuador',
	'Egypt',
	'El Salvador',
	'England',
	'Equatorial Guinea',
	'Eritrea',
	'Estonia',
	'Ethiopia',
	'the Falkland Islands',
	'Falkland Islands',
	'the Faroe Islands',
	'Faroe Islands',
	'Fiji',
	'Finland',
	'France',
	'Gabon',
	'the Gambia',
	'Gambia',
	'Georgia',
	'Germany',
	'Ghana',
	'Gibraltar',
	'Great Britain',
	'Britain',
	'Greece',
	'Grenada',
	'Guam',
	'Guatemala',
	'Guinea',
	'Guinea-Bissau',
	'Guyana',
	'Haiti',
	'Honduras',
	'Hong Kong',
	'Hungary',
	'Iceland',
	'India',
	'Indonesia',
	'Iran',
	'Iraq',
	'Ireland',
	'the Republic of Ireland',
	'Republic of Ireland',
	'Israel',
	'Italy',
	'Ivory Coast',
	"Côte d'Ivoire",
	'Jamaica',
	'Japan',
	'Jordan',
	'Kazakhstan',
	'Kenya',
	'Kiribati',
	'Kosovo',
	'the Republic of Kosovo',
	'Republic of Kosovo',
	'Kuwait',
	'Kyrgyzstan',
	'the Kyrgyz Republic',
	'Kyrgyz Republic',
	"the Lao People's Democratic Republic",
	"Lao People's Democratic Republic",
	'Laos',
	'Latvia',
	'Lebanon',
	'Lesotho',
	'Liberia',
	'Libya',
	'Liechtenstein',
	'Lithuania',
	'Luxembourg',
	'Macau',
	'the Republic of Macedonia',
	'Republic of Macedonia',
	'North Macedonia',
	'Macedonia',
	'Madagascar',
	'Malawi',
	'Malaysia',
	'the Maldives',
	'Maldives',
	'Mali',
	'Malta',
	'the Marshall Islands',
	'Marshall Islands',
	'Mauritania',
	'Mauritius',
	'Mexico',
	'the Federated States of Micronesia',
	'Federated States of Micronesia',
	'Micronesia',
	'FSM',
	'Moldova',
	'Monaco',
	'Mongolia',
	'Montenegro',
	'Montserrat',
	'Morocco',
	'Mozambique',
	'Myanmar',
	'Namibia',
	'Nauru',
	'Nepal',
	'the Netherlands',
	'Netherlands',
	'New Caledonia',
	'New Zealand',
	'Nicaragua',
	'Niger',
	'Nigeria',
	'North Korea',
	"the People's Democratic Republic of Korea",
	"the Democratic People's Republic of Korea",
	"Democratic People's Republic of Korea",
	"People's Democratic Republic of Korea",
	'DPR Korea',
	'Korea DPR',
	'Northern Ireland',
	'Norway',
	'Oman',
	'Pakistan',
	'Palau',
	'Mandatory Palestine',
	'Palestine',
	'the Palestinian territories',
	'Palestinian territories',
	'Panama',
	'Papua New Guinea',
	'Paraguay',
	'Peru',
	'the Philippines',
	'Philippines',
	'Poland',
	'Portugal',
	'Puerto Rico',
	'the Spanish Virgin Islands',
	'Spanish Virgin Islands',
	'Qatar',
	'Romania',
	'Russia',
	'the Russian Federation',
	'Russian Federation',
	'Rwanda',
	'Saint Kitts and Nevis',
	'Saint Lucia',
	'Saint Vincent and the Grenadines',
	'Western Samoa',
	'Samoa',
	'San Marino',
	-- NOTE(review): the next two entries look identical; possibly they were
	-- meant to differ in Unicode normalisation — confirm before removing one.
	'São Tomé and Príncipe',
	'São Tomé and Príncipe',
	'Sao Tome and Principe',
	'Saudi Arabia',
	'Scotland',
	'Senegal',
	'Serbia',
	'Seychelles',
	'Sierra Leone',
	'Singapore',
	'Slovakia',
	'Slovenia',
	'the Solomon Islands',
	'Solomon Islands',
	'Somalia',
	'South Korea',
	'the Republic of Korea',
	'Korea Republic',
	'South Sudan',
	'Spain',
	'Sri Lanka',
	'Sudan',
	'Suriname',
	'Swaziland',
	'Sweden',
	'Switzerland',
	'Syria',
	'Tahiti',
	'the Republic of China',
	'Republic of China',
	'Taiwan',
	'Tajikistan',
	'Tanzania',
	'Thailand',
	'Togo',
	'Tonga',
	'Trinidad and Tobago',
	'Trinidad',
	'Tobago',
	'Tunisia',
	'Turkey',
	'Turkmenistan',
	'Turks and Caicos Islands',
	'Tuvalu',
	'Uganda',
	'Ukraine',
	'the United Arab Emirates',
	'the UAE',
	'the U.A.E.',
	'United Arab Emirates',
	'UAE',
	'U.A.E.',
	'the United Kingdom',
	'the UK',
	'the U.K.',
	'United Kingdom',
	'UK',
	'U.K.',
	'the United States of America',
	'the United States',
	'the USA',
	'the U.S.A.',
	'United States of America',
	'United States',
	'USA',
	'U.S.A.',
	'America',
	'Uruguay',
	'Uzbekistan',
	'Vanuatu',
	'Venezuela',
	'Vietnam',
	'the British Virgin Islands',
	'British Virgin Islands',
	'UK Virgin Islands',
	'U.K. Virgin Islands',
	'the United States Virgin Islands',
	'the US Virgin Islands',
	'the U.S. Virgin Islands',
	'United States Virgin Islands',
	'US Virgin Islands',
	'U.S. Virgin Islands',
	'the Virgin Islands',
	'Virgin Islands',
	'Wales',
	'Yemen',
	'Zambia',
	'Zimbabwe'
}

-- Splits a string into "words".
-- A "word" is a maximal run of characters which are neither whitespace (%s)
--    nor punctuation (%p); any run of those characters acts as a separator.
-- @param str  the string to split
-- @return     a sequence table of the words in order of appearance
--             (an empty table if the string contains no word characters)
function splitIntoWords(str)
	-- everything is kept local so that calling this function neither creates
	-- nor overwrites global variables such as "result" or "index"
	local words = {}
	-- matching the runs of non-separator characters directly gives the same
	-- words as stripping leading/trailing separators around each chunk
	for word in mw.ustring.gmatch(str, "[^%s%p]+") do
		words[#words + 1] = word
	end
	return words
end

-- Returns a word of the given string which exactly matches an entry of
-- countryList. Which of the matching words is returned is selected by the
-- module-level "matchnum":
--   * a positive n selects the nth match, counting from the start
--   * a negative n selects the nth match counting backwards from the end
--     (so -1 is the last match)
-- Returns the module-level "nomatch" value if no word matches, or if there is
-- no match at the selected position.
-- @param str  the string (normally a page name) to search
function findcountryinstring(str)
	-- split the pagename into separate words
	local titleWords = splitIntoWords(str)

	-- collect every word which equals a country name, in title order
	local matches = {}
	for _, thisWord in ipairs(titleWords) do
		for _, thisCountry in ipairs(countryList) do
			if thisCountry == thisWord then
				matches[#matches + 1] = thisCountry
			end
		end
	end

	local nMatches = #matches
	if nMatches == 0 then
		-- none of the title words matches a whole country
		return nomatch
	end

	-- work on a copy: "matchnum" is shared module state, and changing it
	-- here would shift the position used by every later call
	local position = matchnum
	if position < 0 then
		-- convert "nth from the end" into an index from the start
		position = nMatches + position + 1
	end

	if (position >= 1) and (position <= nMatches) then
		-- a non-integer position has no entry; treat that as "no match"
		return matches[position] or nomatch
	end

	-- we have not found a match at the position specified by "matchnum"
	return nomatch
end

-- Frame-based entry point: extracts the arguments from the frame with
-- Module:Arguments' getArgs and hands them to p._main.
function p.main(frame)
	return p._main(getArgs(frame))
end

-- Finds a country name in a page title.
-- Recognised fields of "args":
--   nomatch  value to return when no country is found (default: the
--            module-level "nomatch" setting)
--   match    which match to return; see below (default: the module-level
--            "matchnum" setting)
--   page     page name to search; if absent or empty, the text of the
--            current page's title is used
-- @return the selected matching word, or the "nomatch" value
function p._main(args)
	-- "nomatch" and "matchnum" are module-level settings which
	-- findcountryinstring reads. Remember their current values so that the
	-- overrides made for this call can be undone before returning; otherwise
	-- one call's parameters would carry over into the next call.
	local savedNomatch, savedMatchnum = nomatch, matchnum

	if (args['nomatch'] ~= nil) then
		nomatch = args['nomatch']
	end

	-- by default, we return the first match
	-- but the optional "match" parameter sets the "matchnum" variable, which
	-- * for a positive matchnum "n", returns the nth match if it exists
	-- * for a negative matchnum "n", returns (if it exists) the nth match
	--   counting backwards from the end.
	--   So "match=-1" returns the last match
	--   and "match=-3" returns the 3rd-last match
	if (args['match'] ~= nil) then
		local requested = tonumber(args['match'])
		if ((requested == nil) or (requested == 0)) then
			-- not a number, or zero: fall back to the first match
			requested = 1
		end
		matchnum = requested
	end

	-- by default, we use the current page
	-- but if the "page=" parameter is supplied, we use that
	-- so we try the parameter first
	local thispagename
	if ((args['page'] ~= nil) and (args['page'] ~= "")) then
		-- we have a non-empty "page" parameter, so we use it
		thispagename = args['page']
	else
		-- get the page title
		thispagename = mw.title.getCurrentTitle().text
	end

	-- now check the pagename to try to find a country
	local result = findcountryinstring(thispagename)

	-- keep the "nomatch" value which was in effect for this call, then
	-- restore the module-level settings
	local fallback = nomatch
	nomatch, matchnum = savedNomatch, savedMatchnum

	if (result == "") then
		return fallback
	end
	return result
end

return p