/* <Br/> * $ ID: Ruby. c 571 23: 32: 14z elliotth $ <br/> * copyright (c) 2000-2001, thaddeus covert <sahuagin@mediaone.net> <br/> * copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de> <br/> * copyright (c) 2004 Elliott Hughes <enh@acm.org> <br/> * this source code is released for free distribution under the terms of the <br/> * GNU General Public License. <br/> * this MO Dule contains functions for generating tags for Ruby language <br/> * files. <br/> */<br/>/* <br/> * include files <br/> */<br/> # include "General. H "/* must always come first */<br/> # include <string. h> <br/> # include "entry. H "<br/> # include" parse. H "<br/> # include" read. H "<br/> # include" vstring. H "<br/>/* <br/> * data declarations <br/> */<br/> typedef Enum {<br/> k_undefined =-1, k_class, k_method, K_module, k_singleton, k_ruby_variable <br/>} rubykind; <br/>/* <br/> * data definitions <br/> */<br/> static kindoption rubykinds [] ={< br/> {true, 'C', "class", "classes" },< br/> {true, 'F', "method", "methods" },< br/> {true, 'M', "module", "modules" },< br/> {true, 'V', "variable", "variables" },< br/> {true, 'F', "Singleton method", "Singleton methods" }< br/>}; <br/> static stringlist * nest Ing = 0; <br/>/* <br/> * function definitions <br/> */<br/>/* <br/> * returns a string describing the scope in 'LIST'. <br/> * we record the current scope as a list of entered scopes. <br/> * scopes corresponding to 'if' statements and the like are <br/> * represented by empty strings. scopes corresponding to <br/> * modules and classes are represented by the name of the <br/> * module or class. <br/> */< Br/> static vstring * stringlisttoscope (const stringlist * List) <br/>{< br/> unsigned int I; <br/> unsigned int chunks_output = 0; <br/> vstring * result = vstringnew (); <br/> const unsigned int max = stringlistcount (list); <br/> for (I = 0; I <Max; ++ I) <br/>{< br/> vstring * chunk = stringlistitem (list, I); <br/> If (vstringlength (chunk)> 0) <br/>{< br/> vstringcats (result, (chunks_output ++> 0 )? ". ":" "); <Br/> vstringcats (result, vstringvalue (chunk); <br/>}< br/> return result; <br/>}< br/>/* <br/> * attempts to advance's 'past 'literal '. <br/> * returns true if it did, false (and leaves's 'where <br/> * it was) otherwise. <br/> */<br/> static Boolean canmatch (const unsigned char ** S, const char * literal) <br/>{< br/> const int literal_length = strlen (literal); <br/> const unsig Ned char next_char = * (* s + literal_length); <br/> If (strncmp (const char *) * s, literal, literal_length )! = 0) <br/>{< br/> return false; <br/>}< br/>/* additionally check that we're at the end of a token. */<br/> If (! (Next_char = 0 | isspace (next_char) | next_char = '(') <br/>{< br/> return false; <br/>}< br/> * s + = literal_length; <br/> return true; <br/>}< br/>/* <br/> * attempts to advance 'cp' past a ruby operator method name. returns <br/> * true if successful (and copies the name into 'name'), false otherwise. <br/> */<br/> static Boolean parserubyoperator (vstring * Name, const unsigned char ** CP) <br/> {<Br/> static const char * ruby_operators [] = {<br/> "[]", "[] =", <br/> "**", <br/> "! ","~ "," + @ ","-@ ", <Br/>" * ","/"," % ", <br/>" + ","-", <br/> ">", "<", <br/> "&", <br/> "^", "| ", <br/> "<=", "<", ">", "> =", <br/> "<=>", "= ", "= ","! = "," = ~ ","!~ ", <Br/>" '", <br/> 0 <br/>}; <br/> int I; <br/> for (I = 0; ruby_operators [I]! = 0; ++ I) <br/>{< br/> If (canmatch (CP, ruby_operators [I]) <br/>{< br/> vstringcats (name, ruby_operators [I]); <br/> return true; <br/>}< br/> return false; <br/>}< br/>/* <br/> * emits a tag for the given 'name' of kind 'kind' at the current nesting. <br/> */<br/> static void emitrubytag (vstring * Name, rubykind kind) <br/>{< br/> tagentryinfo tag; <br/> vstring * scope; <br/> vstringterminate (Name); <br/> scope = stringlisttoscope (nesting); <br/> inittagentry (& tag, vstringvalue (name )); <br/> If (vstringlength (scope)> 0) {<br/> tag. extensionfields. scope [0] = "class"; <br/> tag. extensionfields. scope [1] = vstringvalue (scope); <br/>}< br/> tag. kindname = rubykinds [kind]. name; <br/> tag. kind = rubykinds [kind]. letter; <br/> maketagentry (& tag); <br/> stringlistadd (nesting, vstringnewc Opy (name); <br/> vstringclear (name); <br/> vstringdelete (scope ); <br/>}< br/>/* tests whether 'ch' is a character in 'LIST '. */<br/> static Boolean charisin (char CH, const char * List) <br/>{< br/> return (strchr (list, CH )! = 0); <br/>}< br/>/* advances 'cp' over leading whitespace. */<br/> static void skipwhitespace (const unsigned char ** CP) <br/>{< br/> while (isspace (** CP )) <br/>{< br/> ++ * CP; <br/>}< br/>/* <br/> * copies the characters forming an Identifier from * CP into <br/> * Name, leaving * CP pointing to the character after the identifier. <br/> */<br/> static rubykind parseidentifier (<br/> const unsign Ed char ** CP, vstring * Name, rubykind kind) <br/>{< br/>/* method names are slightly different to class and variable names. <br/> * A method name may optionally end with a question mark, exclamation <br/> * point or equals sign. these are all part of the name. <br/> * A method name may also contain a period if it's a singleton method. <br/> */<br/> const char * also_ OK = (kind = k_method )? "_.?! = ":" _ "; <Br/> skipwhitespace (CP); <br/>/* check for an anonymous (Singleton) Class such as" class <pttp ". */<br/> If (kind = k_class & ** CP = '<' & * (* CP + 1) = '<') <br/>{< br/> return k_undefined; <br/>}< br/>/* Check for operators such as "def [] = (Key, Val )". */<br/> If (kind = k_method | kind = k_singleton) <br/>{< br/> If (parserubyoperator (name, CP )) <br/>{< br/> return kind; <br/>} <Br/>}< br/>/* copy the identifier into 'name'. */<br/> while (** CP! = 0 & (isalnum (** CP) | charisin (** CP, also_ OK) <br/>{< br/> char last_char = ** CP; <br/> vstringput (name, last_char); <br/> ++ * CP; <br/> If (kind = k_method) <br/> {<br/>/* recognize Singleton methods. */<br/> If (last_char = '. ') <br/>{< br/> vstringterminate (name); <br/> vstringclear (name); <br/> return parseidentifier (CP, name, k_singleton ); <br/>}< br/>/* recognize characters which mark th E end of a method name. */<br/> If (charisin (last_char ,"?! = ") <Br/>{< br/> break; <br/>}< br/> return kind; <br/>}< br/> static void readandemittag (const unsigned char ** CP, rubykind expected_kind) <br/>{< br/> If (isspace (** CP) <br/>{< br/> vstring * name = vstringnew (); <br/> rubykind actual_kind = parseidentifier (CP, name, expected_kind); <br/> If (actual_kind = k_undefined | vstringlength (name) = 0) <br/> {<br/>/* <br/> * what kind of T AGS shoshould we create for code like this? <Br/> * % W (self. clfloor ). each do | Name | <br/> * module_eval <-"end;" <br/> * def # {name} (X, Y = 1) <br/> * Q, R = x. divmod (y) <br/> * q = Q. to_ I <br/> * return Q, R <br/> * end; <br/> * end <br/> * or this? <Br/> * class <pttp <br/> * for now, we don't create any. <br/> */<br/>}< br/> else <br/> {<br/> emitrubytag (name, actual_kind ); <br/>}< br/> vstringdelete (name ); <br/>}< br/> static rubykind parseidentifierofvariable (<br/> const unsigned char ** CP, vstring * Name, rubykind kind) <br/>{< br/> skipwhitespace (CP); <br/> // printf ("parse: Cp = % s/n", * CP ); <br/>/* copy the identifie R into 'name'. */<br/> while (** CP! = 0) <br/>{< br/> char last_char = ** CP; <br/> vstringput (name, last_char); <br/> ++ * CP; <br/>/* If next char is space, exit */<br/> If (isspace (last_char) <br/>{< br/> break; <br/>}< br/> return kind; <br/>}< br/> static void readandemittagofvariable (const unsigned char ** CP, rubykind expected_kind) <br/>{< br/> vstring * name = vstringnew (); <br/> rubykind actual_kind = parseidentifierof Variable (CP, name, expected_kind); <br/> emitrubytag (name, actual_kind); <br/> vstringdelete (name ); <br/>}< br/> static void enterunnamedscope (void) <br/>{< br/> stringlistadd (nesting, vstringnewinit ("")); <br/>}< br/>/* <br/> @ name ={< br/>}< br/> */<br/> static Boolean canmatchclassvariable (const unsigned char ** s, const char * literal) <br/>{< br/> const unsigned char * STR = * s; <br/> If (S Trlen (STR) = 0) <br/> return false; <br/> const int literal_length = strlen (literal ); <br/> const unsigned char next_char = (* STR + literal_length); <br/>/* if not found */<br/> If (strncmp (const char *) STR, literal, literal_length )! = 0) <br/>{< br/> return false; <br/>}< br/>/* If found, judge whether it is a definition */<br/> while (* Str) {<br/> If (! Isspace (* Str) {<br/> ++ STR; <br/> continue; <br/>}< br/> else {/* is a space now */<br/> + STR; /* deal with next char */<br/> while (* Str) {<br/> If (isspace (* Str) {<br/> ++ STR; <br/> continue; <br/>}< br/> else if (* STR = ') {<br/> ++ * s; <br/> + + * s; <br/> return true; <br/>}< br/> else {<br/> return false; <br/>}< br/> + + STR; <br/>}< br/> return true; <br/>}< br/> static Boolean canmatchconstant (const unsigned char ** s) <br/>{< br/> const unsigned char * STR = * s; <br/> If (strlen (STR) = 0) <br/> return false; <br/>/* judge the word whether is a upcase */<br/> while (* Str! = 0) <br/>{< br/> If (* STR = '_') {<br/> ++ STR; <br/> If (isspace (* Str) <br/> break; <br/> else <br/> continue; <br/>}< br/> If (! Isupper (* Str) <br/> return false; <br/>/* Next char */<br/> ++ STR; <br/> If (isspace (* Str) <br/> break; <br/>}< br/>/* judge whether is a definition */<br/> STR = * s; <br/> while (* Str) {<br/> If (! Isspace (* Str) {<br/> ++ STR; <br/> continue; <br/>}< br/> else {/* is a space now */<br/> + STR; /* deal with next char */<br/> while (* Str) {<br/> If (isspace (* Str) {<br/> ++ STR; <br/> continue; <br/>}< br/> else if (* STR = ') {<br/> return true; <br/>}< br/> else {<br/> return false; <br/>}< br/> + + STR; <br/>}< br/> return true; <br/>}< br/> static void findrubytags (void) <Br/>{< br/> const unsigned char * line; <br/> Boolean inmultilinecomment = false; <br/> nesting = stringlistnew (); <br/>/* fixme: This whole scheme is wrong, because Ruby isn' t line-based. <br/> * You cocould perfectly well write: <br/> * def <br/> * method <br/> * puts ("hello ") <br/> * end <br/> * If you have wished, and this function wowould fail to recognize anything. <br/> */<br/> while (line = Filereadline ())! = NULL) <br/>{< br/> const unsigned char * CP = line; <br/> If (canmatch (& CP, "= begin ")) <br/>{< br/> inmultilinecomment = true; <br/> continue; <br/>}< br/> If (canmatch (& CP, "= end") <br/>{< br/> inmultilinecomment = false; <br/> continue; <br/>}< br/> skipwhitespace (& CP ); <br/>/* avoid mistakenly starting a scope for modifiers such as <br/> * return if <exp> <br/> * fixme: this is fooled B Y Code such as <br/> * result = If <exp> <br/> * <A> <br/> * else <br/> * <b> <br/> * end <br/> * fixme: we're re also fooled if someone does something heinous such as <br/> * puts ("hello ") /<br/> * Unless <exp> <br/> */<br/> If (canmatch (& CP, "case") | canmatch (& CP, "for") | <br/> canmatch (& CP, "if") | canmatch (& CP, "unless ") | <br/> canmatch (& CP, "while") <br/> {<br/> enterunnam Edscope (); <br/>}< br/>/* <br/> * "module M ", "Class C" and "def M" shocould only be at the beginning <br/> * of a line. <br/> */<br/> If (canmatch (& CP, "module") <br/> {<br/> readandemittag (& CP, k_module ); <br/>}< br/> else if (canmatch (& CP, "class") <br/>{< br/> readandemittag (& CP, k_class ); <br/>}< br/> else if (canmatch (& CP, "def") <br/>{< br/> readandemittag (& CP, k_method ); <br/>}< br/> else If (canmatchclassvariable (& CP, "@") <br/>{< br/> // printf ("cp = % s/n", CP ); <br/> readandemittagofvariable (& CP, k_ruby_variable); <br/>}< br/> else if (canmatchconstant (& CP )) <br/>{< br/> readandemittagofvariable (& CP, k_ruby_variable); <br/>}< br/> while (* CP! = '/0') <br/>{< br/>/* fixme: We don't need with here documents, <br/> * or regular expression literals, or... you get the idea. <br/> * hopefully, the restriction above that insists on seeing <br/> * definitions at the starts of lines shoshould keep us out of <br/> * mischief. <br/> */<br/> If (inmultilinecomment | isspace (* CP) <br/>{< br/> ++ CP; <br/>}< br/> else if (* CP = '#') <br/>{< br/>/* fixme: T His is wrong, but there * probably * won't be a <br/> * Definition after an interpolated string (where # Doesn't <br/> * mean 'comment '). <br/> */<br/> break; <br/>}< br/> else if (canmatch (& CP, "begin") | canmatch (& CP, "Do") <br/>{< br/> enterunnamedscope (); <br/>}< br/> else if (canmatch (& CP, "end ") & stringlistcount (nesting)> 0) <br/>{< br/>/* leave the most recent scope. */<br/> vstringdelete (Stringlistlast (nesting); <br/> stringlistremovelast (nesting); <br/>}< br/> else if (* CP = '"') <br/>{< br/>/* Skip string literals. <br/> * fixme: shocould conflict with escapes and interpolation. <br/> */<br/> do {<br/> + + CP; <br/>} while (* CP! = 0 & * CP! = '"'); <Br/>}< br/> else if (* CP! = '/0') <br/>{< br/> DO <br/> ++ CP; <br/> while (isalnum (* CP) | * CP = '_'); <br/>}< br/> stringlistdelete (nesting ); <br/>}< br/> extern parserdefinition * rubyparser (void) <br/>{< br/> static const char * const extensions [] = {"rb ", "Ruby", null }; <br/> parserdefinition * def = parsernew ("Ruby"); <br/> def-> kinds = rubykinds; <br/> def-> kindcount = kind_count (rubykinds); <br/> def-> extensions = extensions; <br/> def-> parser = findrubytags; <br/> return def; <br/>}< br/>/* VI: Set tabstop = 4 shiftwidth = 4: */<br/>